GNU Octave  4.4.1
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
oct-stream.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 1996-2018 John W. Eaton
4 Copyright (C) 2015-2018 Lachlan Andrew, Monash University
5 
6 This file is part of Octave.
7 
8 Octave is free software: you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <https://www.gnu.org/licenses/>.
21 
22 */
23 
24 #if defined (HAVE_CONFIG_H)
25 # include "config.h"
26 #endif
27 
28 #include <cassert>
29 #include <cctype>
30 #include <cstring>
31 
32 #include <algorithm>
33 #include <deque>
34 #include <fstream>
35 #include <iomanip>
36 #include <iostream>
37 #include <sstream>
38 #include <string>
39 
40 #include "Array.h"
41 #include "Cell.h"
42 #include "byte-swap.h"
43 #include "lo-ieee.h"
44 #include "lo-mappers.h"
45 #include "lo-utils.h"
46 #include "oct-locbuf.h"
47 #include "quit.h"
48 #include "str-vec.h"
49 
50 #include "error.h"
51 #include "errwarn.h"
52 #include "input.h"
53 #include "interpreter.h"
54 #include "octave.h"
55 #include "oct-iostrm.h"
56 #include "oct-stdstrm.h"
57 #include "oct-stream.h"
58 #include "ov.h"
59 #include "ovl.h"
60 #include "pager.h"
61 #include "utils.h"
62 
63 namespace octave
64 {
65  // Programming Note: There are two very different error functions used
66  // in the stream code. When invoked with "error (...)" the member
67  // function from octave::stream or octave::base_stream is called. This
68  // function sets the error state on the stream AND returns control to
69  // the caller. The caller must then return a value at the end of the
70  // function. When invoked with "::error (...)" the exception-based
71  // error function from error.h is used. This function will throw an
72  // exception and not return control to the caller. BE CAREFUL and
73  // invoke the correct error function!
74 
75  // Possible values for conv_err:
76  //
77  // 1 : not a real scalar
78  // 2 : value is NaN
79  // 3 : value is not an integer
80 
81  static int
82  convert_to_valid_int (const octave_value& tc, int& conv_err)
83  {
84  conv_err = 0;
85 
86  int retval = 0;
87 
88  double dval = 0.0;
89 
90  try
91  {
92  dval = tc.double_value ();
93  }
94  catch (const octave::execution_exception&)
95  {
97 
98  conv_err = 1;
99  }
100 
101  if (! conv_err)
102  {
103  if (! lo_ieee_isnan (dval))
104  {
105  int ival = octave::math::nint (dval);
106 
107  if (ival == dval)
108  retval = ival;
109  else
110  conv_err = 3;
111  }
112  else
113  conv_err = 2;
114  }
115 
116  return retval;
117  }
118 
119  static octave_idx_type
120  get_size (double d, const std::string& who)
121  {
122  octave_idx_type retval = -1;
123 
124  if (lo_ieee_isnan (d))
125  ::error ("%s: NaN is invalid as size specification", who.c_str ());
126 
127  if (octave::math::isinf (d))
128  retval = -1;
129  else
130  {
131  if (d < 0.0)
132  ::error ("%s: negative value invalid as size specification",
133  who.c_str ());
134 
136  ::error ("%s: dimension too large for Octave's index type",
137  who.c_str ());
138 
140  }
141 
142  return retval;
143  }
144 
145  static void
146  get_size (const Array<double>& size,
148  bool& one_elt_size_spec, const std::string& who)
149  {
150  nr = -1;
151  nc = -1;
152 
153  one_elt_size_spec = false;
154 
155  double dnr = -1.0;
156  double dnc = -1.0;
157 
158  octave_idx_type sz_len = size.numel ();
159 
160  if (sz_len == 1)
161  {
162  one_elt_size_spec = true;
163 
164  dnr = size(0);
165 
166  dnc = (dnr == 0.0) ? 0.0 : 1.0;
167  }
168  else if (sz_len == 2)
169  {
170  dnr = size(0);
171 
172  if (octave::math::isinf (dnr))
173  ::error ("%s: invalid size specification", who.c_str ());
174 
175  dnc = size(1);
176  }
177  else
178  ::error ("%s: invalid size specification", who.c_str ());
179 
180  nr = get_size (dnr, who);
181 
182  if (dnc >= 0.0)
183  nc = get_size (dnc, who);
184  }
185 
// Expand the body of a scanf "%[...]" character class S into the
// explicit list of characters it denotes and return that list.
//
// A '-' that has a character on each side, where the left character
// is not greater than the right one (both compared as unsigned char),
// denotes a range: every character strictly between the two end
// points is appended.  The end points themselves are appended when
// they are visited as ordinary characters.  Any other '-' is kept
// only when it is the last character of S; otherwise it is dropped.

static std::string
expand_char_class (const std::string& s)
{
  std::string retval;

  size_t len = s.length ();

  size_t i = 0;

  while (i < len)
    {
      unsigned char c = s[i++];

      if (c == '-' && i > 1 && i < len
          && (static_cast<unsigned char> (s[i-2])
              <= static_cast<unsigned char> (s[i])))
        {
          // Add all characters from the range except the end points
          // (the first was added on the previous iteration and the
          // last will be added on the next one).
          //
          // Both end points are widened through unsigned char so the
          // loop agrees with the guard above.  Comparing against a
          // plain char would see a negative end point for characters
          // >= 0x80 where char is signed and skip the whole range.

          const int range_first = static_cast<unsigned char> (s[i-2]);
          const int range_last = static_cast<unsigned char> (s[i]);

          for (int ch = range_first + 1; ch < range_last; ch++)
            retval += static_cast<char> (ch);
        }
      else
        {
          // Add the character to the class.  Only add '-' if it is
          // the last character in the class.

          if (c != '-' || i == len)
            retval += c;
        }
    }

  return retval;
}
221 
222  class
224  {
225  public:
226 
228  {
229  whitespace_conversion = 1,
230  literal_conversion = 2,
231  null = 3
232  };
233 
234  scanf_format_elt (const std::string& txt = "", int w = 0, bool d = false,
235  char typ = '\0', char mod = '\0',
236  const std::string& ch_class = "")
237  : text (txt), width (w), discard (d), type (typ),
238  modifier (mod), char_class (ch_class)
239  { }
240 
241  scanf_format_elt (const scanf_format_elt&) = default;
242 
243  scanf_format_elt& operator = (const scanf_format_elt&) = default;
244 
245  ~scanf_format_elt (void) = default;
246 
247  // The C-style format string.
249 
250  // The maximum field width.
251  int width;
252 
253  // TRUE if we are not storing the result of this conversion.
254  bool discard;
255 
256  // Type of conversion -- 'd', 'i', 'o', 'u', 'x', 'e', 'f', 'g',
257  // 'c', 's', 'p', '%', or '['.
258  char type;
259 
260  // A length modifier -- 'h', 'l', or 'L'.
261  char modifier;
262 
263  // The class of characters in a '[' format.
265  };
266 
267  class
269  {
270  public:
271 
272  scanf_format_list (const std::string& fmt = "");
273 
274  // No copying!
275 
276  scanf_format_list (const scanf_format_list&) = delete;
277 
278  scanf_format_list& operator = (const scanf_format_list&) = delete;
279 
280  ~scanf_format_list (void);
281 
282  octave_idx_type num_conversions (void) { return nconv; }
283 
284  // The length can be different than the number of conversions.
285  // For example, "x %d y %d z" has 2 conversions but the length of
286  // the list is 3 because of the characters that appear after the
287  // last conversion.
288 
289  size_t length (void) const { return fmt_elts.size (); }
290 
291  const scanf_format_elt * first (void)
292  {
293  curr_idx = 0;
294  return current ();
295  }
296 
297  const scanf_format_elt * current (void) const
298  {
299  return length () > 0 ? fmt_elts[curr_idx] : nullptr;
300  }
301 
302  const scanf_format_elt * next (bool cycle = true)
303  {
304  static scanf_format_elt dummy
305  ("", 0, false, scanf_format_elt::null, '\0', "");
306 
307  curr_idx++;
308 
309  if (curr_idx >= length ())
310  {
311  if (cycle)
312  curr_idx = 0;
313  else
314  return &dummy;
315  }
316 
317  return current ();
318  }
319 
320  void printme (void) const;
321 
322  bool ok (void) const { return (nconv >= 0); }
323 
324  operator bool () const { return ok (); }
325 
326  bool all_character_conversions (void);
327 
328  bool all_numeric_conversions (void);
329 
330  private:
331 
332  // Number of conversions specified by this format string, or -1 if
333  // invalid conversions have been found.
335 
336  // Index to current element;
337  size_t curr_idx;
338 
339  // List of format elements.
340  std::deque<scanf_format_elt*> fmt_elts;
341 
342  // Temporary buffer.
343  std::ostringstream buf;
344 
345  void add_elt_to_list (int width, bool discard, char type, char modifier,
346  const std::string& char_class = "");
347 
348  void process_conversion (const std::string& s, size_t& i, size_t n,
349  int& width, bool& discard, char& type,
350  char& modifier);
351 
352  int finish_conversion (const std::string& s, size_t& i, size_t n,
353  int& width, bool discard, char& type,
354  char modifier);
355  };
356 
358  : nconv (0), curr_idx (0), fmt_elts (), buf ()
359  {
360  size_t n = s.length ();
361 
362  size_t i = 0;
363 
364  int width = 0;
365  bool discard = false;
366  char modifier = '\0';
367  char type = '\0';
368 
369  bool have_more = true;
370 
371  while (i < n)
372  {
373  have_more = true;
374 
375  if (s[i] == '%')
376  {
377  // Process percent-escape conversion type.
378 
379  process_conversion (s, i, n, width, discard, type, modifier);
380 
381  have_more = (buf.tellp () != 0);
382  }
383  else if (isspace (s[i]))
384  {
386 
387  width = 0;
388  discard = false;
389  modifier = '\0';
390  buf << ' ';
391 
392  while (++i < n && isspace (s[i]))
393  ; // skip whitespace
394 
395  add_elt_to_list (width, discard, type, modifier);
396 
397  have_more = false;
398  }
399  else
400  {
402 
403  width = 0;
404  discard = false;
405  modifier = '\0';
406 
407  while (i < n && ! isspace (s[i]) && s[i] != '%')
408  buf << s[i++];
409 
410  add_elt_to_list (width, discard, type, modifier);
411 
412  have_more = false;
413  }
414 
415  if (nconv < 0)
416  {
417  have_more = false;
418  break;
419  }
420  }
421 
422  if (have_more)
423  add_elt_to_list (width, discard, type, modifier);
424 
425  buf.clear ();
426  buf.str ("");
427  }
428 
430  {
431  size_t n = fmt_elts.size ();
432 
433  for (size_t i = 0; i < n; i++)
434  {
435  scanf_format_elt *elt = fmt_elts[i];
436  delete elt;
437  }
438  }
439 
440  void
441  scanf_format_list::add_elt_to_list (int width, bool discard, char type,
442  char modifier,
443  const std::string& char_class)
444  {
445  std::string text = buf.str ();
446 
447  if (! text.empty ())
448  {
449  scanf_format_elt *elt
450  = new scanf_format_elt (text, width, discard, type,
451  modifier, char_class);
452 
453  fmt_elts.push_back (elt);
454  }
455 
456  buf.clear ();
457  buf.str ("");
458  }
459 
460  void
462  size_t n, int& width, bool& discard,
463  char& type, char& modifier)
464  {
465  width = 0;
466  discard = false;
467  modifier = '\0';
468  type = '\0';
469 
470  buf << s[i++];
471 
472  bool have_width = false;
473 
474  while (i < n)
475  {
476  switch (s[i])
477  {
478  case '*':
479  if (discard)
480  nconv = -1;
481  else
482  {
483  discard = true;
484  buf << s[i++];
485  }
486  break;
487 
488  case '0': case '1': case '2': case '3': case '4':
489  case '5': case '6': case '7': case '8': case '9':
490  if (have_width)
491  nconv = -1;
492  else
493  {
494  char c = s[i++];
495  width = 10 * width + c - '0';
496  have_width = true;
497  buf << c;
498  while (i < n && isdigit (s[i]))
499  {
500  c = s[i++];
501  width = 10 * width + c - '0';
502  buf << c;
503  }
504  }
505  break;
506 
507  case 'h': case 'l': case 'L':
508  if (modifier != '\0')
509  nconv = -1;
510  else
511  modifier = s[i++];
512  break;
513 
514  case 'd': case 'i': case 'o': case 'u': case 'x':
515  if (modifier == 'L')
516  {
517  nconv = -1;
518  break;
519  }
520  goto fini;
521 
522  case 'e': case 'f': case 'g':
523  if (modifier == 'h')
524  {
525  nconv = -1;
526  break;
527  }
528 
529  // No float or long double conversions, thanks.
530  buf << 'l';
531 
532  goto fini;
533 
534  case 'c': case 's': case 'p': case '%': case '[':
535  if (modifier != '\0')
536  {
537  nconv = -1;
538  break;
539  }
540  goto fini;
541 
542  fini:
543  {
544  if (finish_conversion (s, i, n, width, discard,
545  type, modifier) == 0)
546  return;
547  }
548  break;
549 
550  default:
551  nconv = -1;
552  break;
553  }
554 
555  if (nconv < 0)
556  break;
557  }
558 
559  nconv = -1;
560  }
561 
562  int
564  size_t n, int& width, bool discard,
565  char& type, char modifier)
566  {
567  int retval = 0;
568 
569  std::string char_class;
570 
571  size_t beg_idx = std::string::npos;
572  size_t end_idx = std::string::npos;
573 
574  if (s[i] == '%')
575  {
576  type = '%';
577  buf << s[i++];
578  }
579  else
580  {
581  type = s[i];
582 
583  if (s[i] == '[')
584  {
585  buf << s[i++];
586 
587  if (i < n)
588  {
589  beg_idx = i;
590 
591  if (s[i] == '^')
592  {
593  type = '^';
594  buf << s[i++];
595 
596  if (i < n)
597  {
598  beg_idx = i;
599 
600  if (s[i] == ']')
601  buf << s[i++];
602  }
603  }
604  else if (s[i] == ']')
605  buf << s[i++];
606  }
607 
608  while (i < n && s[i] != ']')
609  buf << s[i++];
610 
611  if (i < n && s[i] == ']')
612  {
613  end_idx = i-1;
614  buf << s[i++];
615  }
616 
617  if (s[i-1] != ']')
618  retval = nconv = -1;
619  }
620  else
621  buf << s[i++];
622 
623  nconv++;
624  }
625 
626  if (nconv >= 0)
627  {
628  if (beg_idx != std::string::npos && end_idx != std::string::npos)
629  char_class = expand_char_class (s.substr (beg_idx,
630  end_idx - beg_idx + 1));
631 
632  add_elt_to_list (width, discard, type, modifier, char_class);
633  }
634 
635  return retval;
636  }
637 
638  void
640  {
641  size_t n = fmt_elts.size ();
642 
643  for (size_t i = 0; i < n; i++)
644  {
645  scanf_format_elt *elt = fmt_elts[i];
646 
647  std::cerr
648  << "width: " << elt->width << "\n"
649  << "discard: " << elt->discard << "\n"
650  << "type: ";
651 
653  std::cerr << "literal text\n";
655  std::cerr << "whitespace\n";
656  else
657  std::cerr << elt->type << "\n";
658 
659  std::cerr
660  << "modifier: " << elt->modifier << "\n"
661  << "char_class: '" << undo_string_escapes (elt->char_class) << "'\n"
662  << "text: '" << undo_string_escapes (elt->text) << "'\n\n";
663  }
664  }
665 
666  bool
668  {
669  size_t n = fmt_elts.size ();
670 
671  if (n > 0)
672  {
673  for (size_t i = 0; i < n; i++)
674  {
675  scanf_format_elt *elt = fmt_elts[i];
676 
677  switch (elt->type)
678  {
679  case 'c': case 's': case '%': case '[': case '^':
682  break;
683 
684  default:
685  return false;
686  break;
687  }
688  }
689 
690  return true;
691  }
692  else
693  return false;
694  }
695 
696  bool
698  {
699  size_t n = fmt_elts.size ();
700 
701  if (n > 0)
702  {
703  for (size_t i = 0; i < n; i++)
704  {
705  scanf_format_elt *elt = fmt_elts[i];
706 
707  switch (elt->type)
708  {
709  case 'd': case 'i': case 'o': case 'u': case 'x':
710  case 'e': case 'f': case 'g':
711  break;
712 
713  default:
714  return false;
715  break;
716  }
717  }
718 
719  return true;
720  }
721  else
722  return false;
723  }
724 
725  class
727  {
728  public:
729 
730  printf_format_elt (const std::string& txt = "", int n = 0, int w = -1,
731  int p = -1, const std::string& f = "",
732  char typ = '\0', char mod = '\0')
733  : text (txt), args (n), fw (w), prec (p), flags (f),
734  type (typ), modifier (mod)
735  { }
736 
737  printf_format_elt (const printf_format_elt&) = default;
738 
739  printf_format_elt& operator = (const printf_format_elt&) = default;
740 
741  ~printf_format_elt (void) = default;
742 
743  // The C-style format string.
745 
746  // How many args do we expect to consume?
747  int args;
748 
749  // Field width.
750  int fw;
751 
752  // Precision.
753  int prec;
754 
755  // Flags -- '-', '+', ' ', '0', or '#'.
757 
758  // Type of conversion -- 'd', 'i', 'o', 'x', 'X', 'u', 'c', 's',
759  // 'f', 'e', 'E', 'g', 'G', 'p', or '%'
760  char type;
761 
762  // A length modifier -- 'h', 'l', or 'L'.
763  char modifier;
764  };
765 
766  class
768  {
769  public:
770 
771  printf_format_list (const std::string& fmt = "");
772 
773  // No copying!
774 
775  printf_format_list (const printf_format_list&) = delete;
776 
777  printf_format_list& operator = (const printf_format_list&) = delete;
778 
779  ~printf_format_list (void);
780 
781  octave_idx_type num_conversions (void) { return nconv; }
782 
783  const printf_format_elt * first (void)
784  {
785  curr_idx = 0;
786  return current ();
787  }
788 
789  const printf_format_elt * current (void) const
790  {
791  return length () > 0 ? fmt_elts[curr_idx] : nullptr;
792  }
793 
794  size_t length (void) const { return fmt_elts.size (); }
795 
796  const printf_format_elt * next (bool cycle = true)
797  {
798  curr_idx++;
799 
800  if (curr_idx >= length ())
801  {
802  if (cycle)
803  curr_idx = 0;
804  else
805  return nullptr;
806  }
807 
808  return current ();
809  }
810 
811  bool last_elt_p (void) { return (curr_idx + 1 == length ()); }
812 
813  void printme (void) const;
814 
815  bool ok (void) const { return (nconv >= 0); }
816 
817  operator bool () const { return ok (); }
818 
819  private:
820 
821  // Number of conversions specified by this format string, or -1 if
822  // invalid conversions have been found.
824 
825  // Index to current element;
826  size_t curr_idx;
827 
828  // List of format elements.
829  std::deque<printf_format_elt*> fmt_elts;
830 
831  // Temporary buffer.
832  std::ostringstream buf;
833 
834  void add_elt_to_list (int args, const std::string& flags, int fw,
835  int prec, char type, char modifier);
836 
837  void process_conversion (const std::string& s, size_t& i, size_t n,
838  int& args, std::string& flags, int& fw,
839  int& prec, char& modifier, char& type);
840 
841  void finish_conversion (const std::string& s, size_t& i, int args,
842  const std::string& flags, int fw, int prec,
843  char modifier, char& type);
844  };
845 
847  : nconv (0), curr_idx (0), fmt_elts (), buf ()
848  {
849  size_t n = s.length ();
850 
851  size_t i = 0;
852 
853  int args = 0;
854  std::string flags;
855  int fw = -1;
856  int prec = -1;
857  char modifier = '\0';
858  char type = '\0';
859 
860  bool have_more = true;
861  bool empty_buf = true;
862 
863  if (n == 0)
864  {
865  printf_format_elt *elt
866  = new printf_format_elt ("", args, fw, prec, flags, type, modifier);
867 
868  fmt_elts.push_back (elt);
869  }
870  else
871  {
872  while (i < n)
873  {
874  have_more = true;
875 
876  empty_buf = (buf.tellp () == 0);
877 
878  switch (s[i])
879  {
880  case '%':
881  {
882  if (empty_buf)
883  {
884  process_conversion (s, i, n, args, flags, fw, prec,
885  type, modifier);
886 
887  // If there is nothing in the buffer, then
888  // add_elt_to_list must have just been called, so we
889  // are already done with the current element and we
890  // don't need to call add_elt_to_list if this is our
891  // last trip through the loop.
892 
893  have_more = (buf.tellp () != 0);
894  }
895  else
896  add_elt_to_list (args, flags, fw, prec, type, modifier);
897  }
898  break;
899 
900  default:
901  {
902  args = 0;
903  flags = "";
904  fw = -1;
905  prec = -1;
906  modifier = '\0';
907  type = '\0';
908  buf << s[i++];
909  empty_buf = false;
910  }
911  break;
912  }
913 
914  if (nconv < 0)
915  {
916  have_more = false;
917  break;
918  }
919  }
920 
921  if (have_more)
922  add_elt_to_list (args, flags, fw, prec, type, modifier);
923 
924  buf.clear ();
925  buf.str ("");
926  }
927  }
928 
930  {
931  size_t n = fmt_elts.size ();
932 
933  for (size_t i = 0; i < n; i++)
934  {
935  printf_format_elt *elt = fmt_elts[i];
936  delete elt;
937  }
938  }
939 
940  void
942  int fw, int prec, char type,
943  char modifier)
944  {
945  std::string text = buf.str ();
946 
947  if (! text.empty ())
948  {
949  printf_format_elt *elt
950  = new printf_format_elt (text, args, fw, prec, flags,
951  type, modifier);
952 
953  fmt_elts.push_back (elt);
954  }
955 
956  buf.clear ();
957  buf.str ("");
958  }
959 
960  void
962  size_t n, int& args,
963  std::string& flags, int& fw,
964  int& prec, char& modifier,
965  char& type)
966  {
967  args = 0;
968  flags = "";
969  fw = -1;
970  prec = -1;
971  modifier = '\0';
972  type = '\0';
973 
974  buf << s[i++];
975 
976  bool nxt = false;
977 
978  while (i < n)
979  {
980  switch (s[i])
981  {
982  case '-': case '+': case ' ': case '0': case '#':
983  flags += s[i];
984  buf << s[i++];
985  break;
986 
987  default:
988  nxt = true;
989  break;
990  }
991 
992  if (nxt)
993  break;
994  }
995 
996  if (i < n)
997  {
998  if (s[i] == '*')
999  {
1000  fw = -2;
1001  args++;
1002  buf << s[i++];
1003  }
1004  else
1005  {
1006  if (isdigit (s[i]))
1007  {
1008  int nn = 0;
1009  std::string tmp = s.substr (i);
1010  sscanf (tmp.c_str (), "%d%n", &fw, &nn);
1011  }
1012 
1013  while (i < n && isdigit (s[i]))
1014  buf << s[i++];
1015  }
1016  }
1017 
1018  if (i < n && s[i] == '.')
1019  {
1020  // nothing before the . means 0.
1021  if (fw == -1)
1022  fw = 0;
1023 
1024  // . followed by nothing is 0.
1025  prec = 0;
1026 
1027  buf << s[i++];
1028 
1029  if (i < n)
1030  {
1031  if (s[i] == '*')
1032  {
1033  prec = -2;
1034  args++;
1035  buf << s[i++];
1036  }
1037  else
1038  {
1039  if (isdigit (s[i]))
1040  {
1041  int nn = 0;
1042  std::string tmp = s.substr (i);
1043  sscanf (tmp.c_str (), "%d%n", &prec, &nn);
1044  }
1045 
1046  while (i < n && isdigit (s[i]))
1047  buf << s[i++];
1048  }
1049  }
1050  }
1051 
1052  if (i < n)
1053  {
1054  // Accept and record modifier, but don't place it in the format
1055  // item text. All integer conversions are handled as 64-bit
1056  // integers.
1057 
1058  switch (s[i])
1059  {
1060  case 'h': case 'l': case 'L':
1061  modifier = s[i++];
1062  break;
1063 
1064  default:
1065  break;
1066  }
1067  }
1068 
1069  if (i < n)
1070  finish_conversion (s, i, args, flags, fw, prec, modifier, type);
1071  else
1072  nconv = -1;
1073  }
1074 
1075  void
1077  int args, const std::string& flags,
1078  int fw, int prec, char modifier,
1079  char& type)
1080  {
1081  switch (s[i])
1082  {
1083  case 'd': case 'i': case 'o': case 'x': case 'X':
1084  case 'u': case 'c':
1085  if (modifier == 'L')
1086  {
1087  nconv = -1;
1088  break;
1089  }
1090  goto fini;
1091 
1092  case 'f': case 'e': case 'E': case 'g': case 'G':
1093  if (modifier == 'h' || modifier == 'l')
1094  {
1095  nconv = -1;
1096  break;
1097  }
1098  goto fini;
1099 
1100  case 's': case 'p': case '%':
1101  if (modifier != '\0')
1102  {
1103  nconv = -1;
1104  break;
1105  }
1106  goto fini;
1107 
1108  fini:
1109 
1110  type = s[i];
1111 
1112  buf << s[i++];
1113 
1114  if (type != '%' || args != 0)
1115  nconv++;
1116 
1117  if (type != '%')
1118  args++;
1119 
1120  add_elt_to_list (args, flags, fw, prec, type, modifier);
1121 
1122  break;
1123 
1124  default:
1125  nconv = -1;
1126  break;
1127  }
1128  }
1129 
1130  void
1132  {
1133  size_t n = fmt_elts.size ();
1134 
1135  for (size_t i = 0; i < n; i++)
1136  {
1137  printf_format_elt *elt = fmt_elts[i];
1138 
1139  std::cerr
1140  << "args: " << elt->args << "\n"
1141  << "flags: '" << elt->flags << "'\n"
1142  << "width: " << elt->fw << "\n"
1143  << "prec: " << elt->prec << "\n"
1144  << "type: '" << elt->type << "'\n"
1145  << "modifier: '" << elt->modifier << "'\n"
1146  << "text: '" << undo_string_escapes (elt->text) << "'\n\n";
1147  }
1148  }
1149 
// Calculate x^n for a non-negative integer exponent N by repeated
// squaring.  Used for ...e+nn so that, for example, 1e2 is exactly 100
// and 5e-1 is 1/2.

static double
pown (double x, unsigned int n)
{
  double result = 1;

  // Consume the bits of N from least to most significant.  On each
  // trip X holds the original base raised to the weight of the bit
  // being examined.
  unsigned int remaining = n;

  while (remaining != 0)
    {
      if ((remaining & 1u) != 0)
        result *= x;

      x *= x;

      remaining >>= 1;
    }

  return result;
}
1167 
1168  static Cell
1170  {
1171  Cell retval (dim_vector (1, 2));
1172 
1173  retval(0) = Cell (octave_value ("inf"));
1174  retval(1) = Cell (octave_value ("nan"));
1175 
1176  return retval;
1177  }
1178 
1179  // Delimited stream, optimized to read strings of characters separated
1180  // by single-character delimiters.
1181  //
1182  // The reason behind this class is that octstream doesn't provide
1183  // seek/tell, but the opportunity has been taken to optimise for the
1184  // textscan workload.
1185  //
1186  // The function reads chunks into a 4kiB buffer, and marks where the
1187  // last delimiter occurs. Reads up to this delimiter can be fast.
1188  // After that last delimiter, the remaining text is moved to the front
1189  // of the buffer and the buffer is refilled. This also allows cheap
1190  // seek and tell operations within a "fast read" block.
1191 
1192  class
1194  {
1195  public:
1196 
1197  delimited_stream (std::istream& is, const std::string& delimiters,
1198  int longest_lookahead, octave_idx_type bsize = 4096);
1199 
1200  delimited_stream (std::istream& is, const delimited_stream& ds);
1201 
1202  // No copying!
1203 
1204  delimited_stream (const delimited_stream&) = delete;
1205 
1206  delimited_stream& operator = (const delimited_stream&) = delete;
1207 
1208  ~delimited_stream (void);
1209 
1210  // Called when optimized sequence of get is finished. Ensures that
1211  // there is a remaining delimiter in buf, or loads more data in.
1212  void field_done (void)
1213  {
1214  if (idx >= last)
1215  refresh_buf ();
1216  }
1217 
1218  // Load new data into buffer, and set eob, last, idx.
1219  // Return EOF at end of file, 0 otherwise.
1220  int refresh_buf (void);
1221 
1222  // Get a character, relying on caller to call field_done if
1223  // a delimiter has been reached.
1224  int get (void)
1225  {
1226  if (delimited)
1227  return eof () ? std::istream::traits_type::eof () : *idx++;
1228  else
1229  return get_undelim ();
1230  }
1231 
1232  // Get a character, checking for underrun of the buffer.
1233  int get_undelim (void);
1234 
1235  // Read character that will be got by the next get.
1236  int peek (void) { return eof () ? std::istream::traits_type::eof () : *idx; }
1237 
1238  // Read character that will be got by the next get.
1239  int peek_undelim (void);
1240 
1241  // Undo a 'get' or 'get_undelim'. It is the caller's responsibility
1242  // to avoid overflow by calling putbacks only for a character got by
1243  // get() or get_undelim(), with no intervening
1244  // get, get_delim, field_done, refresh_buf, getline, read or seekg.
1245  void putback (char /*ch*/ = 0) { if (! eof ()) --idx; }
1246 
1247  int getline (std::string& dest, char delim);
1248 
1249  // int skipline (char delim);
1250 
1251  char * read (char *buffer, int size, char* &new_start);
1252 
1253  // Return a position suitable to "seekg", valid only within this
1254  // block between calls to field_done.
1255  char * tellg (void) { return idx; }
1256 
1257  void seekg (char *old_idx) { idx = old_idx; }
1258 
1259  bool eof (void)
1260  {
1261  return (eob == buf && i_stream.eof ()) || (flags & std::ios_base::eofbit);
1262  }
1263 
1264  operator const void* (void) { return (! eof () && ! flags) ? this : nullptr; }
1265 
1266  bool fail (void) { return flags & std::ios_base::failbit; }
1267 
1268  std::ios_base::iostate rdstate (void) { return flags; }
1269 
1270  void setstate (std::ios_base::iostate m) { flags = flags | m; }
1271 
1272  void clear (std::ios_base::iostate m
1273  = (std::ios_base::eofbit & ~std::ios_base::eofbit))
1274  {
1275  flags = flags & m;
1276  }
1277 
1278  // Report if any characters have been consumed.
1279  // (get, read, etc. not cancelled by putback or seekg)
1280 
1281  void progress_benchmark (void) { progress_marker = idx; }
1282 
1283  bool no_progress (void) { return progress_marker == idx; }
1284 
1285  private:
1286 
1287  // Number of characters to read from the file at once.
1288  int bufsize;
1289 
1290  // Stream to read from.
1291  std::istream& i_stream;
1292 
1293  // Temporary storage for a "chunk" of data.
1294  char *buf;
1295 
1296  // Current read pointer.
1297  char *idx;
1298 
1299  // Location of last delimiter in the buffer at buf (undefined if
1300  // delimited is false).
1301  char *last;
1302 
1303  // Position after last character in buffer.
1304  char *eob;
1305 
1306  // True if there is a delimiter in the buffer after idx.
1308 
1309  // Longest lookahead required.
1310  int longest;
1311 
1312  // Sequence of single-character delimiters.
1314 
1315  // Position of start of buf in original stream.
1316  std::streampos buf_in_file;
1317 
1318  // Marker to see if a read consumes any characters.
1320 
1321  std::ios_base::iostate flags;
1322  };
1323 
1324  // Create a delimited stream, reading from IS, with delimiters DELIMITERS,
1325  // and allowing reading of up to tellg + LONGEST_LOOKAHEAD. When IS
1326  // is at EOF, lookahead may be padded by ASCII nuls.
1327 
1329  const std::string& delimiters,
1330  int longest_lookahead,
1331  octave_idx_type bsize)
1332  : bufsize (bsize), i_stream (is), longest (longest_lookahead),
1333  delims (delimiters),
1334  flags (std::ios::failbit & ~std::ios::failbit) // can't cast 0
1335  {
1336  buf = new char[bufsize];
1337  eob = buf + bufsize;
1338  idx = eob; // refresh_buf shouldn't try to copy old data
1339  progress_marker = idx;
1340  refresh_buf (); // load the first batch of data
1341  }
1342 
1343  // Used to create a stream from a strstream from data read from a dstr.
1345  const delimited_stream& ds)
1346  : delimited_stream (is, ds.delims, ds.longest, ds.bufsize)
1347  { }
1348 
1350  {
1351  // Seek to the correct position in i_stream.
1352  if (! eof ())
1353  {
1354  i_stream.clear ();
1355  i_stream.seekg (buf_in_file);
1356  i_stream.read (buf, idx - buf);
1357  }
1358 
1359  delete [] buf;
1360  }
1361 
1362  // Read a character from the buffer, refilling the buffer from the file
1363  // if necessary.
1364 
1365  int
1367  {
1368  int retval;
1369  if (eof ())
1370  {
1371  setstate (std::ios_base::failbit);
1372  return std::istream::traits_type::eof ();
1373  }
1374 
1375  if (idx < eob)
1376  retval = *idx++;
1377  else
1378  {
1379  refresh_buf ();
1380 
1381  if (eof ())
1382  {
1383  setstate (std::ios_base::eofbit);
1384  retval = std::istream::traits_type::eof ();
1385  }
1386  else
1387  retval = *idx++;
1388  }
1389 
1390  if (idx >= last)
1391  delimited = false;
1392 
1393  return retval;
1394  }
1395 
1396  // Return the next character to be read without incrementing the
1397  // pointer, refilling the buffer from the file if necessary.
1398 
1399  int
1401  {
1402  int retval = get_undelim ();
1403  putback ();
1404 
1405  return retval;
1406  }
1407 
1408  // Copy remaining unprocessed data to the start of the buffer and load
1409  // new data to fill it. Return EOF if the file is at EOF before
1410  // reading any data and all of the data that has been read has been
1411  // processed.
1412 
1413  int
1415  {
1416  if (eof ())
1417  return std::istream::traits_type::eof ();
1418 
1419  int retval;
1420 
1421  if (eob < idx)
1422  idx = eob;
1423 
1424  size_t old_remaining = eob - idx;
1425 
1426  octave_quit (); // allow ctrl-C
1427 
1428  if (old_remaining > 0)
1429  {
1430  buf_in_file += (idx - buf);
1431  memmove (buf, idx, old_remaining);
1432  }
1433  else
1434  buf_in_file = i_stream.tellg (); // record for destructor
1435 
1436  progress_marker -= idx - buf; // where original idx would have been
1437  idx = buf;
1438 
1439  int gcount; // chars read
1440  if (! i_stream.eof ())
1441  {
1442  i_stream.read (buf + old_remaining, bufsize - old_remaining);
1443  gcount = i_stream.gcount ();
1444  }
1445  else
1446  gcount = 0;
1447 
1448  eob = buf + old_remaining + gcount;
1449  last = eob;
1450  if (gcount == 0)
1451  {
1452  delimited = false;
1453 
1454  if (eob != buf) // no more data in file, but still some to go
1455  retval = 0;
1456  else
1457  // file and buffer are both done.
1458  retval = std::istream::traits_type::eof ();
1459  }
1460  else
1461  {
1462  delimited = true;
1463 
1464  for (last = eob - longest; last - buf >= 0; last--)
1465  {
1466  if (delims.find (*last) != std::string::npos)
1467  break;
1468  }
1469 
1470  if (last < buf)
1471  delimited = false;
1472 
1473  retval = 0;
1474  }
1475 
1476  // Ensure fast peek doesn't give valid char
1477  if (retval == std::istream::traits_type::eof ())
1478  *idx = '\0'; // FIXME: check that no TreatAsEmpty etc starts w. \0?
1479 
1480  return retval;
1481  }
1482 
1483  // Return a pointer to a block of data of size size, assuming that a
1484  // sufficiently large buffer is available in buffer, if required.
1485  // If called when delimited == true, and size is no greater than
1486  // longest_lookahead then this will not call refresh_buf, so seekg
1487  // still works. Otherwise, seekg may be invalidated.
1488 
1489  char *
1490  delimited_stream::read (char *buffer, int size, char* &prior_tell)
1491  {
1492  char *retval;
1493 
1494  if (eob - idx > size)
1495  {
1496  retval = idx;
1497  idx += size;
1498  if (idx > last)
1499  delimited = false;
1500  }
1501  else
1502  {
1503  // If there was a tellg pointing to an earlier point than the current
1504  // read position, try to keep it in the active buffer.
1505  // In the current code, prior_tell==idx for each call,
1506  // so this is not necessary, just a precaution.
1507 
1508  if (eob - prior_tell + size < bufsize)
1509  {
1510  octave_idx_type gap = idx - prior_tell;
1511  idx = prior_tell;
1512  refresh_buf ();
1513  idx += gap;
1514  }
1515  else // can't keep the tellg in range. May skip some data.
1516  {
1517  refresh_buf ();
1518  }
1519 
1520  prior_tell = buf;
1521 
1522  if (eob - idx > size)
1523  {
1524  retval = idx;
1525  idx += size;
1526  if (idx > last)
1527  delimited = false;
1528  }
1529  else
1530  {
1531  if (size <= bufsize) // small read, but reached EOF
1532  {
1533  retval = idx;
1534  memset (eob, 0, size + (idx - buf));
1535  idx += size;
1536  }
1537  else // Reading more than the whole buf; return it in buffer
1538  {
1539  retval = buffer;
1540  // FIXME: read bufsize at a time
1541  int i;
1542  for (i = 0; i < size && ! eof (); i++)
1543  *buffer++ = get_undelim ();
1544  if (eof ())
1545  memset (buffer, 0, size - i);
1546  }
1547  }
1548  }
1549 
1550  return retval;
1551  }
1552 
1553  // Return in OUT an entire line, terminated by delim. On input, OUT
1554  // must have length at least 1.
1555 
1556  int
1558  {
1559  int len = out.length ();
1560  int used = 0;
1561  int ch;
1562  while ((ch = get_undelim ()) != delim
1563  && ch != std::istream::traits_type::eof ())
1564  {
1565  out[used++] = ch;
1566  if (used == len)
1567  {
1568  len <<= 1;
1569  out.resize (len);
1570  }
1571  }
1572  out.resize (used);
1573  field_done ();
1574 
1575  return ch;
1576  }
1577 
// A single conversion specifier, such as %f or %c.

class
textscan_format_elt
{
public:

  // Pseudo "types" used for elements that are not real conversions.
  enum special_conversion
  {
    whitespace_conversion = 1,
    literal_conversion = 2
  };

  // TXT is the text of the specifier, W the maximum field width, P the
  // precision, BW the bit width of the result, DIS whether the result
  // is discarded, TYP the conversion type and CH_CLASS the character
  // class for `[' and `^' conversions.
  textscan_format_elt (const std::string& txt, int w = 0, int p = -1,
                       int bw = 0, bool dis = false, char typ = '\0',
                       const std::string& ch_class = std::string ())
    : text (txt), width (w), prec (p), bitwidth (bw),
      char_class (ch_class), type (typ), discard (dis),
      // Use the constructor argument throughout so that this does not
      // depend on the declaration order of the members.
      numeric (typ == 'd' || typ == 'u' || typ == 'f' || typ == 'n')
  { }

  // Member-wise copy is exactly what is wanted.
  textscan_format_elt (const textscan_format_elt& e) = default;

  textscan_format_elt& operator = (const textscan_format_elt& e) = default;

  // The C-style format string.
  std::string text;

  // The maximum field width.
  unsigned int width;

  // The maximum number of digits to read after the decimal in a
  // floating point conversion.
  int prec;

  // The size of the result.  For integers, bitwidth may be 8, 16, 32,
  // or 64.  For floating point values, bitwidth may be 32 or 64.
  int bitwidth;

  // The class of characters in a `[' or `^' format.
  std::string char_class;

  // Type of conversion
  //  -- `d', `u', `f', `n', `s', `q', `c', `%', `C', `D', `[' or `^'.
  char type;

  // TRUE if we are not storing the result of this conversion.
  bool discard;

  // TRUE if the type is 'd', 'u', 'f', 'n'
  bool numeric;
};
1649 
1650  // The (parsed) sequence of format specifiers.
1651 
1652  class textscan;
1653 
1654  class
1656  {
1657  public:
1658 
1659  textscan_format_list (const std::string& fmt = std::string (),
1660  const std::string& who = "textscan");
1661  // No copying!
1662 
1663  textscan_format_list (const textscan_format_list&) = delete;
1664 
1665  textscan_format_list& operator = (const textscan_format_list&) = delete;
1666 
1667  ~textscan_format_list (void);
1668 
1669  octave_idx_type num_conversions (void) const { return nconv; }
1670 
1671  // The length can be different than the number of conversions.
1672  // For example, "x %d y %d z" has 2 conversions but the length of
1673  // the list is 3 because of the characters that appear after the
1674  // last conversion.
1675 
1676  size_t numel (void) const { return fmt_elts.size (); }
1677 
1679  {
1680  curr_idx = 0;
1681  return current ();
1682  }
1683 
1684  const textscan_format_elt * current (void) const
1685  {
1686  return numel () > 0 ? fmt_elts[curr_idx] : nullptr;
1687  }
1688 
1689  const textscan_format_elt * next (bool cycle = true)
1690  {
1691  curr_idx++;
1692 
1693  if (curr_idx >= numel ())
1694  {
1695  if (cycle)
1696  curr_idx = 0;
1697  else
1698  return nullptr;
1699  }
1700 
1701  return current ();
1702  }
1703 
1704  void printme (void) const;
1705 
1706  bool ok (void) const { return (nconv >= 0); }
1707 
1708  operator const void* (void) const { return ok () ? this : nullptr; }
1709 
1710  // What function name should be shown when reporting errors.
1712 
1713  // True if number of %f to be set from data file.
1715 
1716  // At least one conversion specifier is s,q,c, or [...].
1718 
1719  int read_first_row (delimited_stream& is, textscan& ts);
1720 
1721  std::list<octave_value> out_buf (void) const { return (output_container); }
1722 
1723  private:
1724 
1725  // Number of conversions specified by this format string, or -1 if
1726  // invalid conversions have been found.
1728 
1729  // Index to current element;
1730  size_t curr_idx;
1731 
1732  // List of format elements.
1733  std::deque<textscan_format_elt*> fmt_elts;
1734 
1735  // list holding column arrays of types specified by conversions
1736  std::list<octave_value> output_container;
1737 
1738  // Temporary buffer.
1739  std::ostringstream buf;
1740 
1741  void add_elt_to_list (unsigned int width, int prec, int bitwidth,
1742  octave_value val_type, bool discard,
1743  char type,
1744  const std::string& char_class = std::string ());
1745 
1746  void process_conversion (const std::string& s, size_t& i, size_t n);
1747 
1748  std::string parse_char_class (const std::string& pattern) const;
1749 
1750  int finish_conversion (const std::string& s, size_t& i, size_t n,
1751  unsigned int& width, int& prec, int& bitwidth,
1752  octave_value& val_type,
1753  bool discard, char& type);
1754  };
1755 
1756  // Main class to implement textscan. Read data and parse it
1757  // according to a format.
1758  //
1759  // The calling sequence is
1760  //
1761  // textscan scanner ();
1762  // scanner.scan (...);
1763 
1764  class
1765  OCTINTERP_API
1766  textscan
1767  {
1768  public:
1769 
1770  textscan (const std::string& who_arg = "textscan");
1771 
1772  // No copying!
1773 
1774  textscan (const textscan&) = delete;
1775 
1776  textscan& operator = (const textscan&) = delete;
1777 
1778  ~textscan (void) = default;
1779 
1780  octave_value scan (std::istream& isp, const std::string& fmt,
1781  octave_idx_type ntimes,
1782  const octave_value_list& options,
1783  octave_idx_type& read_count);
1784 
1785  private:
1786 
1787  friend class textscan_format_list;
1788 
1789  // What function name should be shown when reporting errors.
1791 
1793 
1794  // Three cases for delim_table and delim_list
1795  // 1. delim_table empty, delim_list empty: whitespace delimiters
1796  // 2. delim_table = look-up table of delim chars, delim_list empty.
1797  // 3. delim_table non-empty, delim_list = Cell array of delim strings
1798 
1800 
1801  // delim_table[i] == '\0' if i is not a delimiter.
1803 
1804  // String of delimiter characters.
1806 
1808 
1809  // How far ahead to look to detect an open comment.
1811 
1812  // First character of open comment.
1814 
1816 
1818 
1819  // 'inf' and 'nan' for formatted_double.
1821 
1822  // Array of strings of delimiters.
1824 
1825  // Longest delimiter.
1827 
1832 
1833  // Longest string to treat as "N/A".
1835 
1837 
1838  short eol1;
1839  short eol2;
1841 
1845 
1847 
1848  octave_value do_scan (std::istream& isp, textscan_format_list& fmt_list,
1849  octave_idx_type ntimes);
1850 
1851  void parse_options (const octave_value_list& args,
1852  textscan_format_list& fmt_list);
1853 
1854  int read_format_once (delimited_stream& isp, textscan_format_list& fmt_list,
1855  std::list<octave_value>& retval,
1856  Array<octave_idx_type> row, int& done_after);
1857 
1858  void scan_one (delimited_stream& is, const textscan_format_elt& fmt,
1860 
1861  // Methods to process a particular conversion specifier.
1862  double read_double (delimited_stream& is,
1863  const textscan_format_elt& fmt) const;
1864 
1865  void scan_complex (delimited_stream& is, const textscan_format_elt& fmt,
1866  Complex& val) const;
1867 
1868  int scan_bracket (delimited_stream& is, const std::string& pattern,
1869  std::string& val) const;
1870 
1871  int scan_caret (delimited_stream& is, const std::string& pattern,
1872  std::string& val) const;
1873 
1874  void scan_string (delimited_stream& is, const textscan_format_elt& fmt,
1875  std::string& val) const;
1876 
1877  void scan_cstring (delimited_stream& is, const textscan_format_elt& fmt,
1878  std::string& val) const;
1879 
1880  void scan_qstring (delimited_stream& is, const textscan_format_elt& fmt,
1881  std::string& val);
1882 
1883  // Helper methods.
1884  std::string read_until (delimited_stream& is, const Cell& delimiters,
1885  const std::string& ends) const;
1886 
1887  int lookahead (delimited_stream& is, const Cell& targets, int max_len,
1888  bool case_sensitive = true) const;
1889 
1890  bool match_literal (delimited_stream& isp, const textscan_format_elt& elem);
1891 
1892  int skip_whitespace (delimited_stream& is, bool EOLstop = false);
1893 
1894  int skip_delim (delimited_stream& is);
1895 
1896  bool is_delim (unsigned char ch) const
1897  {
1898  return ((delim_table.empty () && (isspace (ch) || ch == eol1 || ch == eol2))
1899  || delim_table[ch] != '\0');
1900  }
1901 
1902  bool isspace (unsigned int ch) const { return whitespace_table[ch & 0xff]; }
1903 
1904  // True if the only delimiter is whitespace.
1905  bool whitespace_delim (void) const { return delim_table.empty (); }
1906  };
1907 
1909  const std::string& who_arg)
1910  : who (who_arg), set_from_first (false), has_string (false),
1911  nconv (0), curr_idx (0), fmt_elts (), buf ()
1912  {
1913  size_t n = s.length ();
1914 
1915  size_t i = 0;
1916 
1917  unsigned int width = -1; // Unspecified width = max (except %c)
1918  int prec = -1;
1919  int bitwidth = 0;
1920  bool discard = false;
1921  char type = '\0';
1922 
1923  bool have_more = true;
1924 
1925  if (s.empty ())
1926  {
1927  buf.clear ();
1928  buf.str ("");
1929 
1930  buf << "%f";
1931 
1932  bitwidth = 64;
1933  type = 'f';
1934  add_elt_to_list (width, prec, bitwidth, octave_value (NDArray ()),
1935  discard, type);
1936  have_more = false;
1937  set_from_first = true;
1938  nconv = 1;
1939  }
1940  else
1941  {
1942  set_from_first = false;
1943 
1944  while (i < n)
1945  {
1946  have_more = true;
1947 
1948  if (s[i] == '%' && (i+1 == n || s[i+1] != '%'))
1949  {
1950  // Process percent-escape conversion type.
1951 
1952  process_conversion (s, i, n);
1953 
1954  // If there is nothing in the buffer, then add_elt_to_list
1955  // must have just been called, so we are already done with
1956  // the current element and we don't need to call
1957  // add_elt_to_list if this is our last trip through the
1958  // loop.
1959 
1960  have_more = (buf.tellp () != 0);
1961  }
1962  else if (isspace (s[i]))
1963  {
1964  while (++i < n && isspace (s[i]))
1965  /* skip whitespace */;
1966 
1967  have_more = false;
1968  }
1969  else
1970  {
1972 
1973  width = 0;
1974  prec = -1;
1975  bitwidth = 0;
1976  discard = true;
1977 
1978  while (i < n && ! isspace (s[i])
1979  && (s[i] != '%' || (i+1 < n && s[i+1] == '%')))
1980  {
1981  if (s[i] == '%') // if double %, skip one
1982  i++;
1983  buf << s[i++];
1984  width++;
1985  }
1986 
1987  add_elt_to_list (width, prec, bitwidth, octave_value (),
1988  discard, type);
1989 
1990  have_more = false;
1991  }
1992 
1993  if (nconv < 0)
1994  {
1995  have_more = false;
1996  break;
1997  }
1998  }
1999  }
2000 
2001  if (have_more)
2002  add_elt_to_list (width, prec, bitwidth, octave_value (), discard, type);
2003 
2004  buf.clear ();
2005  buf.str ("");
2006  }
2007 
2009  {
2010  size_t n = numel ();
2011 
2012  for (size_t i = 0; i < n; i++)
2013  {
2014  textscan_format_elt *elt = fmt_elts[i];
2015  delete elt;
2016  }
2017  }
2018 
2019  void
2020  textscan_format_list::add_elt_to_list (unsigned int width, int prec,
2021  int bitwidth, octave_value val_type,
2022  bool discard, char type,
2023  const std::string& char_class)
2024  {
2025  std::string text = buf.str ();
2026 
2027  if (! text.empty ())
2028  {
2029  textscan_format_elt *elt
2030  = new textscan_format_elt (text, width, prec, bitwidth, discard, type,
2031  char_class);
2032 
2033  if (! discard)
2034  output_container.push_back (val_type);
2035 
2036  fmt_elts.push_back (elt);
2037  }
2038 
2039  buf.clear ();
2040  buf.str ("");
2041  }
2042 
2043  void
2045  size_t n)
2046  {
2047  unsigned width = 0;
2048  int prec = -1;
2049  int bitwidth = 0;
2050  bool discard = false;
2051  octave_value val_type;
2052  char type = '\0';
2053 
2054  buf << s[i++];
2055 
2056  bool have_width = false;
2057 
2058  while (i < n)
2059  {
2060  switch (s[i])
2061  {
2062  case '*':
2063  if (discard)
2064  nconv = -1;
2065  else
2066  {
2067  discard = true;
2068  buf << s[i++];
2069  }
2070  break;
2071 
2072  case '0': case '1': case '2': case '3': case '4':
2073  case '5': case '6': case '7': case '8': case '9':
2074  if (have_width)
2075  nconv = -1;
2076  else
2077  {
2078  char c = s[i++];
2079  width = width * 10 + c - '0';
2080  have_width = true;
2081  buf << c;
2082  while (i < n && isdigit (s[i]))
2083  {
2084  c = s[i++];
2085  width = width * 10 + c - '0';
2086  buf << c;
2087  }
2088 
2089  if (i < n && s[i] == '.')
2090  {
2091  buf << s[i++];
2092  prec = 0;
2093  while (i < n && isdigit (s[i]))
2094  {
2095  c = s[i++];
2096  prec = prec * 10 + c - '0';
2097  buf << c;
2098  }
2099  }
2100  }
2101  break;
2102 
2103  case 'd': case 'u':
2104  {
2105  bool done = true;
2106  buf << (type = s[i++]);
2107  if (i < n)
2108  {
2109  if (s[i] == '8')
2110  {
2111  bitwidth = 8;
2112  if (type == 'd')
2113  val_type = octave_value (int8NDArray ());
2114  else
2115  val_type = octave_value (uint8NDArray ());
2116  buf << s[i++];
2117  }
2118  else if (s[i] == '1' && i+1 < n && s[i+1] == '6')
2119  {
2120  bitwidth = 16;
2121  if (type == 'd')
2122  val_type = octave_value (int16NDArray ());
2123  else
2124  val_type = octave_value (uint16NDArray ());
2125  buf << s[i++];
2126  buf << s[i++];
2127  }
2128  else if (s[i] == '3' && i+1 < n && s[i+1] == '2')
2129  {
2130  done = false; // use default size below
2131  buf << s[i++];
2132  buf << s[i++];
2133  }
2134  else if (s[i] == '6' && i+1 < n && s[i+1] == '4')
2135  {
2136  bitwidth = 64;
2137  if (type == 'd')
2138  val_type = octave_value (int64NDArray ());
2139  else
2140  val_type = octave_value (uint64NDArray ());
2141  buf << s[i++];
2142  buf << s[i++];
2143  }
2144  else
2145  done = false;
2146  }
2147  else
2148  done = false;
2149 
2150  if (! done)
2151  {
2152  bitwidth = 32;
2153  if (type == 'd')
2154  val_type = octave_value (int32NDArray ());
2155  else
2156  val_type = octave_value (uint32NDArray ());
2157  }
2158  goto fini;
2159  }
2160 
2161  case 'f':
2162  buf << (type = s[i++]);
2163  bitwidth = 64;
2164  if (i < n)
2165  {
2166  if (s[i] == '3' && i+1 < n && s[i+1] == '2')
2167  {
2168  bitwidth = 32;
2169  val_type = octave_value (FloatNDArray ());
2170  buf << s[i++];
2171  buf << s[i++];
2172  }
2173  else if (s[i] == '6' && i+1 < n && s[i+1] == '4')
2174  {
2175  val_type = octave_value (NDArray ());
2176  buf << s[i++];
2177  buf << s[i++];
2178  }
2179  else
2180  val_type = octave_value (NDArray ());
2181  }
2182  else
2183  val_type = octave_value (NDArray ());
2184  goto fini;
2185 
2186  case 'n':
2187  buf << (type = s[i++]);
2188  bitwidth = 64;
2189  val_type = octave_value (NDArray ());
2190  goto fini;
2191 
2192  case 's': case 'q': case '[': case 'c':
2193  if (! discard)
2194  val_type = octave_value (Cell ());
2195  buf << (type = s[i++]);
2196  has_string = true;
2197  goto fini;
2198 
2199  fini:
2200  {
2201  if (! have_width)
2202  {
2203  if (type == 'c') // %c defaults to one character
2204  width = 1;
2205  else
2206  width = static_cast<unsigned int> (-1); // others: unlimited
2207  }
2208 
2209  if (finish_conversion (s, i, n, width, prec, bitwidth, val_type,
2210  discard, type) == 0)
2211  return;
2212  }
2213  break;
2214 
2215  default:
2216  error ("%s: '%%%c' is not a valid format specifier",
2217  who.c_str (), s[i]);
2218  }
2219 
2220  if (nconv < 0)
2221  break;
2222  }
2223 
2224  nconv = -1;
2225  }
2226 
2227  // Parse [...] and [^...]
2228  //
2229  // Matlab does not expand expressions like A-Z, but they are useful, and
2230  // so we parse them "carefully". We treat '-' as a usual character
2231  // unless both start and end characters are from the same class (upper
2232  // case, lower case, numeric), or this is not the first '-' in the
2233  // pattern.
2234  //
2235  // Keep both a running list of characters and a mask of which chars have
2236  // occurred. The first is efficient for patterns with few characters.
2237  // The latter is efficient for [^...] patterns.
2238 
2239  std::string
2241  {
2242  int len = pattern.length ();
2243  if (len == 0)
2244  return "";
2245 
2246  std::string retval (256, '\0');
2247  std::string mask (256, '\0'); // number of times chr has been seen
2248 
2249  int in = 0, out = 0;
2250  unsigned char ch, prev = 0;
2251  bool flip = false;
2252 
2253  ch = pattern[in];
2254  if (ch == '^')
2255  {
2256  in++;
2257  flip = true;
2258  }
2259  mask[pattern[in]] = '\1';
2260  retval[out++] = pattern[in++]; // even copy ']' if it is first
2261 
2262  bool prev_was_range = false; // disallow "a-m-z" as a pattern
2263  bool prev_prev_was_range = false;
2264  for (; in < len; in++)
2265  {
2266  bool was_range = false;
2267  ch = pattern[in];
2268  if (ch == ']')
2269  break;
2270 
2271  if (prev == '-' && in > 1 && isalnum (ch) && ! prev_prev_was_range)
2272  {
2273  unsigned char start_of_range = pattern[in-2];
2274  if (start_of_range < ch
2275  && ((isupper (ch) && isupper (start_of_range))
2276  || (islower (ch) && islower (start_of_range))
2277  || (isdigit (ch) && isdigit (start_of_range))
2278  || mask['-'] > 1)) // not the first '-'
2279  {
2280  was_range = true;
2281  out--;
2282  mask['-']--;
2283  for (int i = start_of_range; i <= ch; i++)
2284  {
2285  if (mask[i] == '\0')
2286  {
2287  mask[i] = '\1';
2288  retval[out++] = i;
2289  }
2290  }
2291  }
2292  }
2293  if (! was_range)
2294  {
2295  if (mask[ch]++ == 0)
2296  retval[out++] = ch;
2297  else if (ch != '-')
2298  warning_with_id ("octave:textscan-pattern",
2299  "%s: [...] contains two '%c's",
2300  who.c_str (), ch);
2301 
2302  if (prev == '-' && mask['-'] >= 2)
2304  ("octave:textscan-pattern",
2305  "%s: [...] contains two '-'s outside range expressions",
2306  who.c_str ());
2307  }
2308  prev = ch;
2309  prev_prev_was_range = prev_was_range;
2310  prev_was_range = was_range;
2311  }
2312 
2313  if (flip) // [^...]
2314  {
2315  out = 0;
2316  for (int i = 0; i < 256; i++)
2317  if (! mask[i])
2318  retval[out++] = i;
2319  }
2320 
2321  retval.resize (out);
2322 
2323  return retval;
2324  }
2325 
2326  int
2328  size_t n, unsigned int& width,
2329  int& prec, int& bitwidth,
2330  octave_value& val_type, bool discard,
2331  char& type)
2332  {
2333  int retval = 0;
2334 
2335  std::string char_class;
2336 
2337  size_t beg_idx = std::string::npos;
2338  size_t end_idx = std::string::npos;
2339 
2340  if (type != '%')
2341  {
2342  nconv++;
2343  if (type == '[')
2344  {
2345  if (i < n)
2346  {
2347  beg_idx = i;
2348 
2349  if (s[i] == '^')
2350  {
2351  type = '^';
2352  buf << s[i++];
2353 
2354  if (i < n)
2355  {
2356  beg_idx = i;
2357 
2358  if (s[i] == ']')
2359  buf << s[i++];
2360  }
2361  }
2362  else if (s[i] == ']')
2363  buf << s[i++];
2364  }
2365 
2366  while (i < n && s[i] != ']')
2367  buf << s[i++];
2368 
2369  if (i < n && s[i] == ']')
2370  {
2371  end_idx = i-1;
2372  buf << s[i++];
2373  }
2374 
2375  if (s[i-1] != ']')
2376  retval = nconv = -1;
2377  }
2378  }
2379 
2380  if (nconv >= 0)
2381  {
2382  if (beg_idx != std::string::npos && end_idx != std::string::npos)
2383  char_class = parse_char_class (s.substr (beg_idx,
2384  end_idx - beg_idx + 1));
2385 
2386  add_elt_to_list (width, prec, bitwidth, val_type, discard, type,
2387  char_class);
2388  }
2389 
2390  return retval;
2391  }
2392 
2393  void
2395  {
2396  size_t n = numel ();
2397 
2398  for (size_t i = 0; i < n; i++)
2399  {
2400  textscan_format_elt *elt = fmt_elts[i];
2401 
2402  std::cerr
2403  << "width: " << elt->width << "\n"
2404  << "digits " << elt->prec << "\n"
2405  << "bitwidth: " << elt->bitwidth << "\n"
2406  << "discard: " << elt->discard << "\n"
2407  << "type: ";
2408 
2410  std::cerr << "literal text\n";
2412  std::cerr << "whitespace\n";
2413  else
2414  std::cerr << elt->type << "\n";
2415 
2416  std::cerr
2417  << "char_class: `" << undo_string_escapes (elt->char_class) << "'\n"
2418  << "text: `" << undo_string_escapes (elt->text) << "'\n\n";
2419  }
2420  }
2421 
2422  // If FORMAT is explicitly "", it is assumed to be "%f" repeated enough
2423  // times to read the first row of the file. Set it now.
2424 
2425  int
2427  {
2428  // Read first line and strip end-of-line, which may be two characters
2429  std::string first_line (20, ' ');
2430 
2431  is.getline (first_line, static_cast<char> (ts.eol2));
2432 
2433  if (! first_line.empty () && first_line.back () == ts.eol1)
2434  first_line.pop_back ();
2435 
2436  std::istringstream strstr (first_line);
2437  delimited_stream ds (strstr, is);
2438 
2439  dim_vector dv (1,1); // initial size of each output_container
2440  Complex val;
2441  octave_value val_type;
2442  nconv = 0;
2443  int max_empty = 1000; // failsafe, if ds fails but not with eof
2444  int retval = 0;
2445 
2446  // read line, creating output_container as we go
2447  while (! ds.eof ())
2448  {
2449  bool already_skipped_delim = false;
2450  ts.skip_whitespace (ds);
2451  ds.progress_benchmark ();
2452  bool progress = false;
2453  ts.scan_complex (ds, *fmt_elts[0], val);
2454  if (ds.fail ())
2455  {
2456  ds.clear (ds.rdstate () & ~std::ios::failbit);
2457 
2458  if (ds.eof ())
2459  break;
2460 
2461  // Unless this was a missing value (i.e., followed by a delimiter),
2462  // return with an error status.
2463  ts.skip_delim (ds);
2464  if (ds.no_progress ())
2465  {
2466  retval = 4;
2467  break;
2468  }
2469  already_skipped_delim = true;
2470 
2471  val = ts.empty_value.scalar_value ();
2472 
2473  if (! --max_empty)
2474  break;
2475  }
2476 
2477  if (val.imag () == 0)
2478  val_type = octave_value (NDArray (dv, val.real ()));
2479  else
2480  val_type = octave_value (ComplexNDArray (dv, val));
2481 
2482  output_container.push_back (val_type);
2483 
2484  if (! already_skipped_delim)
2485  ts.skip_delim (ds);
2486 
2487  if (! progress && ds.no_progress ())
2488  break;
2489 
2490  nconv++;
2491  }
2492 
2493  output_container.pop_front (); // discard empty element from constructor
2494 
2495  // Create fmt_list now that the size is known
2496  for (octave_idx_type i = 1; i < nconv; i++)
2497  fmt_elts.push_back (new textscan_format_elt (*fmt_elts[0]));
2498 
2499  return retval; // May have returned 4 above.
2500  }
2501 
2503  : who (who_arg), buf (), whitespace_table (), delim_table (),
2504  delims (), comment_style (), comment_len (0), comment_char (-2),
2505  buffer_size (0), date_locale (), inf_nan (init_inf_nan ()),
2506  empty_value (numeric_limits<double>::NaN ()), exp_chars ("edED"),
2507  header_lines (0), treat_as_empty (), treat_as_empty_len (0),
2508  whitespace (" \b\t"), eol1 ('\r'), eol2 ('\n'),
2509  return_on_error (1), collect_output (false),
2510  multiple_delims_as_one (false), default_exp (true), lines (0)
2511  { }
2512 
2513  octave_value
2514  textscan::scan (std::istream& isp, const std::string& fmt,
2515  octave_idx_type ntimes, const octave_value_list& options,
2516  octave_idx_type& count)
2517  {
2518  textscan_format_list fmt_list (fmt);
2519 
2520  parse_options (options, fmt_list);
2521 
2522  octave_value result = do_scan (isp, fmt_list, ntimes);
2523 
2524  // FIXME: this is probably not the best way to get count. The
2525  // position could easily be larger than octave_idx_type when using
2526  // 32-bit indexing.
2527 
2528  std::ios::iostate state = isp.rdstate ();
2529  isp.clear ();
2530  count = static_cast<octave_idx_type> (isp.tellg ());
2531  isp.setstate (state);
2532 
2533  return result;
2534  }
2535 
2536  octave_value
2537  textscan::do_scan (std::istream& isp, textscan_format_list& fmt_list,
2538  octave_idx_type ntimes)
2539  {
2541 
2542  if (fmt_list.num_conversions () == -1)
2543  error ("%s: invalid format specified", who.c_str ());
2544 
2545  if (fmt_list.num_conversions () == 0)
2546  error ("%s: no valid format conversion specifiers", who.c_str ());
2547 
2548  // skip the first header_lines
2549  std::string dummy;
2550  for (int i = 0; i < header_lines && isp; i++)
2551  getline (isp, dummy, static_cast<char> (eol2));
2552 
2553  // Create our own buffered stream, for fast get/putback/tell/seek.
2554 
2555  // First, see how far ahead it should let us look.
2556  int max_lookahead = std::max (std::max (comment_len, treat_as_empty_len),
2557  std::max (delim_len, 3)); // 3 for NaN and Inf
2558 
2559  // Next, choose a buffer size to avoid reading too much, or too often.
2560  octave_idx_type buf_size = 4096;
2561  if (buffer_size)
2562  buf_size = buffer_size;
2563  else if (ntimes > 0)
2564  {
2565  // Avoid overflow of 80*ntimes...
2566  buf_size = std::min (buf_size, std::max (ntimes, 80 * ntimes));
2567  buf_size = std::max (buf_size, ntimes);
2568  }
2569  // Finally, create the stream.
2570  delimited_stream is (isp,
2571  (delim_table.empty () ? whitespace + "\r\n" : delims),
2572  max_lookahead, buf_size);
2573 
2574  // Grow retval dynamically. "size" is half the initial size
2575  // (FIXME: Should we start smaller if ntimes is large?)
2576  octave_idx_type size = ((ntimes < 8 && ntimes >= 0) ? ntimes : 1);
2577  Array<octave_idx_type> row_idx (dim_vector (1,2));
2578  row_idx(1) = 0;
2579 
2580  int err = 0;
2581  octave_idx_type row = 0;
2582 
2583  if (multiple_delims_as_one) // bug #44750?
2584  skip_delim (is);
2585 
2586  int done_after; // Number of columns read when EOF seen.
2587 
2588  // If FORMAT explicitly "", read first line and see how many "%f" match
2589  if (fmt_list.set_from_first)
2590  {
2591  err = fmt_list.read_first_row (is, *this);
2592  lines = 1;
2593 
2594  done_after = fmt_list.numel () + 1;
2595  if (! err)
2596  row = 1; // the above puts the first line into fmt_list.out_buf ()
2597  }
2598  else
2599  done_after = fmt_list.out_buf ().size () + 1;
2600 
2601  std::list<octave_value> out = fmt_list.out_buf ();
2602 
2603  // We will later merge adjacent columns of the same type.
2604  // Check now which columns to merge.
2605  // Reals may become complex, and so we can't trust types
2606  // after reading in data.
2607  // If the format was "", that conversion may already have happened,
2608  // so force all to be merged (as all are %f).
2609  bool merge_with_prev[fmt_list.numel ()];
2610  int conv = 0;
2611  if (collect_output)
2612  {
2613  int prev_type = -1;
2614  for (const auto& col : out)
2615  {
2616  if (col.type_id () == prev_type
2617  || (fmt_list.set_from_first && prev_type != -1))
2618  merge_with_prev[conv++] = true;
2619  else
2620  merge_with_prev[conv++] = false;
2621 
2622  prev_type = col.type_id ();
2623  }
2624  }
2625 
2626  // This should be caught by earlier code, but this avoids a possible
2627  // infinite loop below.
2628  if (fmt_list.num_conversions () == 0)
2629  error ("%s: No conversions specified", who.c_str ());
2630 
2631  // Read the data. This is the main loop.
2632  if (! err)
2633  {
2634  for (/* row set ~30 lines above */; row < ntimes || ntimes == -1; row++)
2635  {
2636  if (row == 0 || row >= size)
2637  {
2638  size += size+1;
2639  for (auto& col : out)
2640  col = col.resize (dim_vector (size, 1), 0);
2641  }
2642 
2643  row_idx(0) = row;
2644  err = read_format_once (is, fmt_list, out, row_idx, done_after);
2645 
2646  if ((err & ~1) > 0 || ! is || (lines >= ntimes && ntimes > -1))
2647  break;
2648  }
2649  }
2650 
2651  if ((err & 4) && ! return_on_error)
2652  error ("%s: Read error in field %d of row %d", who.c_str (),
2653  done_after + 1, row + 1);
2654 
2655  // If file does not end in EOL, do not pad columns with NaN.
2656  bool uneven_columns = false;
2657  if (err & 4)
2658  uneven_columns = true;
2659  else if (isp.eof ())
2660  {
2661  isp.clear ();
2662  isp.seekg (-1, std::ios_base::end);
2663  int last_char = isp.get ();
2664  isp.setstate (isp.eofbit);
2665  uneven_columns = (last_char != eol1 && last_char != eol2);
2666  }
2667 
2668  // convert return value to Cell array
2670 
2671  // (err & 1) means "error, and no columns read this row
2672  // FIXME: This may redundant now that done_after=0 says the same
2673  if (err & 1)
2674  done_after = out.size () + 1;
2675 
2676  int valid_rows = (row == ntimes
2677  ? ntimes
2678  : ((err & 1) && (err & 8)) ? row : row+1);
2679  dim_vector dv (valid_rows, 1);
2680 
2681  ra_idx(0) = 0;
2682  int i = 0;
2683  if (! collect_output)
2684  {
2685  retval = Cell (dim_vector (1, out.size ()));
2686  for (auto& col : out)
2687  {
2688  // trim last columns if that was requested
2689  if (i == done_after && uneven_columns)
2690  dv = dim_vector (std::max (valid_rows - 1, 0), 1);
2691 
2692  ra_idx(1) = i;
2693  retval = do_cat_op (retval, octave_value (Cell (col.resize (dv,0))),
2694  ra_idx);
2695  i++;
2696  }
2697  }
2698  else // group adjacent cells of the same type into a single cell
2699  {
2700  octave_value cur; // current cell, accumulating columns
2701  octave_idx_type group_size = 0; // columns in this cell
2702  int prev_type = -1;
2703 
2704  conv = 0;
2705  retval = Cell ();
2706  for (auto& col : out)
2707  {
2708  if (! merge_with_prev[conv++]) // including first time
2709  {
2710  if (prev_type != -1)
2711  {
2712  ra_idx(1) = i++;
2713  retval = do_cat_op (retval, octave_value (Cell (cur)),
2714  ra_idx);
2715  }
2716  cur = octave_value (col.resize (dv,0));
2717  group_size = 1;
2718  prev_type = col.type_id ();
2719  }
2720  else
2721  {
2722  ra_idx(1) = group_size++;
2723  cur = do_cat_op (cur, octave_value (col.resize (dv,0)),
2724  ra_idx);
2725  }
2726  }
2727  ra_idx(1) = i;
2729  }
2730 
2731  return retval;
2732  }
2733 
2734  // Read a double considering the "precision" field of FMT and the
2735  // EXP_CHARS option of OPTIONS.
      //
      // Parsing proceeds in this order: optional sign, integer digits,
      // fractional digits (at most fmt.prec of them are accumulated), an
      // exponent introduced by one of the characters in exp_chars, and finally,
      // if nothing numeric was recognised, a look-ahead against the inf_nan
      // strings.  width_left starts at fmt.width and is decremented as
      // characters are consumed.
      //
      // On return failbit is set on IS when no valid value was recognised and
      // is cleared otherwise.  The return value is the parsed magnitude
      // multiplied by the sign.
      //
      // NOTE(review): the declarator line naming this function (listing line
      // 2738) is missing from this listing; callers in this file invoke it as
      // read_double (is, fmt).
2736 
2737  double
2739  const textscan_format_elt& fmt) const
2740  {
2741  int sign = 1;
2742  unsigned int width_left = fmt.width;
2743  double retval = 0;
2744  bool valid = false; // syntactically correct double?
2745 
2746  int ch = is.peek ();
2747 
      // Consume an explicit sign, if present, and re-peek the next character.
2748  if (ch == '+')
2749  {
2750  is.get ();
2751  ch = is.peek ();
2752  if (width_left)
2753  width_left--;
2754  }
2755  else if (ch == '-')
2756  {
2757  sign = -1;
2758  is.get ();
2759  ch = is.peek ();
2760  if (width_left)
2761  width_left--;
2762  }
2763 
2764  // Read integer part
2765  if (ch != '.')
2766  {
2767  if (ch >= '0' && ch <= '9') // valid if at least one digit
2768  valid = true;
      // The loop's post-decrement runs once more on the failing test, so
      // the increment after the loop restores the count.
2769  while (width_left-- && is && (ch = is.get ()) >= '0' && ch <= '9')
2770  retval = retval * 10 + (ch - '0');
2771  width_left++;
2772  }
2773 
2774  // Read fractional part, up to specified precision
2775  if (ch == '.' && width_left)
2776  {
2777  double multiplier = 1;
2778  int precision = fmt.prec;
2779  int i;
2780 
2781  if (width_left)
2782  width_left--; // Consider width of '.'
2783 
      // A precision of -1 is treated as "effectively unlimited".
2784  if (precision == -1)
2785  precision = 1<<30; // FIXME: Should be MAXINT
2786 
2787  if (! valid) // if there was nothing before '.'...
2788  is.get (); // ...ch was a "peek", not "get".
2789 
2790  for (i = 0; i < precision; i++)
2791  {
2792  if (width_left-- && is && (ch = is.get ()) >= '0' && ch <= '9')
2793  retval += (ch - '0') * (multiplier *= 0.1);
2794  else
2795  {
2796  width_left++;
2797  break;
2798  }
2799  }
2800 
2801  // round up if we truncated and the next digit is >= 5
2802  if ((i == precision || ! width_left) && (ch = is.get ()) >= '5'
2803  && ch <= '9')
2804  retval += multiplier;
2805 
2806  if (i > 0)
2807  valid = true; // valid if at least one digit after '.'
2808 
2809  // skip remainder after '.', to field width, to look for exponent
2810  if (i == precision)
2811  while (width_left-- && is && (ch = is.get ()) >= '0' && ch <= '9')
2812  ; // discard
2813 
2814  width_left++;
2815  }
2816 
2817  // look for exponent part in, e.g., 6.023E+23
2818  bool used_exp = false;
2819  if (valid && width_left > 1 && exp_chars.find (ch) != std::string::npos)
2820  {
2821  int ch1 = is.peek ();
2822  if (ch1 == '-' || ch1 == '+' || (ch1 >= '0' && ch1 <= '9'))
2823  {
2824  // if 1.0e+$ or some such, this will set failbit, as we want
2825  width_left--; // count "E"
2826  int exp = 0;
2827  int exp_sign = 1;
2828  if (ch1 == '+')
2829  {
2830  if (width_left)
2831  width_left--;
2832  is.get ();
2833  }
2834  else if (ch1 == '-')
2835  {
2836  exp_sign = -1;
2837  is.get ();
2838  if (width_left)
2839  width_left--;
2840  }
      // The value only counts as valid again once at least one exponent
      // digit has been read.
2841  valid = false;
2842  while (width_left-- && is && (ch = is.get ()) >= '0' && ch <= '9')
2843  {
2844  exp = exp*10 + ch - '0';
2845  valid = true;
2846  }
2847  width_left++;
2848  if (ch != std::istream::traits_type::eof () && width_left)
2849  is.putback (ch);
2850 
2851  double multiplier = pown (10, exp);
2852  if (exp_sign > 0)
2853  retval *= multiplier;
2854  else
2855  retval /= multiplier;
2856 
2857  used_exp = true;
2858  }
2859  }
      // Reset the stream state, then return the last character read to the
      // stream unless the exponent branch already did so.
2860  is.clear ();
2861  if (! used_exp && ch != std::istream::traits_type::eof () && width_left)
2862  is.putback (ch);
2863 
2864  // Check for +/- inf and NaN
2865  if (! valid && width_left >= 3)
2866  {
2867  int i = lookahead (is, inf_nan, 3, false); // false -> case insensitive
2868  if (i == 0)
2869  {
      // NOTE(review): listing line 2870 is missing here; presumably it
      // assigns the infinity result to retval -- confirm against upstream.
2871  valid = true;
2872  }
2873  else if (i == 1)
2874  {
      // NOTE(review): listing line 2875 is missing here; presumably it
      // assigns the NaN result to retval -- confirm against upstream.
2876  valid = true;
2877  }
2878  }
2879 
      // Report the outcome through failbit only; other state bits are kept.
2880  if (! valid)
2881  is.setstate (std::ios::failbit);
2882  else
2883  is.setstate (is.rdstate () & ~std::ios::failbit);
2884 
2885  return retval * sign;
2886  }
2887 
2888  // Read a single number: real, complex, inf, NaN, possibly with limited
2889  // precision. Calls to this should be preceded by skip_whitespace.
2890  // Calling that inside scan_complex would violate its const declaration.
      //
      // The result is stored in VAL.  Forms handled below: a bare [+-]i or
      // [+-]j, [+-]inf, a real value, a pure imaginary value (number followed
      // by i or j) and real[+-]imag followed by i or j.  When the real read
      // fails, or yields NaN/NA or +Inf, the input is also compared against
      // the treat_as_empty strings.
      //
      // NOTE(review): the declarator line (listing line 2893) is missing from
      // this listing; the body uses the names is and fmt, which are presumably
      // declared there.
2891 
2892  void
2894  Complex& val) const
2895  {
2896  double im = 0;
2897  double re = 0;
2898  bool as_empty = false; // did we fail but match a "treat_as_empty" string?
2899  bool inf = false;
2900 
2901  int ch = is.peek ();
2902  if (ch == '+' || ch == '-') // check for [+-][ij] with no coefficients
2903  {
2904  ch = is.get ();
2905  int ch2 = is.peek ();
2906  if (ch2 == 'i' || ch2 == 'j')
2907  {
2908  double value = 1;
2909  is.get ();
2910  // Check not -inf
2911  if (is.peek () == 'n')
2912  {
      // Remember position and state so the look-ahead can be undone.
2913  char *pos = is.tellg ();
2914  std::ios::iostate state = is.rdstate ();
2915 
2916  is.get ();
2917  ch2 = is.get ();
2918  if (ch2 == 'f')
2919  {
2920  inf = true;
      // NOTE(review): listing line 2922 (the second half of this
      // conditional expression) is missing from this listing.
2921  re = (ch == '+' ? numeric_limits<double>::Inf ()
2923  value = 0;
2924  }
2925  else
2926  {
2927  is.clear (state);
2928  is.seekg (pos); // reset to position before look-ahead
2929  }
2930  }
2931 
2932  im = (ch == '+') ? value : -value;
2933  }
2934  else
2935  is.putback (ch);
2936  }
2937 
2938  if (! im && ! inf) // if not [+-][ij] or [+-]inf, read real normally
2939  {
2940  char *pos = is.tellg ();
2941  std::ios::iostate state = is.rdstate ();
2942  //re = octave_read_value<double> (is);
2943  re = read_double (is, fmt);
2944 
2945  // check for "treat as empty" string
2946  if (treat_as_empty.numel ()
2947  && (is.fail () || math::is_NaN_or_NA (Complex (re))
2948  || re == numeric_limits<double>::Inf ()))
2949  {
2950 
      // Cheap pre-check: compare only the first character (ch, peeked
      // at the top of the function) with each candidate.
2951  for (int i = 0; i < treat_as_empty.numel (); i++)
2952  {
2953  if (ch == treat_as_empty (i).string_value ()[0])
2954  {
2955  as_empty = true; // first char matches, so read the lot
2956  break;
2957  }
2958  }
2959  if (as_empty) // if first char matched...
2960  {
2961  as_empty = false; // ...look for the whole string
2962 
2963  is.clear (state); // treat_as_empty "-" causes partial read
2964  is.seekg (pos); // reset to position before failed read
2965 
2966  // treat_as_empty strings may be different sizes.
2967  // Read ahead longest, put it all back, then re-read the string
2968  // that matches.
2969  std::string look_buf (treat_as_empty_len, '\0');
2970  char *look = is.read (&look_buf[0], look_buf.size (), pos);
2971 
2972  is.clear (state);
2973  is.seekg (pos); // reset to position before look-ahead
2974  // FIXME: is.read could invalidate pos
2975 
2976  for (int i = 0; i < treat_as_empty.numel (); i++)
2977  {
2978  std::string s = treat_as_empty (i).string_value ();
2979  if (! strncmp (s.c_str (), look, s.size ()))
2980  {
2981  as_empty = true;
2982  // read just the right amount
2983  is.read (&look_buf[0], s.size (), pos);
2984  break;
2985  }
2986  }
2987  }
2988  }
2989 
2990  if (! is.eof () && ! as_empty)
2991  {
2992  state = is.rdstate (); // before tellg, since that fails at EOF
2993  pos = is.tellg ();
2994  ch = is.peek (); // ch == EOF if read failed; no need to chk fail
2995  if (ch == 'i' || ch == 'j') // pure imaginary
2996  {
2997  is.get ();
2998  im = re;
2999  re = 0;
3000  }
3001  else if (ch == '+' || ch == '-') // see if it is real+imag[ij]
3002  {
3003  // save stream state in case we have to restore it
3004  pos = is.tellg ();
3005  state = is.rdstate ();
3006 
3007  //im = octave_read_value<double> (is);
3008  im = read_double (is, fmt);
      // A failed read here is tentatively taken as a coefficient of 1;
      // it only stands if an i or j follows.
3009  if (is.fail ())
3010  im = 1;
3011 
3012  if (is.peek () == 'i' || is.peek () == 'j')
3013  is.get ();
3014  else
3015  {
3016  im = 0; // no valid imaginary part. Restore state
3017  is.clear (state); // eof shouldn't cause fail.
3018  is.seekg (pos);
3019  }
3020  }
3021  else if (is.eof ()) // we've read enough to be a "valid" read
3022  is.clear (state); // failed peek shouldn't cause fail
3023  }
3024  }
      // NOTE(review): listing line 3026 (the statement executed when as_empty
      // is true) is missing from this listing.
3025  if (as_empty)
3027  else
3028  val = Complex (re, im);
3029  }
3030 
3031  // Return in VAL the run of characters from IS NOT contained in PATTERN.
      //
      // Characters are taken with get_undelim until EOF or a character that
      // occurs in PATTERN is met.  That terminating character is put back onto
      // the stream and returned; EOF is returned when the input ran out.
      //
      // NOTE(review): the declarator line (listing line 3034) is missing from
      // this listing; callers in this file invoke the function as
      // scan_caret (is, pattern, val).
3032 
3033  int
3035  std::string& val) const
3036  {
3037  int c1 = std::istream::traits_type::eof ();
3038  std::ostringstream obuf; // Is this optimized for growing?
3039 
      // c1 is only fetched while the stream is good and not at EOF; otherwise
      // it is forced to EOF so the loop terminates.
3040  while (is && ((c1 = (is && ! is.eof ())
3041  ? is.get_undelim ()
3042  : std::istream::traits_type::eof ())
3043  != std::istream::traits_type::eof ())
3044  && pattern.find (c1) == std::string::npos)
3045  obuf << static_cast<char> (c1);
3046 
3047  val = obuf.str ();
3048 
3049  if (c1 != std::istream::traits_type::eof ())
3050  is.putback (c1);
3051 
3052  return c1;
3053  }
3054 
3055  // Read until one of the strings in DELIMITERS is found. For
3056  // efficiency, ENDS is a list of the last character of each delimiter.
      //
      // Returns the text read, without the delimiter that ended it.  Reading
      // also stops at eol1/eol2 and at end of input.
      //
      // NOTE(review): the declarator line (listing line 3059) is missing from
      // this listing, so the function name and its first parameters are not
      // shown here.
3057 
3058  std::string
3060  const std::string& ends) const
3061  {
3062  std::string retval ("");
3063  bool done = false;
3064  do
3065  {
3066  // find sequence ending with an ending char
3067  std::string next;
3068  scan_caret (is, ends.c_str (), next);
3069  retval = retval + next; // FIXME: could use repeated doubling of size
3070 
      // Consume the character that stopped scan_caret (or note EOF).
3071  int last = (! is.eof ()
3072  ? is.get_undelim () : std::istream::traits_type::eof ());
3073 
3074  if (last != std::istream::traits_type::eof ())
3075  {
3076  if (last == eol1 || last == eol2)
3077  break;
3078 
3079  retval = retval + static_cast<char> (last);
      // Check whether the text accumulated so far ends with any of the
      // delimiters.
3080  for (int i = 0; i < delimiters.numel (); i++)
3081  {
3082  std::string delim = delimiters(i).string_value ();
3083  size_t start = (retval.length () > delim.length ()
3084  ? retval.length () - delim.length ()
3085  : 0);
3086  std::string may_match = retval.substr (start);
3087  if (may_match == delim)
3088  {
3089  done = true;
      // Strip the delimiter from the returned text.
3090  retval = retval.substr (0, start);
      // Nothing preceded the delimiter: return its last character to
      // the stream.
3091  if (start == 0)
3092  is.putback (last);
3093  break;
3094  }
3095  }
3096  }
3097  }
3098  while (! done && is && ! is.eof ());
3099 
3100  return retval;
3101  }
3102 
3103  // Read stream until either fmt.width chars have been read, or
3104  // options.delimiter has been found. Does *not* rely on fmt being 's'.
3105  // Used by formats like %6f to limit to 6.
      //
      // The text read is returned in VAL.
      //
      // NOTE(review): the declarator line (listing line 3108) is missing from
      // this listing; callers in this file invoke the function as
      // scan_string (is, fmt, val).
3106 
3107  void
3109  std::string& val) const
3110  {
3111  if (delim_list.isempty ())
3112  {
3113  unsigned int i = 0;
3114  unsigned int width = fmt.width;
3115 
3116  for (i = 0; i < width; i++)
3117  {
3118  // Grow string in an exponential fashion if necessary.
3119  if (i >= val.length ())
3120  val.append (std::max (val.length (),
3121  static_cast<size_t> (16)), '\0');
3122 
3123  int ch = is.get ();
      // Stop at a delimiter or at end of input, handing the character
      // back to the stream.
3124  if (is_delim (ch) || ch == std::istream::traits_type::eof ())
3125  {
3126  is.putback (ch);
3127  break;
3128  }
3129  else
3130  val[i] = ch;
3131  }
3132  val = val.substr (0, i); // trim pre-allocation
3133  }
3134  else // Cell array of multi-character delimiters
3135  {
      // Collect the last character of every delimiter, followed by the two
      // end-of-line characters.
3136  std::string ends (delim_list.numel () + 2, '\0');
3137  int i;
3138  for (i = 0; i < delim_list.numel (); i++)
3139  {
3140  std::string tmp = delim_list(i).string_value ();
3141  ends[i] = tmp.back ();
3142  }
3143  ends[i++] = eol1;
3144  ends[i++] = eol2;
      // NOTE(review): listing line 3145 is missing here; presumably it fills
      // VAL by reading up to the next delimiter using ENDS -- confirm against
      // upstream.
3146  }
3147  }
3148 
3149  // Return in VAL the run of characters from IS contained in PATTERN.
      //
      // The first character not found in PATTERN ends the run; it is put back
      // onto the stream (unless it is EOF) and is also the return value.
      //
      // NOTE(review): the declarator line (listing line 3152) is missing from
      // this listing; scan_one in this file calls scan_bracket with
      // (is, fmt.char_class.c_str (), vv).
3150 
3151  int
3153  std::string& val) const
3154  {
3155  int c1 = std::istream::traits_type::eof ();
3156  std::ostringstream obuf; // Is this optimized for growing?
3157 
3158  while (is && pattern.find (c1 = is.get_undelim ()) != std::string::npos)
3159  obuf << static_cast<char> (c1);
3160 
3161  val = obuf.str ();
3162  if (c1 != std::istream::traits_type::eof ())
3163  is.putback (c1);
3164  return c1;
3165  }
3166 
3167  // Return in VAL a string, either delimited by whitespace/delimiters, or
3168  // enclosed in a pair of double quotes ("..."). Enclosing quotes are
3169  // removed. A consecutive pair "" is inserted into VAL as a single ".
      //
      // Leading whitespace is skipped first.  Input that does not start with a
      // double quote is handed to scan_string.
      //
      // NOTE(review): the declarator line (listing line 3172) is missing from
      // this listing; scan_one in this file calls the function as
      // scan_qstring (is, fmt, vv).
3170 
3171  void
3173  std::string& val)
3174  {
3175  skip_whitespace (is);
3176 
3177  if (is.peek () != '"')
3178  scan_string (is, fmt, val);
3179  else
3180  {
3181  is.get ();
3182  scan_caret (is, R"(")", val); // read everything until "
3183  is.get (); // swallow "
3184 
3185  while (is && is.peek () == '"') // if double ", insert one in stream,
3186  { // and keep looking for single "
3187  is.get ();
3188  std::string val1;
3189  scan_caret (is, R"(")", val1);
3190  val = val + '"' + val1;
      // Consume the quote that ended this segment.
3191  is.get_undelim ();
3192  }
3193  }
3194  }
3195 
3196  // Read from IS into VAL a string of the next fmt.width characters,
3197  // including any whitespace or delimiters.
      //
      // If the input ends early, VAL is shortened to the number of characters
      // actually read.
      //
      // NOTE(review): the declarator line (listing line 3200) is missing from
      // this listing; scan_one in this file calls the function as
      // scan_cstring (is, fmt, vv).
3198 
3199  void
3201  std::string& val) const
3202  {
3203  val.resize (fmt.width);
3204 
3205  for (unsigned int i = 0; is && i < fmt.width; i++)
3206  {
3207  int ch = is.get_undelim ();
3208  if (ch != std::istream::traits_type::eof ())
3209  val[i] = ch;
3210  else
3211  {
3212  val.resize (i);
3213  break;
3214  }
3215  }
3216  }
3217 
3218  // Read a single '%...' conversion and place it in position ROW of OV.
      //
      // Numeric conversions of type 'f' or 'n' go through scan_complex; all
      // other numeric conversions are read with read_double and converted to
      // the integer type selected by fmt.bitwidth and fmt.type.  Non-numeric
      // conversions ('s', 'q', 'c', '[', '^') read a string that is stored as
      // a Cell.  Nothing is stored when fmt.discard is set.
      //
      // NOTE(review): the declarator lines (listing lines 3221-3222) are
      // missing from this listing; read_format_once in this file calls the
      // function as scan_one (is, *elem, *out, row).
3219 
3220  void
3223  {
3224  skip_whitespace (is);
3225 
3226  is.clear ();
3227 
3228  octave_value val;
3229  if (fmt.numeric)
3230  {
3231  if (fmt.type == 'f' || fmt.type == 'n')
3232  {
3233  Complex v;
3234  skip_whitespace (is);
3235  scan_complex (is, fmt, v);
3236 
3237  if (! fmt.discard && ! is.fail ())
3238  {
      // Bit width 64 stores double / Complex values; any other width
      // stores float / FloatComplex values.
3239  if (fmt.bitwidth == 64)
3240  {
3241  if (ov.isreal () && v.imag () == 0)
3242  ov.internal_rep ()->fast_elem_insert (row(0), v.real ());
3243  else
3244  {
3245  if (ov.isreal ()) // cat does type conversion
3246  ov = do_cat_op (ov, octave_value (v), row);
3247  else
3248  ov.internal_rep ()->fast_elem_insert (row(0), v);
3249  }
3250  }
3251  else
3252  {
3253  if (ov.isreal () && v.imag () == 0)
3254  ov.internal_rep ()->fast_elem_insert (row(0),
3255  float (v.real ()));
3256  else
3257  {
3258  if (ov.isreal ()) // cat does type conversion
3259  ov = do_cat_op (ov, octave_value (v), row);
3260  else
3261  ov.internal_rep ()->fast_elem_insert (row(0),
3262  FloatComplex (v));
3263  }
3264  }
3265  }
3266  }
3267  else
3268  {
3269  double v; // Matlab docs say 1e30 etc should be valid for %d and
3270  // 1000 as a %d8 should be 127, so read as double.
3271  // Some loss of precision for d64 and u64.
3272  skip_whitespace (is);
3273  v = read_double (is, fmt);
3274  if (! fmt.discard && ! is.fail ())
      // Convert the double to the integer type named by the format:
      // signed for 'd', unsigned otherwise.
3275  switch (fmt.bitwidth)
3276  {
3277  case 64:
3278  switch (fmt.type)
3279  {
3280  case 'd':
3281  {
3282  octave_int64 vv = v;
3283  ov.internal_rep ()->fast_elem_insert (row(0), vv);
3284  }
3285  break;
3286 
3287  case 'u':
3288  {
3289  octave_uint64 vv = v;
3290  ov.internal_rep ()->fast_elem_insert (row(0), vv);
3291  }
3292  break;
3293  }
3294  break;
3295 
3296  case 32:
3297  switch (fmt.type)
3298  {
3299  case 'd':
3300  {
3301  octave_int32 vv = v;
3302  ov.internal_rep ()->fast_elem_insert (row(0), vv);
3303  }
3304  break;
3305 
3306  case 'u':
3307  {
3308  octave_uint32 vv = v;
3309  ov.internal_rep ()->fast_elem_insert (row(0), vv);
3310  }
3311  break;
3312  }
3313  break;
3314 
3315  case 16:
3316  if (fmt.type == 'd')
3317  {
3318  octave_int16 vv = v;
3319  ov.internal_rep ()->fast_elem_insert (row(0), vv);
3320  }
3321  else
3322  {
3323  octave_uint16 vv = v;
3324  ov.internal_rep ()->fast_elem_insert (row(0), vv);
3325  }
3326  break;
3327 
3328  case 8:
3329  if (fmt.type == 'd')
3330  {
3331  octave_int8 vv = v;
3332  ov.internal_rep ()->fast_elem_insert (row(0), vv);
3333  }
3334  else
3335  {
3336  octave_uint8 vv = v;
3337  ov.internal_rep ()->fast_elem_insert (row(0), vv);
3338  }
3339  break;
3340  }
3341  }
3342 
      // A failed numeric read stores empty_value at this position instead.
      // NOTE(review): this uses bitwise & on two boolean operands; the result
      // matches && here, but && looks like what was intended -- confirm.
3343  if (is.fail () & ! fmt.discard)
3344  ov = do_cat_op (ov, empty_value, row);
3345  }
3346  else
3347  {
3348  std::string vv (" "); // initial buffer. Grows as needed
3349  switch (fmt.type)
3350  {
3351  case 's':
3352  scan_string (is, fmt, vv);
3353  break;
3354 
3355  case 'q':
3356  scan_qstring (is, fmt, vv);
3357  break;
3358 
3359  case 'c':
3360  scan_cstring (is, fmt, vv);
3361  break;
3362 
3363  case '[':
3364  scan_bracket (is, fmt.char_class.c_str (), vv);
3365  break;
3366 
3367  case '^':
3368  scan_caret (is, fmt.char_class.c_str (), vv);
3369  break;
3370  }
3371 
3372  if (! fmt.discard)
3373  ov.internal_rep ()->fast_elem_insert (row (0),
3374  Cell (octave_value (vv)));
3375 
3376  // FIXME: why does failbit get set at EOF, instead of eofbit?
3377  if (! vv.empty ())
3378  is.clear (is.rdstate () & ~std::ios_base::failbit);
3379  }
3380 
3381  is.field_done ();
3382  }
3383 
3384  // Read data corresponding to the entire format string once, placing the
3385  // values in row ROW of retval.
      //
      // DONE_AFTER is set to i+1 for the first format element after which EOF
      // was reached, or to i for the first element whose conversion failed.
      //
      // The return value is a sum of flags:
      //   1 - no non-discarded conversion succeeded,
      //   2 - the stream is at EOF,
      //   4 - a conversion failed,
      //   8 - nothing worked (no element was processed before EOF/failure).
      //
      // NOTE(review): the declarator line (listing line 3388) is missing from
      // this listing, so the function name and its first parameter are not
      // shown here.
3386 
3387  int
3389  textscan_format_list& fmt_list,
3390  std::list<octave_value>& retval,
3391  Array<octave_idx_type> row, int& done_after)
3392  {
3393  const textscan_format_elt *elem = fmt_list.first ();
3394  std::list<octave_value>::iterator out = retval.begin ();
3395  bool no_conversions = true;
3396  bool done = false;
3397  bool conversion_failed = false; // Record for ReturnOnError
3398  bool nothing_worked = true;
3399 
3400  octave_quit ();
3401 
3402  for (size_t i = 0; i < fmt_list.numel (); i++)
3403  {
3404  bool this_conversion_failed = false;
3405 
3406  // Clear fail of previous numeric conversions.
3407  is.clear ();
3408 
3409  switch (elem->type)
3410  {
3411  case 'C':
3412  case 'D':
3413  warning ("%s: conversion %c not yet implemented",
3414  who.c_str (), elem->type);
3415  break;
3416 
3417  case 'u':
3418  case 'd':
3419  case 'f':
3420  case 'n':
3421  case 's':
3422  case '[':
3423  case '^':
3424  case 'q':
3425  case 'c':
3426  scan_one (is, *elem, *out, row);
3427  break;
3428 
      // NOTE(review): listing line 3429 (the case label for literal text)
      // is missing from this listing.
3430  match_literal (is, *elem);
3431  break;
3432 
3433  default:
3434  error ("Unknown format element '%c'", elem->type);
3435  }
3436 
3437  if (! is.fail ())
3438  {
3439  if (! elem->discard)
3440  no_conversions = false;
3441  }
3442  else
3443  {
3444  is.clear (is.rdstate () & ~std::ios::failbit);
3445 
      // A failure only counts as a failed conversion when the stream is
      // not positioned at a delimiter.
3446  if (! is.eof ())
3447  {
3448  if (delim_list.isempty ())
3449  {
3450  if (! is_delim (is.peek ()))
3451  this_conversion_failed = true;
3452  }
3453  else // Cell array of multi-character delimiters
3454  {
3455  char *pos = is.tellg ();
3456  if (-1 == lookahead (is, delim_list, delim_len))
3457  this_conversion_failed = true;
3458  is.clear ();
3459  is.seekg (pos); // reset to position before look-ahead
3460  }
3461  }
3462  }
3463 
      // Discarded elements have no output column, so only advance OUT for
      // the others.
3464  if (! elem->discard)
3465  out++;
3466 
3467  elem = fmt_list.next ();
3468  char *pos = is.tellg ();
3469 
3470  // FIXME: these conversions "ignore delimiters". Should they include
3471  // delimiters at the start of the conversion, or can those be skipped?
      // NOTE(review): listing line 3472 (the opening of the if condition that
      // guards skip_delim) is missing from this listing.
3473  // && elem->type != '[' && elem->type != '^' && elem->type != 'c'
3474  )
3475  skip_delim (is);
3476 
3477  if (is.eof ())
3478  {
3479  if (! done)
3480  done_after = i+1;
3481 
3482  // note EOF, but process others to get empty_val.
3483  done = true;
3484  }
3485 
3486  if (this_conversion_failed)
3487  {
      // The failure is only kept when the stream has not moved since POS
      // was recorded just before the delimiter skip.
3488  if (is.tellg () == pos && ! conversion_failed)
3489  {
3490  // done_after = first failure
3491  done_after = i; // note fail, but parse others to get empty_val
3492  conversion_failed = true;
3493  }
3494  else
3495  this_conversion_failed = false;
3496  }
3497  else if (! done && ! conversion_failed)
3498  nothing_worked = false;
3499  }
3500 
3501  if (done)
3502  is.setstate (std::ios::eofbit);
3503 
3504  return no_conversions
3505  + (is.eof () ? 2 : 0)
3506  + (conversion_failed ? 4 : 0)
3507  + (nothing_worked ? 8 : 0);
3508 
3509  }
3510 
3511  void
3513  textscan_format_list& fmt_list)
3514  {
      // Parse the option name/value pairs held in args and store the settings
      // in the corresponding members.  Option names are lower-cased before
      // comparison.  Recognised names: delimiter, commentstyle, treatasempty,
      // collectoutput, emptyvalue, headerlines, bufsize, multipledelimsasone,
      // returnonerror, whitespace, expchars and endofline; anything else is an
      // error.  Afterwards the whitespace and delimiter look-up tables are
      // rebuilt from the final settings.
      //
      // NOTE(review): the declarator line (listing line 3512) is missing from
      // this listing, so the function name and the declaration of args are not
      // shown here.
3515  int last = args.length ();
3516  int n = last;
3517 
      // Options must come in name/value pairs.
3518  if (n & 1)
3519  error ("%s: %d parameters given, but only %d values",
3520  who.c_str (), n-n/2, n/2);
3521 
3522  delim_len = 1;
3523  bool have_delims = false;
3524  for (int i = 0; i < last; i += 2)
3525  {
3526  std::string param = args(i).xstring_value ("%s: Invalid parameter type <%s> for parameter %d",
3527  who.c_str (),
3528  args(i).type_name ().c_str (),
3529  i/2 + 1);
3530  std::transform (param.begin (), param.end (), param.begin (), ::tolower);
3531 
3532  if (param == "delimiter")
3533  {
3534  bool invalid = true;
3535  if (args(i+1).is_string ())
3536  {
3537  invalid = false;
3538  have_delims = true;
3539  delims = args(i+1).string_value ();
      // NOTE(review): listing line 3541 (the statement controlled by this
      // if) is missing; presumably it applies do_string_escapes to delims,
      // as the endofline branch below does for its value -- confirm.
3540  if (args(i+1).is_sq_string ())
3542  }
3543  else if (args(i+1).iscell ())
3544  {
3545  invalid = false;
3546  delim_list = args(i+1).cell_value ();
3547  delim_table = " "; // non-empty, to flag non-default delim
3548 
3549  // Check that all elements are strings, and find max length
3550  for (int j = 0; j < delim_list.numel (); j++)
3551  {
3552  if (! delim_list(j).is_string ())
3553  invalid = true;
3554  else
3555  {
      // NOTE(review): listing line 3557 (the start of the statement
      // controlled by this if) is missing from this listing.
3556  if (delim_list(j).is_sq_string ())
3558  .string_value ());
3559  octave_idx_type len = delim_list(j).string_value ()
3560  .length ();
3561  delim_len = std::max (static_cast<int> (len), delim_len);
3562  }
3563  }
3564  }
3565  if (invalid)
3566  error ("%s: Delimiters must be either a string or cell array of strings",
3567  who.c_str ());
3568  }
3569  else if (param == "commentstyle")
3570  {
3571  if (args(i+1).is_string ())
3572  {
3573  // check here for names like "C++", "C", "shell", ...?
3574  comment_style = Cell (args(i+1));
3575  }
3576  else if (args(i+1).iscell ())
3577  {
3578  comment_style = args(i+1).cell_value ();
3579  int len = comment_style.numel ();
3580  if ((len >= 1 && ! comment_style (0).is_string ())
3581  || (len >= 2 && ! comment_style (1).is_string ())
3582  || (len >= 3))
3583  error ("%s: CommentStyle must be either a string or cell array of one or two strings",
3584  who.c_str ());
3585  }
3586  else
3587  error ("%s: CommentStyle must be either a string or cell array of one or two strings",
3588  who.c_str ());
3589 
3590  // How far ahead do we need to look to detect an open comment
3591  // and which character do we look for?
3592  if (comment_style.numel () >= 1)
3593  {
3594  comment_len = comment_style (0).string_value ().size ();
3595  comment_char = comment_style (0).string_value ()[0];
3596  }
3597  }
3598  else if (param == "treatasempty")
3599  {
3600  bool invalid = false;
3601  if (args(i+1).is_string ())
3602  {
3603  treat_as_empty = Cell (args(i+1));
3604  treat_as_empty_len = args(i+1).string_value ().size ();
3605  }
3606  else if (args(i+1).iscell ())
3607  {
3608  treat_as_empty = args(i+1).cell_value ();
3609  for (int j = 0; j < treat_as_empty.numel (); j++)
3610  if (! treat_as_empty (j).is_string ())
3611  invalid = true;
3612  else
3613  {
3614  int k = treat_as_empty (j).string_value ().size ();
      // NOTE(review): listing line 3616 (the statement controlled by
      // this if) is missing; presumably it records k as the new
      // treat_as_empty_len -- confirm.
3615  if (k > treat_as_empty_len)
3617  }
3618  }
      // NOTE(review): the message below says "one or two strings", but no
      // limit on the number of strings is checked above; the wording looks
      // copied from the CommentStyle message -- confirm.
3619  if (invalid)
3620  error ("%s: TreatAsEmpty must be either a string or cell array of one or two strings",
3621  who.c_str ());
3622 
3623  // FIXME: Ensure none is a prefix of a later one. Sort by length?
3624  }
3625  else if (param == "collectoutput")
3626  {
3627  collect_output = args(i+1).xbool_value ("%s: CollectOutput must be logical or numeric", who.c_str ());
3628  }
3629  else if (param == "emptyvalue")
3630  {
3631  empty_value = args(i+1).xscalar_value ("%s: EmptyValue must be numeric", who.c_str ());
3632  }
3633  else if (param == "headerlines")
3634  {
3635  header_lines = args(i+1).xscalar_value ("%s: HeaderLines must be numeric", who.c_str ());
3636  }
3637  else if (param == "bufsize")
3638  {
3639  buffer_size = args(i+1).xscalar_value ("%s: BufSize must be numeric", who.c_str ());
3640  }
3641  else if (param == "multipledelimsasone")
3642  {
3643  multiple_delims_as_one = args(i+1).xbool_value ("%s: MultipleDelimsAsOne must be logical or numeric", who.c_str ());
3644  }
3645  else if (param == "returnonerror")
3646  {
3647  return_on_error = args(i+1).xbool_value ("%s: ReturnOnError must be logical or numeric", who.c_str ());
3648  }
3649  else if (param == "whitespace")
3650  {
3651  whitespace = args(i+1).xstring_value ("%s: Whitespace must be a character string", who.c_str ());
3652  }
3653  else if (param == "expchars")
3654  {
3655  exp_chars = args(i+1).xstring_value ("%s: ExpChars must be a character string", who.c_str ());
3656  default_exp = false;
3657  }
3658  else if (param == "endofline")
3659  {
3660  bool valid = true;
3661  std::string s = args(i+1).xstring_value (R"(%s: EndOfLine must be at most one character or '\r\n')", who.c_str ());
3662  if (args(i+1).is_sq_string ())
3663  s = do_string_escapes (s);
3664  int l = s.length ();
      // An empty string sets both end-of-line characters to -2; one
      // character is used for both; two characters must be exactly "\r\n".
3665  if (l == 0)
3666  eol1 = eol2 = -2;
3667  else if (l == 1)
3668  eol1 = eol2 = s.c_str ()[0];
3669  else if (l == 2)
3670  {
3671  eol1 = s.c_str ()[0];
3672  eol2 = s.c_str ()[1];
3673  if (eol1 != '\r' || eol2 != '\n') // Why limit it?
3674  valid = false;
3675  }
3676  else
3677  valid = false;
3678 
3679  if (! valid)
3680  error (R"(%s: EndOfLine must be at most one character or '\r\n')",
3681  who.c_str ());
3682  }
3683  else
3684  error ("%s: unrecognized option '%s'", who.c_str (), param.c_str ());
3685  }
3686 
3687  // Remove any user-supplied delimiter from whitespace list
3688  for (unsigned int j = 0; j < delims.length (); j++)
3689  {
3690  whitespace.erase (std::remove (whitespace.begin (),
3691  whitespace.end (),
3692  delims[j]),
3693  whitespace.end ());
3694  }
      // Only single-character entries of the delimiter cell array are removed
      // from the whitespace list.
3695  for (int j = 0; j < delim_list.numel (); j++)
3696  {
3697  std::string delim = delim_list(j).string_value ();
3698  if (delim.length () == 1)
3699  whitespace.erase (std::remove (whitespace.begin (),
3700  whitespace.end (),
3701  delim[0]),
3702  whitespace.end ());
3703  }
3704 
      // Build a 256-entry look-up table with '1' at each whitespace character.
      // NOTE(review): whitespace[i] appears to be a plain char; where char is
      // signed, bytes >= 0x80 would convert to an out-of-range index.  Consider
      // casting to unsigned char here and in the delims loop below -- confirm.
3705  whitespace_table = std::string (256, '\0');
3706  for (unsigned int i = 0; i < whitespace.length (); i++)
3707  whitespace_table[whitespace[i]] = '1';
3708 
3709  // For Matlab compatibility, add 0x20 to whitespace, unless
3710  // whitespace is explicitly ignored.
3711  if (! (whitespace.empty () && fmt_list.has_string))
3712  whitespace_table[' '] = '1';
3713 
3714  // Create look-up table of delimiters, based on 'delimiter'
3715  delim_table = std::string (256, '\0');
3716  if (eol1 >= 0 && eol1 < 256)
3717  delim_table[eol1] = '1'; // EOL is always a delimiter
3718  if (eol2 >= 0 && eol2 < 256)
3719  delim_table[eol2] = '1'; // EOL is always a delimiter
      // Without an explicit string delimiter, every character accepted by
      // isspace is a delimiter; otherwise only the characters in delims are.
3720  if (! have_delims)
3721  for (unsigned int i = 0; i < 256; i++)
3722  {
3723  if (isspace (i))
3724  delim_table[i] = '1';
3725  }
3726  else
3727  for (unsigned int i = 0; i < delims.length (); i++)
3728  delim_table[delims[i]] = '1';
3729  }
3730 
3731  // Skip comments, and characters specified by the "Whitespace" option.
3732  // If EOLstop == true, don't skip end of line.
      //
      // Returns the first character that was not skipped (it is put back onto
      // the stream) or EOF.  The lines counter is advanced for end-of-line
      // characters seen, with an eol1/eol2 pair counted once.
      //
      // NOTE(review): the declarator line (listing line 3735) is missing from
      // this listing; callers in this file invoke the function as
      // skip_whitespace (is) and skip_whitespace (is, true).
3733 
3734  int
3736  {
3737  int c1 = std::istream::traits_type::eof ();
3738  bool found_comment = false;
3739 
3740  do
3741  {
3742  found_comment = false;
3743  int prev = -1;
      // ++lines is evaluated before the EOLstop test, so the counter also
      // advances for the end-of-line character at which the loop stops.
3744  while (is && (c1 = is.get_undelim ()) != std::istream::traits_type::eof ()
3745  && ( ( (c1 == eol1 || c1 == eol2) && ++lines && ! EOLstop)
3746  || isspace (c1)))
3747  {
      // Undo the double count for a two-character end of line.
3748  if (prev == eol1 && eol1 != eol2 && c1 == eol2)
3749  lines--;
3750  prev = c1;
3751  }
3752 
3753  if (c1 == comment_char) // see if we match an open comment
3754  {
3755  // save stream state in case we have to restore it
3756  char *pos = is.tellg ();
3757  std::ios::iostate state = is.rdstate ();
3758 
3759  std::string tmp (comment_len, '\0');
3760  char *look = is.read (&tmp[0], comment_len-1, pos); // already read first char
3761  if (is && comment_style.numel () > 0 &&
3762  ! strncmp (comment_style(0).string_value ().substr (1).c_str (),
3763  look, comment_len-1))
3764  {
3765  found_comment = true;
3766 
3767  std::string dummy;
3768  if (comment_style.numel () == 1) // skip to end of line
3769  {
3770  std::string eol (3, '\0');
3771  eol[0] = eol1;
3772  eol[1] = eol2;
3773 
3774  scan_caret (is, eol, dummy);
3775  c1 = is.get_undelim ();
3776  if (c1 == eol1 && eol1 != eol2 && is.peek_undelim () == eol2)
3777  is.get_undelim ();
3778  lines++;
3779  }
3780  else // matching pair
3781  {
3782  std::string end_c = comment_style(1).string_value ();
3783  // last char of end-comment sequence
3784  std::string last = end_c.substr (end_c.size () - 1);
3785  std::string may_match ("");
3786  do
3787  {
3788  // find sequence ending with last char
3789  scan_caret (is, last, dummy);
3790  is.get_undelim (); // (read LAST itself)
3791 
      // Keep only as many trailing characters as the end-comment
      // sequence is long.
3792  may_match = may_match + dummy + last;
3793  if (may_match.length () > end_c.length ())
3794  {
3795  size_t start = may_match.length () - end_c.length ();
3796  may_match = may_match.substr (start);
3797  }
3798  }
3799  while (may_match != end_c && is && ! is.eof ());
3800  }
3801  }
3802  else // wasn't really a comment; restore state
3803  {
3804  is.clear (state);
3805  is.seekg (pos);
3806  }
3807  }
3808  }
3809  while (found_comment);
3810 
3811  if (c1 != std::istream::traits_type::eof ())
3812  is.putback (c1);
3813  return c1;
3814  }
3815 
3816  // See if the next few characters match one of the strings in target.
3817  // For efficiency, MAX_LEN is the cached longest length of any target.
3818  // Return -1 if none is found, or the index of the match.
      //
      // CASE_SENSITIVE selects strncmp (true) or strncasecmp (false) for the
      // comparison.  When a target matches, exactly that target's length is
      // re-read from the saved position; when nothing matches, the stream is
      // left at the position it had on entry.  The stream state flags are
      // cleared after the look-ahead read.
3819 
3820  int
3821  textscan::lookahead (delimited_stream& is, const Cell& targets, int max_len,
3822  bool case_sensitive) const
3823  {
3824  // target strings may be different sizes.
3825  // Read ahead longest, put it all back, then re-read the string
3826  // that matches.
3827 
3828  char *pos = is.tellg ();
3829 
3830  std::string tmp (max_len, '\0');
3831  char *look = is.read (&tmp[0], tmp.size (), pos);
3832 
3833  is.clear ();
3834  is.seekg (pos); // reset to position before read
3835  // FIXME: pos may be corrupted by is.read
3836 
3837  int i;
3838  int (*compare)(const char *, const char *, size_t);
3839  compare = (case_sensitive ? strncmp : strncasecmp);
3840 
      // Targets are tried in order; the first one that prefixes the
      // look-ahead text wins.
3841  for (i = 0; i < targets.numel (); i++)
3842  {
3843  std::string s = targets (i).string_value ();
3844  if (! (*compare) (s.c_str (), look, s.size ()))
3845  {
3846  is.read (&tmp[0], s.size (), pos); // read just the right amount
3847  break;
3848  }
3849  }
3850 
      // Loop ran to completion: no target matched.
3851  if (i == targets.numel ())
3852  i = -1;
3853 
3854  return i;
3855  }
3856 
3857  // Skip delimiters -- multiple if MultipleDelimsAsOne specified.
      //
      // Whitespace is skipped first (stopping at end of line).  Returns the
      // last character examined.
      //
      // NOTE(review): the declarator line (listing line 3859) is missing from
      // this listing; read_format_once in this file calls the function as
      // skip_delim (is).
3858  int
3860  {
3861  int c1 = skip_whitespace (is, true); // 'true': stop once EOL is read
3862  if (delim_list.numel () == 0) // single character delimiter
3863  {
3864  if (is_delim (c1) || c1 == eol1 || c1 == eol2)
3865  {
3866  is.get ();
3867  if (c1 == eol1 && is.peek_undelim () == eol2)
3868  is.get_undelim (); // if \r\n, skip the \n too.
3869 
      // NOTE(review): listing line 3870 is missing here; presumably it is the
      // if that tests the MultipleDelimsAsOne setting -- confirm.
3871  {
3872  int prev = -1;
3873  // skip multiple delims.
3874  // Increment lines for each end-of-line seen; for \r\n, decrement
3875  while (is && ((c1 = is.get_undelim ())
3876  != std::istream::traits_type::eof ())
3877  && (((c1 == eol1 || c1 == eol2) && ++lines)
3878  || isspace (c1) || is_delim (c1)))
3879  {
3880  if (prev == eol1 && eol1 != eol2 && c1 == eol2)
3881  lines--;
3882  prev = c1;
3883  }
3884  if (c1 != std::istream::traits_type::eof ())
3885  is.putback (c1);
3886  }
3887  }
3888  }
3889  else // multi-character delimiter
3890  {
3891  int first_match;
3892 
      // lookahead consumes the delimiter when one matches, so only the
      // end-of-line characters need to be consumed explicitly below.
3893  if (c1 == eol1 || c1 == eol2
3894  || (-1 != (first_match = lookahead (is, delim_list, delim_len))))
3895  {
3896  if (c1 == eol1)
3897  {
3898  is.get_undelim ();
3899  if (is.peek_undelim () == eol2)
3900  is.get_undelim ();
3901  }
3902  else if (c1 == eol2)
3903  {
3904  is.get_undelim ();
3905  }
3906 
      // NOTE(review): listing line 3907 is missing here; presumably it is the
      // if that tests the MultipleDelimsAsOne setting -- confirm.
3908  {
3909  int prev = -1;
3910  // skip multiple delims.
3911  // Increment lines for each end-of-line seen; for \r\n, decrement
3912  while (is && ((c1 = skip_whitespace (is, true))
3913  != std::istream::traits_type::eof ())
3914  && (((c1 == eol1 || c1 == eol2) && ++lines)
3915  || -1 != lookahead (is, delim_list, delim_len)))
3916  {
3917  if (prev == eol1 && eol1 != eol2 && c1 == eol2)
3918  lines--;
3919  prev = c1;
3920  }
3921  }
3922  }
3923  }
3924 
3925  return c1;
3926  }
3927 
3928  // Read in as much of the input as coincides with the literal in the
3929  // format string. Return "true" if the entire literal is matched, else
3930  // false (and set failbit).
      //
      // The literal is fmt.text, compared over fmt.width characters.  The
      // first mismatching character is put back onto the stream unless it is
      // EOF.
      //
      // NOTE(review): the declarator line (listing line 3933) is missing from
      // this listing; read_format_once in this file calls the function as
      // match_literal (is, *elem).
3931 
3932  bool
3934  {
3935  // "false" -> treat EOL as normal space
3936  // since a delimiter at the start of a line is a mismatch, not empty field
3937  skip_whitespace (is, false);
3938 
3939  for (unsigned int i = 0; i < fmt.width; i++)
3940  {
3941  int ch = is.get_undelim ();
3942  if (ch != fmt.text[i])
3943  {
3944  if (ch != std::istream::traits_type::eof ())
3945  is.putback (ch);
3946  is.setstate (std::ios::failbit);
3947  return false;
3948  }
3949  }
3950  return true;
3951  }
3952 
3953  void
3955  {
3956  fail = true;
3957  errmsg = msg;
3958  }
3959 
3960  void
3961  base_stream::error (const std::string& who, const std::string& msg)
3962  {
3963  fail = true;
3964  errmsg = who + ": " + msg;
3965  }
3966 
3967  void
3969  {
3970  fail = false;
3971  errmsg = "";
3972  }
3973 
3974  void
3976  {
3977  std::istream *is = input_stream ();
3978  std::ostream *os = output_stream ();
3979 
3980  if (is)
3981  is->clear ();
3982 
3983  if (os)
3984  os->clear ();
3985  }
3986 
3987  // Functions that are defined for all input streams (input streams
3988  // are those that define is).
3989 
3990  std::string
3992  bool strip_newline, const std::string& who)
3993  {
3994  if (application::interactive () && file_number () == 0)
3995  ::error ("%s: unable to read from stdin while running interactively",
3996  who.c_str ());
3997 
3999 
4000  err = false;
4001 
4002  std::istream *isp = input_stream ();
4003 
4004  if (! isp)
4005  {
4006  err = true;
4007  invalid_operation (who, "reading");
4008  }
4009  else
4010  {
4011  std::istream& is = *isp;
4012 
4013  std::ostringstream buf;
4014 
4015  int c = 0;
4016  int char_count = 0;
4017 
4018  if (max_len != 0)
4019  {
4020  while (is && (c = is.get ()) != std::istream::traits_type::eof ())
4021  {
4022  char_count++;
4023 
4024  // Handle CRLF, CR, or LF as line ending.
4025  if (c == '\r')
4026  {
4027  if (! strip_newline)
4028  buf << static_cast<char> (c);
4029 
4030  c = is.get ();
4031 
4032  if (c != std::istream::traits_type::eof ())
4033  {
4034  if (c == '\n')
4035  {
4036  char_count++;
4037 
4038  if (! strip_newline)
4039  buf << static_cast<char> (c);
4040  }
4041  else
4042  is.putback (c);
4043  }
4044 
4045  break;
4046  }
4047  else if (c == '\n')
4048  {
4049  if (! strip_newline)
4050  buf << static_cast<char> (c);
4051 
4052  break;
4053  }
4054  else
4055  buf << static_cast<char> (c);
4056 
4057  if (max_len > 0 && char_count == max_len)
4058  break;
4059  }
4060  }
4061 
4062  if (! is.eof () && char_count > 0)
4063  {
4064  // GAGME. Matlab seems to check for EOF even if the last character
4065  // in a file is a newline character. This is NOT what the
4066  // corresponding C-library functions do.
4067  int disgusting_compatibility_hack = is.get ();
4068  if (! is.eof ())
4069  is.putback (disgusting_compatibility_hack);
4070  }
4071 
4072  if (is.good () || (is.eof () && char_count > 0))
4073  retval = buf.str ();
4074  else
4075  {
4076  err = true;
4077 
4078  if (is.eof () && char_count == 0)
4079  error (who, "at end of file");
4080  else
4081  error (who, "read error");
4082  }
4083  }
4084 
4085  return retval;
4086  }
4087 
4088  std::string
4090  const std::string& who)
4091  {
4092  return do_gets (max_len, err, true, who);
4093  }
4094 
4095  std::string
4097  const std::string& who)
4098  {
4099  return do_gets (max_len, err, false, who);
4100  }
4101 
4102  off_t
4103  base_stream::skipl (off_t num, bool& err, const std::string& who)
4104  {
4105  if (application::interactive () && file_number () == 0)
4106  ::error ("%s: unable to read from stdin while running interactively",
4107  who.c_str ());
4108 
4109  off_t cnt = -1;
4110 
4111  err = false;
4112 
4113  std::istream *isp = input_stream ();
4114 
4115  if (! isp)
4116  {
4117  err = true;
4118  invalid_operation (who, "reading");
4119  }
4120  else
4121  {
4122  std::istream& is = *isp;
4123 
4124  int c = 0;
4125  int lastc = -1;
4126  cnt = 0;
4127 
4128  while (is && (c = is.get ()) != std::istream::traits_type::eof ())
4129  {
4130  // Handle CRLF, CR, or LF as line ending.
4131  if (c == '\r' || (c == '\n' && lastc != '\r'))
4132  {
4133  if (++cnt == num)
4134  break;
4135  }
4136 
4137  lastc = c;
4138  }
4139 
4140  // Maybe eat the following \n if \r was just met.
4141  if (c == '\r' && is.peek () == '\n')
4142  is.get ();
4143 
4144  if (is.bad ())
4145  {
4146  err = true;
4147  error (who, "read error");
4148  }
4149 
4150  if (err)
4151  cnt = -1;
4152  }
4153 
4154  return cnt;
4155  }
4156 
4157  template <typename T>
4158  std::istream&
4159  octave_scan_1 (std::istream& is, const scanf_format_elt& fmt,
4160  T *valptr)
4161  {
4162  T value = T ();
4163 
4164  switch (fmt.type)
4165  {
4166  case 'o':
4167  is >> std::oct >> value >> std::dec;
4168  break;
4169 
4170  case 'x':
4171  is >> std::hex >> value >> std::dec;
4172  break;
4173 
4174  case 'i':
4175  {
4176  int c1 = std::istream::traits_type::eof ();
4177 
4178  while (is && (c1 = is.get ()) != std::istream::traits_type::eof ()
4179  && isspace (c1))
4180  ; // skip whitespace
4181 
4182  if (c1 != std::istream::traits_type::eof ())
4183  {
4184  if (c1 == '0')
4185  {
4186  int c2 = is.peek ();
4187 
4188  if (c2 == 'x' || c2 == 'X')
4189  {
4190  is.ignore ();
4191  if (std::isxdigit (is.peek ()))
4192  is >> std::hex >> value >> std::dec;
4193  else
4194  value = 0;
4195  }
4196  else
4197  {
4198  if (c2 == '0' || c2 == '1' || c2 == '2'
4199  || c2 == '3' || c2 == '4' || c2 == '5'
4200  || c2 == '6' || c2 == '7')
4201  is >> std::oct >> value >> std::dec;
4202  else if (c2 == '8' || c2 == '9')
4203  {
4204  // FIXME: Would like to set error state on octave
4205  // stream. See bug #46493. But only std::istream is
4206  // input to fcn.
4207  // error ("internal failure to match octal format");
4208  value = 0;
4209  }
4210  else
4211  value = 0;
4212  }
4213  }
4214  else
4215  {
4216  is.putback (c1);
4217 
4218  is >> value;
4219  }
4220  }
4221  }
4222  break;
4223 
4224  default:
4225  is >> value;
4226  break;
4227  }
4228 
4229  // If conversion produces an integer that overflows, failbit is set but
4230  // value is non-zero. We want to treat this case as success, so clear
4231  // failbit from the stream state to keep going.
4232  // FIXME: Maybe set error state on octave stream as above? Matlab does
4233  // *not* indicate an error message on overflow.
4234  if ((is.rdstate () & std::ios::failbit) && value != T ())
4235  is.clear (is.rdstate () & ~std::ios::failbit);
4236 
4237  // Only copy the converted value if the stream is in a state where we
4238  // want to continue reading.
4239  if (! (is.rdstate () & std::ios::failbit))
4240  *valptr = value;
4241 
4242  return is;
4243  }
4244 
4245  template <typename T>
4246  std::istream&
4247  octave_scan (std::istream& is, const scanf_format_elt& fmt, T *valptr)
4248  {
4249  if (fmt.width)
4250  {
4251  // Limit input to fmt.width characters by reading into a
4252  // temporary stringstream buffer.
4253  std::string tmp;
4254 
4255  is.width (fmt.width);
4256  is >> tmp;
4257 
4258  std::istringstream ss (tmp);
4259 
4260  octave_scan_1 (ss, fmt, valptr);
4261  }
4262  else
4263  octave_scan_1 (is, fmt, valptr);
4264 
4265  return is;
4266  }
4267 
4268  // Note that this specialization is only used for reading characters, not
4269  // character strings. See BEGIN_S_CONVERSION for details.
4270 
4271  template <>
4272  std::istream&
4273  octave_scan<> (std::istream& is, const scanf_format_elt& /* fmt */,
4274  char *valptr)
4275  {
4276  return is >> valptr;
4277  }
4278 
4279  template <>
4280  std::istream&
4281  octave_scan<> (std::istream& is, const scanf_format_elt& fmt, double *valptr)
4282  {
4283  double& ref = *valptr;
4284 
4285  switch (fmt.type)
4286  {
4287  case 'e':
4288  case 'f':
4289  case 'g':
4290  {
4291  int c1 = std::istream::traits_type::eof ();
4292 
4293  while (is && (c1 = is.get ()) != std::istream::traits_type::eof ()
4294  && isspace (c1))
4295  ; // skip whitespace
4296 
4297  if (c1 != std::istream::traits_type::eof ())
4298  {
4299  is.putback (c1);
4300 
4301  ref = octave_read_value<double> (is);
4302  }
4303  }
4304  break;
4305 
4306  default:
4307  panic_impossible ();
4308  break;
4309  }
4310 
4311  return is;
4312  }
4313 
4314  template <typename T>
4315  void
4316  do_scanf_conv (std::istream& is, const scanf_format_elt& fmt,
4317  T valptr, Matrix& mval, double *data, octave_idx_type& idx,
4318  octave_idx_type& conversion_count, octave_idx_type nr,
4319  octave_idx_type max_size, bool discard)
4320  {
4321  octave_scan (is, fmt, valptr);
4322 
4323  if (! is)
4324  return;
4325 
4326  if (idx == max_size && ! discard)
4327  {
4328  max_size *= 2;
4329 
4330  if (nr > 0)
4331  mval.resize (nr, max_size / nr, 0.0);
4332  else
4333  mval.resize (max_size, 1, 0.0);
4334 
4335  data = mval.fortran_vec ();
4336  }
4337 
4338  if (! discard)
4339  {
4340  conversion_count++;
4341  data[idx++] = *(valptr);
4342  }
4343  }
4344 
4345  template void
4346  do_scanf_conv (std::istream&, const scanf_format_elt&, double*,
4347  Matrix&, double*, octave_idx_type&, octave_idx_type&,
4349 
// Consume whitespace from the stream named "is" in the enclosing scope.
// The first non-whitespace character is put back; if the end of input
// is reached nothing is put back.
#define DO_WHITESPACE_CONVERSION()                                      \
  do                                                                    \
    {                                                                   \
      const int ws_eof = std::istream::traits_type::eof ();             \
                                                                        \
      int ws_ch = ws_eof;                                               \
                                                                        \
      while (is)                                                        \
        {                                                               \
          ws_ch = is.get ();                                            \
                                                                        \
          /* stop at end of input or at the first non-space */          \
          if (ws_ch == ws_eof || ! isspace (ws_ch))                     \
            break;                                                      \
        }                                                               \
                                                                        \
      if (ws_ch != ws_eof)                                              \
        is.putback (ws_ch);                                             \
    }                                                                   \
  while (0)
4363 
// Match the literal text held in "fmt" against the stream "is" (both
// taken from the enclosing scope), one character at a time.  On the
// first mismatch the offending character is put back.  failbit is set
// on "is" unless every character of "fmt" was matched.
#define DO_LITERAL_CONVERSION()                                         \
  do                                                                    \
    {                                                                   \
      const int lit_len = fmt.length ();                                \
                                                                        \
      int lit_pos = 0;                                                  \
                                                                        \
      while (lit_pos < lit_len && is)                                   \
        {                                                               \
          const int lit_ch = is.get ();                                 \
                                                                        \
          if (lit_ch == std::istream::traits_type::eof ())              \
            break;                                                      \
                                                                        \
          if (lit_ch != static_cast<unsigned char> (fmt[lit_pos]))      \
            {                                                           \
              is.putback (lit_ch);                                      \
              break;                                                    \
            }                                                           \
                                                                        \
          lit_pos++;                                                    \
        }                                                               \
                                                                        \
      if (lit_pos < lit_len)                                            \
        is.setstate (std::ios::failbit);                                \
    }                                                                   \
  while (0)
4390 
// Match a literal '%' on the stream "is" from the enclosing scope.  Any
// other character is put back and failbit is set; failbit is also set
// at end of input.
#define DO_PCT_CONVERSION()                                             \
  do                                                                    \
    {                                                                   \
      const int pct_ch = is.get ();                                     \
                                                                        \
      if (pct_ch != '%')                                                \
        {                                                               \
          /* a real character (not EOF) must stay in the input */       \
          if (pct_ch != std::istream::traits_type::eof ())              \
            is.putback (pct_ch);                                        \
                                                                        \
          is.setstate (std::ios::failbit);                              \
        }                                                               \
    }                                                                   \
  while (0)
4408 
// For a '%c' format, read exactly WIDTH characters (one if no width was
// given) without skipping whitespace.  Declares "width", "tmp", "c" and
// "n" in the enclosing scope; "tmp" holds the characters that were
// read.  If the end of input is hit after at least one character was
// read, the stream state is cleared.  The expansion ends without a
// semicolon, so the invocation must supply one.
#define BEGIN_C_CONVERSION()                                            \
  is.unsetf (std::ios::skipws);                                         \
                                                                        \
  int width = 1;                                                        \
                                                                        \
  if (elt->width)                                                       \
    width = elt->width;                                                 \
                                                                        \
  std::string tmp (width, '\0');                                        \
                                                                        \
  int n = 0;                                                            \
  int c = std::istream::traits_type::eof ();                            \
                                                                        \
  while (is && n < width)                                               \
    {                                                                   \
      c = is.get ();                                                    \
                                                                        \
      if (c == std::istream::traits_type::eof ())                       \
        break;                                                          \
                                                                        \
      tmp[n++] = static_cast<char> (c);                                 \
    }                                                                   \
                                                                        \
  if (c == std::istream::traits_type::eof () && n > 0)                  \
    is.clear ();                                                        \
                                                                        \
  tmp.resize (n)
4427 
// For a '%s' format, skip initial whitespace and then read until the
// next whitespace character or until WIDTH characters have been read.
// Without a width the stream's own string extraction is used.  Declares
// "width" and "tmp" in the enclosing scope; "tmp" receives the text.
// The whitespace character that ends the field is put back.  If the end
// of input is hit after at least one character was read, the stream
// state is cleared.
#define BEGIN_S_CONVERSION()                                            \
  int width = elt->width;                                               \
                                                                        \
  std::string tmp;                                                      \
                                                                        \
  do                                                                    \
    {                                                                   \
      if (! width)                                                      \
        {                                                               \
          is >> std::ws >> tmp;                                         \
          break;                                                        \
        }                                                               \
                                                                        \
      const int s_eof = std::istream::traits_type::eof ();              \
                                                                        \
      tmp = std::string (width, '\0');                                  \
                                                                        \
      int c = s_eof;                                                    \
      int n = 0;                                                        \
                                                                        \
      /* drop leading whitespace, keep the first real character */      \
      while (is)                                                        \
        {                                                               \
          c = is.get ();                                                \
                                                                        \
          if (c == s_eof)                                               \
            break;                                                      \
                                                                        \
          if (! isspace (c))                                            \
            {                                                           \
              tmp[n++] = static_cast<char> (c);                         \
              break;                                                    \
            }                                                           \
        }                                                               \
                                                                        \
      /* collect up to WIDTH characters, stopping at whitespace */      \
      while (is && n < width)                                           \
        {                                                               \
          c = is.get ();                                                \
                                                                        \
          if (c == s_eof)                                               \
            break;                                                      \
                                                                        \
          if (isspace (c))                                              \
            {                                                           \
              is.putback (c);                                           \
              break;                                                    \
            }                                                           \
                                                                        \
          tmp[n++] = static_cast<char> (c);                             \
        }                                                               \
                                                                        \
      if (c == s_eof && n > 0)                                          \
        is.clear ();                                                    \
                                                                        \
      tmp.resize (n);                                                   \
    }                                                                   \
  while (0)
4477 
// This format must match a nonempty sequence of characters.
//
// For '[' the field consists of characters that are in the element's
// character class; for '^' it consists of characters that are not.
// At most WIDTH characters are read (no limit if no width was given).
// Declares "width" and "tmp" in the enclosing scope; "tmp" receives the
// matched text.
//
// The character that ends the field is always put back, whether or not
// a width was given, so it remains available to the next conversion.
// failbit is set if nothing was matched.  If the end of input is hit
// after at least one character was matched, the stream state is cleared.
#define BEGIN_CHAR_CLASS_CONVERSION()                                   \
  int width = elt->width;                                               \
                                                                        \
  std::string tmp;                                                      \
                                                                        \
  do                                                                    \
    {                                                                   \
      if (! width)                                                      \
        width = std::numeric_limits<int>::max ();                       \
                                                                        \
      std::ostringstream buf;                                           \
                                                                        \
      std::string char_class = elt->char_class;                         \
                                                                        \
      /* '[' accepts class members, '^' accepts everything else */      \
      const bool accept_members = (elt->type == '[');                   \
                                                                        \
      int c = std::istream::traits_type::eof ();                        \
                                                                        \
      int chars_read = 0;                                               \
                                                                        \
      while (is && chars_read < width                                   \
             && (c = is.get ()) != std::istream::traits_type::eof ())   \
        {                                                               \
          const bool in_class                                           \
            = (char_class.find (static_cast<char> (c))                  \
               != std::string::npos);                                   \
                                                                        \
          if (in_class != accept_members)                               \
            {                                                           \
              /* not part of the field: leave it in the input */        \
              is.putback (c);                                           \
              break;                                                    \
            }                                                           \
                                                                        \
          buf << static_cast<char> (c);                                 \
          chars_read++;                                                 \
        }                                                               \
                                                                        \
      tmp = buf.str ();                                                 \
                                                                        \
      if (tmp.empty ())                                                 \
        is.setstate (std::ios::failbit);                                \
      else if (c == std::istream::traits_type::eof ())                  \
        is.clear ();                                                    \
                                                                        \
    }                                                                   \
  while (0)
4525 
// Complete a character conversion started by one of the BEGIN_*
// macros.  "width" is reset to the number of characters held in "tmp".
// If the stream is still usable and the conversion is not being
// discarded, the conversion is counted and every character of "tmp" is
// appended to the output data as an unsigned char value, growing the
// result matrix (doubling its capacity) whenever it is full.
#define FINISH_CHARACTER_CONVERSION()                                   \
  do                                                                    \
    {                                                                   \
      width = tmp.length ();                                            \
                                                                        \
      if (is && ! discard)                                              \
        {                                                               \
          conversion_count++;                                           \
                                                                        \
          for (int i = 0; i < width; i++)                               \
            {                                                           \
              if (data_index == max_size)                               \
                {                                                       \
                  max_size *= 2;                                        \
                                                                        \
                  if (all_char_conv && one_elt_size_spec)               \
                    mval.resize (1, max_size, 0.0);                     \
                  else if (nr > 0)                                      \
                    mval.resize (nr, max_size / nr, 0.0);               \
                  else if (all_char_conv)                               \
                    panic_impossible ();                                \
                  else                                                  \
                    mval.resize (max_size, 1, 0.0);                     \
                                                                        \
                  /* resizing may move the storage */                   \
                  data = mval.fortran_vec ();                           \
                }                                                       \
                                                                        \
              data[data_index++] = static_cast<unsigned char> (tmp[i]); \
            }                                                           \
        }                                                               \
    }                                                                   \
  while (0)
4569 
4570  octave_value
4573  bool one_elt_size_spec,
4574  octave_idx_type& conversion_count,
4575  const std::string& who)
4576  {
4577  if (octave::application::interactive () && file_number () == 0)
4578  ::error ("%s: unable to read from stdin while running interactively",
4579  who.c_str ());
4580 
4581  octave_value retval = Matrix ();
4582 
4583  conversion_count = 0;
4584 
4585  octave_idx_type nconv = fmt_list.num_conversions ();
4586 
4587  octave_idx_type data_index = 0;
4588 
4589  if (nr == 0 || nc == 0)
4590  {
4591  if (one_elt_size_spec)
4592  nc = 0;
4593 
4594  return Matrix (nr, nc, 0.0);
4595  }
4596 
4597  std::istream *isp = input_stream ();
4598 
4599  bool all_char_conv = fmt_list.all_character_conversions ();
4600 
4601  Matrix mval;
4602  double *data = nullptr;
4603  octave_idx_type max_size = 0;
4604  octave_idx_type max_conv = 0;
4605 
4606  octave_idx_type final_nr = 0;
4607  octave_idx_type final_nc = 0;
4608 
4609  if (all_char_conv)
4610  {
4611  // Any of these could be resized later (if we have %s conversions,
4612  // we may read more than one element for each conversion).
4613  if (one_elt_size_spec)
4614  {
4615  max_size = 512;
4616  mval.resize (1, max_size, 0.0);
4617 
4618  if (nr > 0)
4619  max_conv = nr;
4620  }
4621  else if (nr > 0)
4622  {
4623  if (nc > 0)
4624  {
4625  mval.resize (nr, nc, 0.0);
4626  max_size = max_conv = nr * nc;
4627  }
4628  else
4629  {
4630  mval.resize (nr, 32, 0.0);
4631  max_size = nr * 32;
4632  }
4633  }
4634  else
4635  panic_impossible ();
4636  }
4637  else if (nr > 0)
4638  {
4639  if (nc > 0)
4640  {
4641  // Will not resize later.
4642  mval.resize (nr, nc, 0.0);
4643  max_size = nr * nc;
4644  max_conv = max_size;
4645  }
4646  else
4647  {
4648  // Maybe resize later.
4649  mval.resize (nr, 32, 0.0);
4650  max_size = nr * 32;
4651  }
4652  }
4653  else
4654  {
4655  // Maybe resize later.
4656  mval.resize (32, 1, 0.0);
4657  max_size = 32;
4658  }
4659 
4660  data = mval.fortran_vec ();
4661 
4662  if (isp)
4663  {
4664  std::istream& is = *isp;
4665 
4666  const scanf_format_elt *elt = fmt_list.first ();
4667 
4668  std::ios::fmtflags flags = is.flags ();
4669 
4670  octave_idx_type trips = 0;
4671 
4672  octave_idx_type num_fmt_elts = fmt_list.length ();
4673 
4674  for (;;)
4675  {
4676  octave_quit ();
4677 
4678  if (elt)
4679  {
4680  if (elt->type == scanf_format_elt::null
4683  || elt->type == '%')
4684  && max_conv > 0 && conversion_count == max_conv))
4685  {
4686  // We are done, either because we have reached the end of the
4687  // format string and are not cycling through the format again
4688  // or because we've converted all the values that have been
4689  // requested and the next format element is a conversion.
4690  // Determine final array size and exit.
4691  if (all_char_conv && one_elt_size_spec)
4692  {
4693  final_nr = 1;
4694  final_nc = data_index;
4695  }
4696  else
4697  {
4698  final_nr = nr;
4699  final_nc = (data_index - 1) / nr + 1;
4700  }
4701 
4702  break;
4703  }
4704  else if (data_index == max_size)
4705  {
4706  max_size *= 2;
4707 
4708  if (all_char_conv)
4709  {
4710  if (one_elt_size_spec)
4711  mval.resize (1, max_size, 0.0);
4712  else if (nr > 0)
4713  mval.resize (nr, max_size / nr, 0.0);
4714  else
4715  panic_impossible ();
4716  }
4717  else if (nr > 0)
4718  mval.resize (nr, max_size / nr, 0.0);
4719  else
4720  mval.resize (max_size, 1, 0.0);
4721 
4722  data = mval.fortran_vec ();
4723  }
4724 
4725  std::string fmt = elt->text;
4726 
4727  bool discard = elt->discard;
4728 
4729  switch (elt->type)
4730  {
4733  break;
4734 
4737  break;
4738 
4739  case '%':
4740  DO_PCT_CONVERSION ();
4741  break;
4742 
4743  case 'd': case 'i':
4744  {
4745  switch (elt->modifier)
4746  {
4747  case 'h':
4748  {
4749  int16_t tmp;
4750  do_scanf_conv (is, *elt, &tmp, mval, data,
4751  data_index, conversion_count,
4752  nr, max_size, discard);
4753  }
4754  break;
4755 
4756  case 'l':
4757  {
4758  int64_t tmp;
4759  do_scanf_conv (is, *elt, &tmp, mval, data,
4760  data_index, conversion_count,
4761  nr, max_size, discard);
4762  }
4763  break;
4764 
4765  default:
4766  {
4767  int32_t tmp;
4768  do_scanf_conv (is, *elt, &tmp, mval, data,
4769  data_index, conversion_count,
4770  nr, max_size, discard);
4771  }
4772  break;
4773  }
4774  }
4775  break;
4776 
4777  case 'o': case 'u': case 'x':
4778  {
4779  switch (elt->modifier)
4780  {
4781  case 'h':
4782  {
4783  uint16_t tmp;
4784  do_scanf_conv (is, *elt, &tmp, mval, data,
4785  data_index, conversion_count,
4786  nr, max_size, discard);
4787  }
4788  break;
4789 
4790  case 'l':
4791  {
4792  uint64_t tmp;
4793  do_scanf_conv (is, *elt, &tmp, mval, data,
4794  data_index, conversion_count,
4795  nr, max_size, discard);
4796  }
4797  break;
4798 
4799  default:
4800  {
4801  uint32_t tmp;
4802  do_scanf_conv (is, *elt, &tmp, mval, data,
4803  data_index, conversion_count,
4804  nr, max_size, discard);
4805  }
4806  break;
4807  }
4808  }
4809  break;
4810 
4811  case 'e': case 'f': case 'g':
4812  {
4813  double tmp;
4814 
4815  do_scanf_conv (is, *elt, &tmp, mval, data,
4816  data_index, conversion_count,
4817  nr, max_size, discard);
4818  }
4819  break;
4820 
4821  case 'c':
4822  {
4823  BEGIN_C_CONVERSION ();
4824 
4826 
4827  is.setf (flags);
4828  }
4829  break;
4830 
4831  case 's':
4832  {
4833  BEGIN_S_CONVERSION ();
4834 
4836  }
4837  break;
4838 
4839  case '[': case '^':
4840  {
4842 
4844  }
4845  break;
4846 
4847  case 'p':
4848  error ("%s: unsupported format specifier", who.c_str ());
4849  break;
4850 
4851  default:
4852  error ("%s: internal format error", who.c_str ());
4853  break;
4854  }
4855 
4856  if (! ok ())
4857  {
4858  break;
4859  }
4860  else if (! is)
4861  {
4862  if (all_char_conv)
4863  {
4864  if (one_elt_size_spec)
4865  {
4866  final_nr = 1;
4867  final_nc = data_index;
4868  }
4869  else if (data_index > nr)
4870  {
4871  final_nr = nr;
4872  final_nc = (data_index - 1) / nr + 1;
4873  }
4874  else
4875  {
4876  final_nr = data_index;
4877  final_nc = 1;
4878  }
4879  }
4880  else if (nr > 0)
4881  {
4882  if (data_index > nr)
4883  {
4884  final_nr = nr;
4885  final_nc = (data_index - 1) / nr + 1;
4886  }
4887  else
4888  {
4889  final_nr = data_index;
4890  final_nc = 1;
4891  }
4892  }
4893  else
4894  {
4895  final_nr = data_index;
4896  final_nc = 1;
4897  }
4898 
4899  // If it looks like we have a matching failure, then
4900  // reset the failbit in the stream state.
4901  if (is.rdstate () & std::ios::failbit)
4902  is.clear (is.rdstate () & (~std::ios::failbit));
4903 
4904  // FIXME: is this the right thing to do?
4905  if (octave::application::interactive ()
4906  && ! octave::application::forced_interactive ()
4907  && name () == "stdin")
4908  {
4909  is.clear ();
4910 
4911  // Skip to end of line.
4912  bool err;
4913  do_gets (-1, err, false, who);
4914  }
4915 
4916  break;
4917  }
4918  }
4919  else
4920  {
4921  error ("%s: internal format error", who.c_str ());
4922  break;
4923  }
4924 
4925  if (nconv == 0 && ++trips == num_fmt_elts)
4926  {
4927  if (all_char_conv && one_elt_size_spec)
4928  {
4929  final_nr = 1;
4930  final_nc = data_index;
4931  }
4932  else
4933  {
4934  final_nr = nr;
4935  final_nc = (data_index - 1) / nr + 1;
4936  }
4937 
4938  break;
4939  }
4940  else
4941  {
4942  // Cycle through the format list more than once if we have some
4943  // conversions to make and we haven't reached the limit on the
4944  // number of values to convert (possibly because there is no
4945  // specified limit).
4946  elt = fmt_list.next (nconv > 0
4947  && (max_conv == 0
4948  || conversion_count < max_conv));
4949  }
4950  }
4951  }
4952 
4953  if (ok ())
4954  {
4955  mval.resize (final_nr, final_nc, 0.0);
4956 
4957  retval = mval;
4958 
4959  if (all_char_conv)
4960  retval = retval.convert_to_str (false, true);
4961  }
4962 
4963  return retval;
4964  }
4965 
4966  octave_value
4968  octave_idx_type& conversion_count,
4969  const std::string& who)
4970  {
4971  octave_value retval = Matrix ();
4972 
4973  conversion_count = 0;
4974 
4975  std::istream *isp = input_stream ();
4976 
4977  if (! isp)
4978  invalid_operation (who, "reading");
4979  else
4980  {
4981  scanf_format_list fmt_list (fmt);
4982 
4983  if (fmt_list.num_conversions () == -1)
4984  ::error ("%s: invalid format specified", who.c_str ());
4985 
4986  octave_idx_type nr = -1;
4987  octave_idx_type nc = -1;
4988 
4989  bool one_elt_size_spec;
4990 
4991  get_size (size, nr, nc, one_elt_size_spec, who);
4992 
4993  retval = do_scanf (fmt_list, nr, nc, one_elt_size_spec,
4994  conversion_count, who);
4995  }
4996 
4997  return retval;
4998  }
4999 
5000  bool
5002  octave_value& retval, const std::string& who)
5003  {
5004  std::istream *isp = input_stream ();
5005 
5006  if (! isp)
5007  return false;
5008 
5009  bool quit = false;
5010 
5011  std::istream& is = *isp;
5012 
5013  std::ios::fmtflags flags = is.flags ();
5014 
5015  if (elt)
5016  {
5017  std::string fmt = elt->text;
5018 
5019  bool discard = elt->discard;
5020 
5021  switch (elt->type)
5022  {
5025  break;
5026 
5029  break;
5030 
5031  case '%':
5032  {
5033  DO_PCT_CONVERSION ();
5034 
5035  if (! is)
5036  quit = true;
5037  }
5038  break;
5039 
5040  case 'd': case 'i':
5041  {
5042  switch (elt->modifier)
5043  {
5044  case 'h':
5045  {
5046  int16_t tmp;
5047  if (octave_scan (is, *elt, &tmp))
5048  {
5049  if (! discard)
5050  retval = tmp;
5051  }
5052  else
5053  quit = true;
5054  }
5055  break;
5056 
5057  case 'l':
5058  {
5059  int64_t tmp;
5060  if (octave_scan (is, *elt, &tmp))
5061  {
5062  if (! discard)
5063  retval = tmp;
5064  }
5065  else
5066  quit = true;
5067  }
5068  break;
5069 
5070  default:
5071  {
5072  int32_t tmp;
5073  if (octave_scan (is, *elt, &tmp))
5074  {
5075  if (! discard)
5076  retval = tmp;
5077  }
5078  else
5079  quit = true;
5080  }
5081  break;
5082  }
5083  }
5084  break;
5085 
5086  case 'o': case 'u': case 'x':
5087  {
5088  switch (elt->modifier)
5089  {
5090  case 'h':
5091  {
5092  uint16_t tmp;
5093  if (octave_scan (is, *elt, &tmp))
5094  {
5095  if (! discard)
5096  retval = tmp;
5097  }
5098  else
5099  quit = true;
5100  }
5101  break;
5102 
5103  case 'l':
5104  {
5105  uint64_t tmp;
5106  if (octave_scan (is, *elt, &tmp))
5107  {
5108  if (! discard)
5109  retval = tmp;
5110  }
5111  else
5112  quit = true;
5113  }
5114  break;
5115 
5116  default:
5117  {
5118  uint32_t tmp;
5119  if (octave_scan (is, *elt, &tmp))
5120  {
5121  if (! discard)
5122  retval = tmp;
5123  }
5124  else
5125  quit = true;
5126  }
5127  break;
5128  }
5129  }
5130  break;
5131 
5132  case 'e': case 'f': case 'g':
5133  {
5134  double tmp;
5135 
5136  if (octave_scan (is, *elt, &tmp))
5137  {
5138  if (! discard)
5139  retval = tmp;
5140  }
5141  else
5142  quit = true;
5143  }
5144  break;
5145 
5146  case 'c':
5147  {
5148  BEGIN_C_CONVERSION ();
5149 
5150  if (! discard)
5151  retval = tmp;
5152 
5153  if (! is)
5154  quit = true;
5155 
5156  is.setf (flags);
5157  }
5158  break;
5159 
5160  case 's':
5161  {
5162  BEGIN_S_CONVERSION ();
5163 
5164  if (! discard)
5165  retval = tmp;
5166 
5167  if (! is)
5168  quit = true;
5169  }
5170  break;
5171 
5172  case '[':
5173  case '^':
5174  {
5176 
5177  if (! discard)
5178  retval = tmp;
5179 
5180  if (! is)
5181  quit = true;
5182  }
5183  break;
5184 
5185  case 'p':
5186  error ("%s: unsupported format specifier", who.c_str ());
5187  break;
5188 
5189  default:
5190  error ("%s: internal format error", who.c_str ());
5191  break;
5192  }
5193  }
5194 
5195  if (ok () && is.fail ())
5196  {
5197  error ("%s: read error", who.c_str ());
5198 
5199  // FIXME: is this the right thing to do?
5200 
5201  if (octave::application::interactive ()
5202  && ! octave::application::forced_interactive ()
5203  && name () == "stdin")
5204  {
5205  // Skip to end of line.
5206  bool err;
5207  do_gets (-1, err, false, who);
5208  }
5209  }
5210 
5211  return quit;
5212  }
5213 
5216  {
5218 
5219  std::istream *isp = input_stream ();
5220 
5221  if (! isp)
5222  invalid_operation (who, "reading");
5223  else
5224  {
5225  std::istream& is = *isp;
5226 
5227  scanf_format_list fmt_list (fmt);
5228 
5229  octave_idx_type nconv = fmt_list.num_conversions ();
5230 
5231  if (nconv == -1)
5232  ::error ("%s: invalid format specified", who.c_str ());
5233 
5234  is.clear ();
5235 
5236  octave_idx_type len = fmt_list.length ();
5237 
5238  retval.resize (nconv+2, Matrix ());
5239 
5240  const scanf_format_elt *elt = fmt_list.first ();
5241 
5242  int num_values = 0;
5243 
5244  bool quit = false;
5245 
5246  for (octave_idx_type i = 0; i < len; i++)
5247  {
5248  octave_value tmp;
5249 
5250  quit = do_oscanf (elt, tmp, who);
5251 
5252  if (quit)
5253  break;
5254  else
5255  {
5256  if (tmp.is_defined ())
5257  retval(num_values++) = tmp;
5258 
5259  if (! ok ())
5260  break;
5261 
5262  elt = fmt_list.next (nconv > 0);
5263  }
5264  }
5265 
5266  retval(nconv) = num_values;
5267 
5268  int err_num;
5269  retval(nconv+1) = error (false, err_num);
5270 
5271  if (! quit)
5272  {
5273  // Pick up any trailing stuff.
5274  if (ok () && len > nconv)
5275  {
5276  octave_value tmp;
5277 
5278  elt = fmt_list.next ();
5279 
5280  do_oscanf (elt, tmp, who);
5281  }
5282  }
5283  }
5284 
5285  return retval;
5286  }
5287 
5288  octave_value
5290  octave_idx_type ntimes,
5291  const octave_value_list& options,
5292  const std::string& who,
5293  octave_idx_type& read_count)
5294  {
5295  if (octave::application::interactive () && file_number () == 0)
5296  ::error ("%s: unable to read from stdin while running interactively",
5297  who.c_str ());
5298 
5299  octave_value retval = Cell (dim_vector (1, 1), Matrix (0, 1));
5300 
5301  std::istream *isp = input_stream ();
5302 
5303  if (! isp)
5304  invalid_operation (who, "reading");
5305  else
5306  {
5307  octave::textscan scanner (who);
5308 
5309  retval = scanner.scan (*isp, fmt, ntimes, options, read_count);
5310  }
5311 
5312  return retval;
5313  }
5314 
5315  // Functions that are defined for all output streams
5316  // (output streams are those that define os).
5317 
5318  int
5320  {
5321  int retval = -1;
5322 
5323  std::ostream *os = output_stream ();
5324 
5325  if (! os)
5326  invalid_operation ("fflush", "writing");
5327  else
5328  {
5329  os->flush ();
5330 
5331  if (os->good ())
5332  retval = 0;
5333  }
5334 
5335  return retval;
5336  }
5337 
5338  class
5340  {
5341  public:
5342 
5343  enum state { ok, conversion_error };
5344 
5346  : values (args), val_idx (0), elt_idx (0),
5347  n_vals (values.length ()), n_elts (0), have_data (false),
5348  curr_state (ok)
5349  {
5350  for (octave_idx_type i = 0; i < values.length (); i++)
5351  {
5352  octave_value val = values(i);
5353 
5354  if (val.isstruct () || val.iscell () || val.isobject ())
5355  err_wrong_type_arg (who, val);
5356  }
5357  }
5358 
5359  // No copying!
5360 
5361  printf_value_cache (const printf_value_cache&) = delete;
5362 
5363  printf_value_cache& operator = (const printf_value_cache&) = delete;
5364 
5365  ~printf_value_cache (void) = default;
5366 
5367  // Get the current value as a double and advance the internal pointer.
5368  octave_value get_next_value (char type = 0);
5369 
5370  // Get the current value as an int and advance the internal pointer.
5371  int int_value (void);
5372 
5373  operator bool () const { return (curr_state == ok); }
5374 
5375  bool exhausted (void) { return (val_idx >= n_vals); }
5376 
5377  private:
5378 
5380  int val_idx;
5381  int elt_idx;
5382  int n_vals;
5383  int n_elts;
5387 
5388  // Must create value cache with values!
5389 
5390  printf_value_cache (void);
5391  };
5392 
5393  octave_value
5395  {
5397 
5398  if (exhausted ())
5400 
5401  while (! exhausted ())
5402  {
5403  if (! have_data)
5404  {
5405  curr_val = values (val_idx);
5406 
5407  elt_idx = 0;
5408  n_elts = curr_val.numel ();
5409  have_data = true;
5410  }
5411 
5412  if (elt_idx < n_elts)
5413  {
5414  if (type == 's')
5415  {
5416  if (curr_val.