GNU Octave  4.0.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
dlmread.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2008-2015 Jonathan Stickel
4 Copyright (C) 2010 Jaroslav Hajek
5 
6 This file is part of Octave.
7 
8 Octave is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 // Adapted from previous version of dlmread.occ as authored by Kai
25 // Habel, but core code has been completely re-written.
26 
27 #ifdef HAVE_CONFIG_H
28 #include <config.h>
29 #endif
30 
31 #include <cctype>
32 #include <fstream>
33 #include <limits>
34 
35 #include "file-ops.h"
36 #include "lo-ieee.h"
37 
38 #include "defun.h"
39 #include "oct-stream.h"
40 #include "error.h"
41 #include "oct-obj.h"
42 #include "utils.h"
43 
44 static const octave_idx_type idx_max =
46 
47 static bool
49 {
50  bool stat = false;
51 
52  if (is.peek () == std::istream::traits_type::eof ())
53  stat = true;
54  else
55  {
56  if (::isalpha (is.peek ()))
57  {
58  col = 0;
59  while (is && ::isalpha (is.peek ()))
60  {
61  char ch = is.get ();
62  col *= 26;
63  if (ch >= 'a')
64  col += ch - 'a' + 1;
65  else
66  col += ch - 'A' + 1;
67  }
68  col --;
69 
70  if (is)
71  {
72  is >> row;
73  row --;
74  if (is)
75  stat = true;
76  }
77  }
78  }
79 
80  return stat;
81 }
82 
83 static bool
84 parse_range_spec (const octave_value& range_spec,
87 {
88  bool stat = true;
89 
90  if (range_spec.is_string ())
91  {
92  std::istringstream is (range_spec.string_value ());
93  char ch = is.peek ();
94 
95  if (ch == '.' || ch == ':')
96  {
97  rlo = 0;
98  clo = 0;
99  ch = is.get ();
100  if (ch == '.')
101  {
102  ch = is.get ();
103  if (ch != '.')
104  stat = false;
105  }
106  }
107  else
108  {
109  stat = read_cell_spec (is, rlo, clo);
110 
111  if (stat)
112  {
113  ch = is.peek ();
114 
115  if (ch == '.' || ch == ':')
116  {
117  ch = is.get ();
118  if (ch == '.')
119  {
120  ch = is.get ();
121  if (!is || ch != '.')
122  stat = false;
123  }
124 
125  rup = idx_max - 1;
126  cup = idx_max - 1;
127  }
128  else
129  {
130  rup = rlo;
131  cup = clo;
132  if (!is || !is.eof ())
133  stat = false;
134  }
135  }
136  }
137 
138  if (stat && is && !is.eof ())
139  stat = read_cell_spec (is, rup, cup);
140 
141  if (!is || !is.eof ())
142  stat = false;
143  }
144  else if (range_spec.is_real_matrix () && range_spec.numel () == 4)
145  {
146  ColumnVector range(range_spec.vector_value ());
147  // double --> unsigned int
148  rlo = static_cast<octave_idx_type> (range(0));
149  clo = static_cast<octave_idx_type> (range(1));
150  rup = static_cast<octave_idx_type> (range(2));
151  cup = static_cast<octave_idx_type> (range(3));
152  }
153  else
154  stat = false;
155 
156  return stat;
157 }
158 
159 DEFUN (dlmread, args, ,
160  "-*- texinfo -*-\n\
161 @deftypefn {Built-in Function} {@var{data} =} dlmread (@var{file})\n\
162 @deftypefnx {Built-in Function} {@var{data} =} dlmread (@var{file}, @var{sep})\n\
163 @deftypefnx {Built-in Function} {@var{data} =} dlmread (@var{file}, @var{sep}, @var{r0}, @var{c0})\n\
164 @deftypefnx {Built-in Function} {@var{data} =} dlmread (@var{file}, @var{sep}, @var{range})\n\
165 @deftypefnx {Built-in Function} {@var{data} =} dlmread (@dots{}, \"emptyvalue\", @var{EMPTYVAL})\n\
166 Read the matrix @var{data} from a text file which uses the delimiter\n\
167 @var{sep} between data values.\n\
168 \n\
169 If @var{sep} is not defined the separator between fields is determined from\n\
170 the file itself.\n\
171 \n\
172 Given two scalar arguments @var{r0} and @var{c0}, these define the starting\n\
173 row and column of the data to be read. These values are indexed from zero,\n\
174 such that the first row corresponds to an index of zero.\n\
175 \n\
176 The @var{range} parameter may be a 4-element vector containing the upper\n\
177 left and lower right corner @code{[@var{R0},@var{C0},@var{R1},@var{C1}]}\n\
178 where the lowest index value is zero. Alternatively, a spreadsheet style\n\
179 range such as @qcode{\"A2..Q15\"} or @qcode{\"T1:AA5\"} can be used. The\n\
180 lowest alphabetical index @qcode{'A'} refers to the first column. The\n\
181 lowest row index is 1.\n\
182 \n\
183 @var{file} should be a file name or file id given by @code{fopen}. In the\n\
184 latter case, the file is read until end of file is reached.\n\
185 \n\
186 The @qcode{\"emptyvalue\"} option may be used to specify the value used to\n\
187 fill empty fields. The default is zero.\n\
188 @seealso{csvread, textscan, textread, dlmwrite}\n\
189 @end deftypefn")
190 {
191  octave_value_list retval;
192 
193  int nargin = args.length ();
194 
195  double empty_value = 0.0;
196 
197  if (nargin > 2 && args(nargin-2).is_string ()
198  && args(nargin-2).string_value () == "emptyvalue")
199  {
200  empty_value = args(nargin-1).double_value ();
201  if (error_state)
202  return retval;
203  nargin -= 2;
204  }
205 
206  if (nargin < 1 || nargin > 4)
207  {
208  print_usage ();
209  return retval;
210  }
211 
212  std::istream *input = 0;
213  std::ifstream input_file;
214 
215  if (args(0).is_string ())
216  {
217  // File name.
218  std::string fname (args(0).string_value ());
219 
220  std::string tname = file_ops::tilde_expand (fname);
221 
222  input_file.open (tname.c_str (), std::ios::in);
223 
224  if (! input_file)
225  {
226  error ("dlmread: unable to open file '%s'", fname.c_str ());
227  return retval;
228  }
229  else
230  input = &input_file;
231  }
232  else if (args(0).is_scalar_type ())
233  {
234  octave_stream is = octave_stream_list::lookup (args(0), "dlmread");
235 
236  if (error_state)
237  return retval;
238 
239  input = is.input_stream ();
240 
241  if (! input)
242  {
243  error ("dlmread: stream FILE not open for input");
244  return retval;
245  }
246  }
247  else
248  {
249  error ("dlmread: FILE argument must be a string or file id");
250  return retval;
251  }
252 
253  // Set default separator.
254  std::string sep;
255  if (nargin > 1)
256  {
257  if (args(1).is_sq_string ())
258  sep = do_string_escapes (args(1).string_value ());
259  else
260  sep = args(1).string_value ();
261 
262  if (error_state)
263  return retval;
264  }
265 
266  // Take a subset if a range was given.
267  octave_idx_type r0 = 0;
268  octave_idx_type c0 = 0;
269  octave_idx_type r1 = idx_max-1;
270  octave_idx_type c1 = idx_max-1;
271  if (nargin > 2)
272  {
273  if (nargin == 3)
274  {
275  if (!parse_range_spec (args(2), r0, c0, r1, c1))
276  error ("dlmread: error parsing RANGE");
277  }
278  else if (nargin == 4)
279  {
280  r0 = args(2).idx_type_value ();
281  c0 = args(3).idx_type_value ();
282 
283  if (error_state)
284  return retval;
285  }
286 
287  if (r0 < 0 || c0 < 0)
288  error ("dlmread: left & top must be positive");
289  }
290 
291  if (!error_state)
292  {
293  octave_idx_type i = 0;
294  octave_idx_type j = 0;
295  octave_idx_type r = 1;
296  octave_idx_type c = 1;
297  octave_idx_type rmax = 0;
298  octave_idx_type cmax = 0;
299 
300  Matrix rdata;
301  ComplexMatrix cdata;
302 
303  bool iscmplx = false;
304  bool sepflag = false;
305 
306  std::string line;
307 
308  // Skip the r0 leading lines as these might be a header.
309  for (octave_idx_type m = 0; m < r0; m++)
310  getline (*input, line);
311  r1 -= r0;
312 
313  std::istringstream tmp_stream;
314 
315  // Read in the data one field at a time, growing the data matrix
316  // as needed.
317  while (getline (*input, line))
318  {
319  // Skip blank lines for compatibility.
320  if (line.find_first_not_of (" \t") == std::string::npos)
321  continue;
322 
323  // To be compatible with matlab, blank separator should
324  // correspond to whitespace as delimter.
325  if (!sep.length ())
326  {
327  size_t n = line.find_first_of (",:; \t",
328  line.find_first_of ("0123456789"));
329  if (n == std::string::npos)
330  {
331  sep = " \t";
332  sepflag = true;
333  }
334  else
335  {
336  char ch = line.at (n);
337 
338  switch (line.at (n))
339  {
340  case ' ':
341  case '\t':
342  sepflag = true;
343  sep = " \t";
344  break;
345 
346  default:
347  sep = ch;
348  break;
349  }
350  }
351  }
352 
353  if (cmax == 0)
354  {
355  // Try to estimate the number of columns. Skip leading
356  // whitespace.
357  size_t pos1 = line.find_first_not_of (" \t");
358  do
359  {
360  size_t pos2 = line.find_first_of (sep, pos1);
361 
362  if (sepflag && pos2 != std::string::npos)
363  // Treat consecutive separators as one.
364  {
365  pos2 = line.find_first_not_of (sep, pos2);
366  if (pos2 != std::string::npos)
367  pos2 -= 1;
368  else
369  pos2 = line.length () - 1;
370  }
371 
372  cmax++;
373 
374  if (pos2 != std::string::npos)
375  pos1 = pos2 + 1;
376  else
377  pos1 = std::string::npos;
378 
379  }
380  while (pos1 != std::string::npos);
381 
382  if (iscmplx)
383  cdata.resize (rmax, cmax);
384  else
385  rdata.resize (rmax, cmax);
386  }
387 
388  r = (r > i + 1 ? r : i + 1);
389  j = 0;
390  // Skip leading whitespace.
391  size_t pos1 = line.find_first_not_of (" \t");
392  do
393  {
394  octave_quit ();
395 
396  size_t pos2 = line.find_first_of (sep, pos1);
397  std::string str = line.substr (pos1, pos2 - pos1);
398 
399  if (sepflag && pos2 != std::string::npos)
400  // Treat consecutive separators as one.
401  pos2 = line.find_first_not_of (sep, pos2) - 1;
402 
403  c = (c > j + 1 ? c : j + 1);
404  if (r > rmax || c > cmax)
405  {
406  // Use resize_and_fill for the case of not-equal
407  // length rows.
408  rmax = 2*r;
409  cmax = c;
410  if (iscmplx)
411  cdata.resize (rmax, cmax);
412  else
413  rdata.resize (rmax, cmax);
414  }
415 
416  tmp_stream.str (str);
417  tmp_stream.clear ();
418 
419  double x = octave_read_double (tmp_stream);
420  if (tmp_stream)
421  {
422  if (tmp_stream.eof ())
423  {
424  if (iscmplx)
425  cdata(i,j++) = x;
426  else
427  rdata(i,j++) = x;
428  }
429  else if (std::toupper (tmp_stream.peek ()) == 'I')
430  {
431  // This is to allow pure imaginary numbers.
432  if (iscmplx)
433  cdata(i,j++) = x;
434  else
435  rdata(i,j++) = x;
436  }
437  else
438  {
439  double y = octave_read_double (tmp_stream);
440 
441  if (!iscmplx && y != 0.)
442  {
443  iscmplx = true;
444  cdata = ComplexMatrix (rdata);
445  }
446 
447  if (iscmplx)
448  cdata(i,j++) = Complex (x, y);
449  else
450  rdata(i,j++) = x;
451  }
452  }
453  else if (iscmplx)
454  cdata(i,j++) = empty_value;
455  else
456  rdata(i,j++) = empty_value;
457 
458  if (pos2 != std::string::npos)
459  pos1 = pos2 + 1;
460  else
461  pos1 = std::string::npos;
462 
463  }
464  while (pos1 != std::string::npos);
465 
466  if (i == r1)
467  break;
468 
469  i++;
470  }
471 
472  if (r1 >= r)
473  r1 = r - 1;
474  if (c1 >= c)
475  c1 = c - 1;
476 
477  // Now take the subset of the matrix if there are any values.
478  if (i > 0 || j > 0)
479  {
480  if (iscmplx)
481  cdata = cdata.extract (0, c0, r1, c1);
482  else
483  rdata = rdata.extract (0, c0, r1, c1);
484  }
485 
486  if (iscmplx)
487  retval(0) = cdata;
488  else
489  retval(0) = rdata;
490  }
491 
492  return retval;
493 }
494 
495 /*
496 %!shared file
497 %! file = tempname ();
498 %! fid = fopen (file, "wt");
499 %! fwrite (fid, "1, 2, 3\n4, 5, 6\n7, 8, 9\n10, 11, 12");
500 %! fclose (fid);
501 
502 %!assert (dlmread (file), [1, 2, 3; 4, 5, 6; 7, 8, 9;10, 11, 12])
503 %!assert (dlmread (file, ","), [1, 2, 3; 4, 5, 6; 7, 8, 9; 10, 11, 12])
504 %!assert (dlmread (file, ",", [1, 0, 2, 1]), [4, 5; 7, 8])
505 %!assert (dlmread (file, ",", "B1..C2"), [2, 3; 5, 6])
506 %!assert (dlmread (file, ",", "B1:C2"), [2, 3; 5, 6])
507 %!assert (dlmread (file, ",", "..C2"), [1, 2, 3; 4, 5, 6])
508 %!assert (dlmread (file, ",", 0, 1), [2, 3; 5, 6; 8, 9; 11, 12])
509 %!assert (dlmread (file, ",", "B1.."), [2, 3; 5, 6; 8, 9; 11, 12])
510 %!error (dlmread (file, ",", [0 1]))
511 
512 %!test
513 %! unlink (file);
514 
515 %!shared file
516 %! file = tempname ();
517 %! fid = fopen (file, "wt");
518 %! fwrite (fid, "1, 2, 3\n4+4i, 5, 6\n7, 8, 9\n10, 11, 12");
519 %! fclose (fid);
520 
521 %!assert (dlmread (file), [1, 2, 3; 4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12])
522 %!assert (dlmread (file, ","), [1, 2, 3; 4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12])
523 %!assert (dlmread (file, ",", [1, 0, 2, 1]), [4 + 4i, 5; 7, 8])
524 %!assert (dlmread (file, ",", "A2..B3"), [4 + 4i, 5; 7, 8])
525 %!assert (dlmread (file, ",", "A2:B3"), [4 + 4i, 5; 7, 8])
526 %!assert (dlmread (file, ",", "..B3"), [1, 2; 4 + 4i, 5; 7, 8])
527 %!assert (dlmread (file, ",", 1, 0), [4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12])
528 %!assert (dlmread (file, ",", "A2.."), [4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12])
529 %!error (dlmread (file, ",", [0 1]))
530 
531 %!test
532 %! unlink (file);
533 */
static octave_stream lookup(int fid, const std::string &who=std::string())
Definition: oct-stream.cc:4158
double octave_read_double(std::istream &is)
Definition: lo-utils.h:103
static bool parse_range_spec(const octave_value &range_spec, octave_idx_type &rlo, octave_idx_type &clo, octave_idx_type &rup, octave_idx_type &cup)
Definition: dlmread.cc:84
void resize(octave_idx_type nr, octave_idx_type nc, double rfv=0)
Definition: dMatrix.h:130
Matrix extract(octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const
Definition: dMatrix.cc:620
OCTINTERP_API void print_usage(void)
Definition: defun.cc:51
octave_idx_type length(void) const
Definition: oct-obj.h:89
friend class ComplexMatrix
Definition: dMatrix.h:112
#define DEFUN(name, args_name, nargout_name, doc)
Definition: defun.h:44
void error(const char *fmt,...)
Definition: error.cc:476
RowVector row(octave_idx_type i) const
Definition: dMatrix.cc:639
std::istream * input_stream(void)
Definition: oct-stream.h:625
static std::string tilde_expand(const std::string &)
Definition: file-ops.cc:286
octave_idx_type numel(const octave_value_list &idx)
Definition: ov.h:395
std::string string_value(bool force=false) const
Definition: ov.h:897
bool is_string(void) const
Definition: ov.h:562
int error_state
Definition: error.cc:101
static const octave_idx_type idx_max
Definition: dlmread.cc:44
Definition: dMatrix.h:35
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:233
void resize(octave_idx_type nr, octave_idx_type nc, const Complex &rfv=Complex(0))
Definition: CMatrix.h:170
subroutine stat(x, n, av, var, xmin, xmax)
Definition: tstgmn.for:111
static bool read_cell_spec(std::istream &is, octave_idx_type &row, octave_idx_type &col)
Definition: dlmread.cc:48
Array< double > vector_value(bool frc_str_conv=false, bool frc_vec_conv=false) const
Definition: ov.cc:1690
std::string do_string_escapes(const std::string &s)
Definition: utils.cc:618
static int input(yyscan_t yyscanner)
static MArray< double > const octave_idx_type const octave_idx_type octave_idx_type octave_idx_type octave_idx_type c1
static MArray< double > const octave_idx_type const octave_idx_type octave_idx_type r1
std::complex< double > Complex
Definition: oct-cmplx.h:29
ComplexMatrix extract(octave_idx_type r1, octave_idx_type c1, octave_idx_type r2, octave_idx_type c2) const
Definition: CMatrix.cc:971
F77_RET_T const double * x
bool is_real_matrix(void) const
Definition: ov.h:538