GNU Octave  3.8.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
dlmread.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2008-2013 Jonathan Stickel
4 Copyright (C) 2010 Jaroslav Hajek
5 
6 This file is part of Octave.
7 
8 Octave is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 // Adapted from previous version of dlmread.occ as authored by Kai
25 // Habel, but core code has been completely re-written.
26 
27 #ifdef HAVE_CONFIG_H
28 #include <config.h>
29 #endif
30 
31 #include <cctype>
32 #include <fstream>
33 #include <limits>
34 
35 #include "file-ops.h"
36 #include "lo-ieee.h"
37 
38 #include "defun.h"
39 #include "oct-stream.h"
40 #include "error.h"
41 #include "oct-obj.h"
42 #include "utils.h"
43 
44 static const octave_idx_type idx_max =
46 
47 static bool
49 {
50  bool stat = false;
51 
52  if (is.peek () == std::istream::traits_type::eof ())
53  stat = true;
54  else
55  {
56  if (::isalpha (is.peek ()))
57  {
58  col = 0;
59  while (is && ::isalpha (is.peek ()))
60  {
61  char ch = is.get ();
62  col *= 26;
63  if (ch >= 'a')
64  col += ch - 'a' + 1;
65  else
66  col += ch - 'A' + 1;
67  }
68  col --;
69 
70  if (is)
71  {
72  is >> row;
73  row --;
74  if (is)
75  stat = true;
76  }
77  }
78  }
79 
80  return stat;
81 }
82 
83 static bool
84 parse_range_spec (const octave_value& range_spec,
87 {
88  bool stat = true;
89 
90  if (range_spec.is_string ())
91  {
92  std::istringstream is (range_spec.string_value ());
93  char ch = is.peek ();
94 
95  if (ch == '.' || ch == ':')
96  {
97  rlo = 0;
98  clo = 0;
99  ch = is.get ();
100  if (ch == '.')
101  {
102  ch = is.get ();
103  if (ch != '.')
104  stat = false;
105  }
106  }
107  else
108  {
109  stat = read_cell_spec (is, rlo, clo);
110 
111  if (stat)
112  {
113  ch = is.peek ();
114 
115  if (ch == '.' || ch == ':')
116  {
117  ch = is.get ();
118  if (ch == '.')
119  {
120  ch = is.get ();
121  if (!is || ch != '.')
122  stat = false;
123  }
124 
125  rup = idx_max - 1;
126  cup = idx_max - 1;
127  }
128  else
129  {
130  rup = rlo;
131  cup = clo;
132  if (!is || !is.eof ())
133  stat = false;
134  }
135  }
136  }
137 
138  if (stat && is && !is.eof ())
139  stat = read_cell_spec (is, rup, cup);
140 
141  if (!is || !is.eof ())
142  stat = false;
143  }
144  else if (range_spec.is_real_matrix () && range_spec.numel () == 4)
145  {
146  ColumnVector range(range_spec.vector_value ());
147  // double --> unsigned int
148  rlo = static_cast<octave_idx_type> (range(0));
149  clo = static_cast<octave_idx_type> (range(1));
150  rup = static_cast<octave_idx_type> (range(2));
151  cup = static_cast<octave_idx_type> (range(3));
152  }
153  else
154  stat = false;
155 
156  return stat;
157 }
158 
159 DEFUN (dlmread, args, ,
160  "-*- texinfo -*-\n\
161 @deftypefn {Built-in Function} {@var{data} =} dlmread (@var{file})\n\
162 @deftypefnx {Built-in Function} {@var{data} =} dlmread (@var{file}, @var{sep})\n\
163 @deftypefnx {Built-in Function} {@var{data} =} dlmread (@var{file}, @var{sep}, @var{r0}, @var{c0})\n\
164 @deftypefnx {Built-in Function} {@var{data} =} dlmread (@var{file}, @var{sep}, @var{range})\n\
165 @deftypefnx {Built-in Function} {@var{data} =} dlmread (@dots{}, \"emptyvalue\", @var{EMPTYVAL})\n\
166 Read the matrix @var{data} from a text file. If not defined the separator\n\
167 between fields is determined from the file itself. Otherwise the\n\
168 separation character is defined by @var{sep}.\n\
169 \n\
170 Given two scalar arguments @var{r0} and @var{c0}, these define the starting\n\
171 row and column of the data to be read. These values are indexed from zero,\n\
172 such that the first row corresponds to an index of zero.\n\
173 \n\
174 The @var{range} parameter may be a 4-element vector containing the upper\n\
175 left and lower right corner @code{[@var{R0},@var{C0},@var{R1},@var{C1}]}\n\
176 where the lowest index value is zero. Alternatively, a spreadsheet style\n\
177 range such as @qcode{\"A2..Q15\"} or @qcode{\"T1:AA5\"} can be used. The\n\
178 lowest alphabetical index @qcode{'A'} refers to the first column. The\n\
179 lowest row index is 1.\n\
180 \n\
181 @var{file} should be a file name or file id given by @code{fopen}. In the\n\
182 latter case, the file is read until end of file is reached.\n\
183 \n\
184 The @qcode{\"emptyvalue\"} option may be used to specify the value used to\n\
185 fill empty fields. The default is zero.\n\
186 @seealso{csvread, textscan, textread, dlmwrite}\n\
187 @end deftypefn")
188 {
189  octave_value_list retval;
190 
191  int nargin = args.length ();
192 
193  double empty_value = 0.0;
194 
195  if (nargin > 2 && args(nargin-2).is_string ()
196  && args(nargin-2).string_value () == "emptyvalue")
197  {
198  empty_value = args(nargin-1).double_value ();
199  if (error_state)
200  return retval;
201  nargin -= 2;
202  }
203 
204  if (nargin < 1 || nargin > 4)
205  {
206  print_usage ();
207  return retval;
208  }
209 
210  std::istream *input = 0;
211  std::ifstream input_file;
212 
213  if (args(0).is_string ())
214  {
215  // File name.
216  std::string fname (args(0).string_value ());
217  if (error_state)
218  return retval;
219 
220  std::string tname = file_ops::tilde_expand (fname);
221 
222  input_file.open (tname.c_str (), std::ios::in);
223 
224  if (! input_file)
225  {
226  error ("dlmread: unable to open file '%s'", fname.c_str ());
227  return retval;
228  }
229  else
230  input = &input_file;
231  }
232  else if (args(0).is_scalar_type ())
233  {
234  octave_stream is = octave_stream_list::lookup (args(0), "dlmread");
235 
236  if (error_state)
237  return retval;
238 
239  input = is.input_stream ();
240 
241  if (! input)
242  {
243  error ("dlmread: stream FILE not open for input");
244  return retval;
245  }
246  }
247  else
248  {
249  error ("dlmread: FILE argument must be a string or file id");
250  return retval;
251  }
252 
253  // Set default separator.
254  std::string sep;
255  if (nargin > 1)
256  {
257  if (args(1).is_sq_string ())
258  sep = do_string_escapes (args(1).string_value ());
259  else
260  sep = args(1).string_value ();
261 
262  if (error_state)
263  return retval;
264  }
265 
266  // Take a subset if a range was given.
267  octave_idx_type r0 = 0, c0 = 0, r1 = idx_max-1, c1 = idx_max-1;
268  if (nargin > 2)
269  {
270  if (nargin == 3)
271  {
272  if (!parse_range_spec (args (2), r0, c0, r1, c1))
273  error ("dlmread: error parsing RANGE");
274  }
275  else if (nargin == 4)
276  {
277  r0 = args(2).idx_type_value ();
278  c0 = args(3).idx_type_value ();
279 
280  if (error_state)
281  return retval;
282  }
283 
284  if (r0 < 0 || c0 < 0)
285  error ("dlmread: left & top must be positive");
286  }
287 
288  if (!error_state)
289  {
290  octave_idx_type i = 0, j = 0, r = 1, c = 1, rmax = 0, cmax = 0;
291 
292  Matrix rdata;
293  ComplexMatrix cdata;
294 
295  bool iscmplx = false;
296  bool sepflag = false;
297 
298  std::string line;
299 
300  // Skip the r0 leading lines as these might be a header.
301  for (octave_idx_type m = 0; m < r0; m++)
302  getline (*input, line);
303  r1 -= r0;
304 
305  std::istringstream tmp_stream;
306 
307  // Read in the data one field at a time, growing the data matrix
308  // as needed.
309  while (getline (*input, line))
310  {
311  // Skip blank lines for compatibility.
312  if (line.find_first_not_of (" \t") == std::string::npos)
313  continue;
314 
315  // To be compatible with matlab, blank separator should
316  // correspond to whitespace as delimter.
317  if (!sep.length ())
318  {
319  size_t n = line.find_first_of (",:; \t",
320  line.find_first_of ("0123456789"));
321  if (n == std::string::npos)
322  {
323  sep = " \t";
324  sepflag = true;
325  }
326  else
327  {
328  char ch = line.at (n);
329 
330  switch (line.at (n))
331  {
332  case ' ':
333  case '\t':
334  sepflag = true;
335  sep = " \t";
336  break;
337 
338  default:
339  sep = ch;
340  break;
341  }
342  }
343  }
344 
345  if (cmax == 0)
346  {
347  // Try to estimate the number of columns. Skip leading
348  // whitespace.
349  size_t pos1 = line.find_first_not_of (" \t");
350  do
351  {
352  size_t pos2 = line.find_first_of (sep, pos1);
353 
354  if (sepflag && pos2 != std::string::npos)
355  // Treat consecutive separators as one.
356  {
357  pos2 = line.find_first_not_of (sep, pos2);
358  if (pos2 != std::string::npos)
359  pos2 -= 1;
360  else
361  pos2 = line.length () - 1;
362  }
363 
364  cmax++;
365 
366  if (pos2 != std::string::npos)
367  pos1 = pos2 + 1;
368  else
369  pos1 = std::string::npos;
370 
371  }
372  while (pos1 != std::string::npos);
373 
374  if (iscmplx)
375  cdata.resize (rmax, cmax);
376  else
377  rdata.resize (rmax, cmax);
378  }
379 
380  r = (r > i + 1 ? r : i + 1);
381  j = 0;
382  // Skip leading whitespace.
383  size_t pos1 = line.find_first_not_of (" \t");
384  do
385  {
386  octave_quit ();
387 
388  size_t pos2 = line.find_first_of (sep, pos1);
389  std::string str = line.substr (pos1, pos2 - pos1);
390 
391  if (sepflag && pos2 != std::string::npos)
392  // Treat consecutive separators as one.
393  pos2 = line.find_first_not_of (sep, pos2) - 1;
394 
395  c = (c > j + 1 ? c : j + 1);
396  if (r > rmax || c > cmax)
397  {
398  // Use resize_and_fill for the case of not-equal
399  // length rows.
400  rmax = 2*r;
401  cmax = c;
402  if (iscmplx)
403  cdata.resize (rmax, cmax);
404  else
405  rdata.resize (rmax, cmax);
406  }
407 
408  tmp_stream.str (str);
409  tmp_stream.clear ();
410 
411  double x = octave_read_double (tmp_stream);
412  if (tmp_stream)
413  {
414  if (tmp_stream.eof ())
415  {
416  if (iscmplx)
417  cdata(i,j++) = x;
418  else
419  rdata(i,j++) = x;
420  }
421  else if (std::toupper (tmp_stream.peek ()) == 'I')
422  {
423  // This is to allow pure imaginary numbers.
424  if (iscmplx)
425  cdata(i,j++) = x;
426  else
427  rdata(i,j++) = x;
428  }
429  else
430  {
431  double y = octave_read_double (tmp_stream);
432 
433  if (!iscmplx && y != 0.)
434  {
435  iscmplx = true;
436  cdata = ComplexMatrix (rdata);
437  }
438 
439  if (iscmplx)
440  cdata(i,j++) = Complex (x, y);
441  else
442  rdata(i,j++) = x;
443  }
444  }
445  else if (iscmplx)
446  cdata(i,j++) = empty_value;
447  else
448  rdata(i,j++) = empty_value;
449 
450  if (pos2 != std::string::npos)
451  pos1 = pos2 + 1;
452  else
453  pos1 = std::string::npos;
454 
455  }
456  while (pos1 != std::string::npos);
457 
458  if (i == r1)
459  break;
460 
461  i++;
462  }
463 
464  if (r1 >= r)
465  r1 = r - 1;
466  if (c1 >= c)
467  c1 = c - 1;
468 
469  // Now take the subset of the matrix.
470  if (iscmplx)
471  cdata = cdata.extract (0, c0, r1, c1);
472  else
473  rdata = rdata.extract (0, c0, r1, c1);
474 
475  if (iscmplx)
476  retval(0) = cdata;
477  else
478  retval(0) = rdata;
479  }
480 
481  return retval;
482 }
483 
484 /*
485 %!shared file
486 %! file = tmpnam ();
487 %! fid = fopen (file, "wt");
488 %! fwrite (fid, "1, 2, 3\n4, 5, 6\n7, 8, 9\n10, 11, 12");
489 %! fclose (fid);
490 
491 %!assert (dlmread (file), [1, 2, 3; 4, 5, 6; 7, 8, 9;10, 11, 12])
492 %!assert (dlmread (file, ","), [1, 2, 3; 4, 5, 6; 7, 8, 9; 10, 11, 12])
493 %!assert (dlmread (file, ",", [1, 0, 2, 1]), [4, 5; 7, 8])
494 %!assert (dlmread (file, ",", "B1..C2"), [2, 3; 5, 6])
495 %!assert (dlmread (file, ",", "B1:C2"), [2, 3; 5, 6])
496 %!assert (dlmread (file, ",", "..C2"), [1, 2, 3; 4, 5, 6])
497 %!assert (dlmread (file, ",", 0, 1), [2, 3; 5, 6; 8, 9; 11, 12])
498 %!assert (dlmread (file, ",", "B1.."), [2, 3; 5, 6; 8, 9; 11, 12])
499 %!error (dlmread (file, ",", [0 1]))
500 
501 %!test
502 %! unlink (file);
503 
504 %!shared file
505 %! file = tmpnam ();
506 %! fid = fopen (file, "wt");
507 %! fwrite (fid, "1, 2, 3\n4+4i, 5, 6\n7, 8, 9\n10, 11, 12");
508 %! fclose (fid);
509 
510 %!assert (dlmread (file), [1, 2, 3; 4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12])
511 %!assert (dlmread (file, ","), [1, 2, 3; 4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12])
512 %!assert (dlmread (file, ",", [1, 0, 2, 1]), [4 + 4i, 5; 7, 8])
513 %!assert (dlmread (file, ",", "A2..B3"), [4 + 4i, 5; 7, 8])
514 %!assert (dlmread (file, ",", "A2:B3"), [4 + 4i, 5; 7, 8])
515 %!assert (dlmread (file, ",", "..B3"), [1, 2; 4 + 4i, 5; 7, 8])
516 %!assert (dlmread (file, ",", 1, 0), [4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12])
517 %!assert (dlmread (file, ",", "A2.."), [4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12])
518 %!error (dlmread (file, ",", [0 1]))
519 
520 %!test
521 %! unlink (file);
522 */