dlmread.cc

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (C) 2008-2012 Jonathan Stickel
00004 Copyright (C) 2010 Jaroslav Hajek
00005 
00006 This file is part of Octave.
00007 
00008 Octave is free software; you can redistribute it and/or modify it
00009 under the terms of the GNU General Public License as published by the
00010 Free Software Foundation; either version 3 of the License, or (at your
00011 option) any later version.
00012 
00013 Octave is distributed in the hope that it will be useful, but WITHOUT
00014 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00015 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00016 for more details.
00017 
00018 You should have received a copy of the GNU General Public License
00019 along with Octave; see the file COPYING.  If not, see
00020 <http://www.gnu.org/licenses/>.
00021 
00022 */
00023 
00024 // Adapted from previous version of dlmread.occ as authored by Kai
00025 // Habel, but core code has been completely re-written.
00026 
00027 #ifdef HAVE_CONFIG_H
00028 #include <config.h>
00029 #endif
00030 
00031 #include <cctype>
00032 #include <fstream>
00033 #include <limits>
00034 
00035 #include "file-ops.h"
00036 #include "lo-ieee.h"
00037 
00038 #include "defun-dld.h"
00039 #include "oct-stream.h"
00040 #include "error.h"
00041 #include "oct-obj.h"
00042 #include "utils.h"
00043 
00044 static const octave_idx_type idx_max =  std::numeric_limits<octave_idx_type>::max ();
00045 
00046 static bool
00047 read_cell_spec (std::istream& is, octave_idx_type& row, octave_idx_type& col)
00048 {
00049   bool stat = false;
00050 
00051   if (is.peek () == std::istream::traits_type::eof ())
00052     stat = true;
00053   else
00054     {
00055       if (::isalpha (is.peek ()))
00056         {
00057           col = 0;
00058           while (is && ::isalpha (is.peek ()))
00059             {
00060               char ch = is.get ();
00061               col *= 26;
00062               if (ch >= 'a')
00063                 col += ch - 'a' + 1;
00064               else
00065                 col += ch - 'A' + 1;
00066             }
00067           col --;
00068 
00069           if (is)
00070             {
00071               is >> row;
00072               row --;
00073               if (is)
00074                 stat = true;
00075             }
00076         }
00077     }
00078 
00079   return stat;
00080 }
00081 
00082 static bool
00083 parse_range_spec (const octave_value& range_spec,
00084                   octave_idx_type& rlo, octave_idx_type& clo,
00085                   octave_idx_type& rup, octave_idx_type& cup)
00086 {
00087   bool stat = true;
00088 
00089   if (range_spec.is_string ())
00090     {
00091       std::istringstream is (range_spec.string_value ());
00092       char ch = is.peek ();
00093 
00094       if (ch == '.' || ch == ':')
00095         {
00096           rlo = 0;
00097           clo = 0;
00098           ch = is.get ();
00099           if (ch == '.')
00100             {
00101               ch = is.get ();
00102               if (ch != '.')
00103                 stat = false;
00104             }
00105         }
00106       else
00107         {
00108           stat = read_cell_spec (is, rlo, clo);
00109 
00110           if (stat)
00111             {
00112               ch = is.peek ();
00113 
00114               if (ch == '.' || ch == ':')
00115                 {
00116                   ch = is.get ();
00117                   if (ch == '.')
00118                     {
00119                       ch = is.get ();
00120                       if (!is || ch != '.')
00121                         stat = false;
00122                     }
00123 
00124                   rup = idx_max - 1;
00125                   cup = idx_max - 1;
00126                 }
00127               else
00128                 {
00129                   rup = rlo;
00130                   cup = clo;
00131                   if (!is || !is.eof ())
00132                     stat = false;
00133                 }
00134             }
00135         }
00136 
00137       if (stat && is && !is.eof ())
00138         stat = read_cell_spec (is, rup, cup);
00139 
00140       if (!is || !is.eof ())
00141         stat = false;
00142     }
00143   else if (range_spec.is_real_matrix () && range_spec.numel () == 4)
00144     {
00145       ColumnVector range(range_spec.vector_value ());
00146       // double --> unsigned int
00147       rlo = static_cast<octave_idx_type> (range(0));
00148       clo = static_cast<octave_idx_type> (range(1));
00149       rup = static_cast<octave_idx_type> (range(2));
00150       cup = static_cast<octave_idx_type> (range(3));
00151     }
00152   else
00153     stat = false;
00154 
00155   return stat;
00156 }
00157 
00158 DEFUN_DLD (dlmread, args, ,
00159   "-*- texinfo -*-\n\
00160 @deftypefn  {Loadable Function} {@var{data} =} dlmread (@var{file})\n\
00161 @deftypefnx {Loadable Function} {@var{data} =} dlmread (@var{file}, @var{sep})\n\
00162 @deftypefnx {Loadable Function} {@var{data} =} dlmread (@var{file}, @var{sep}, @var{r0}, @var{c0})\n\
00163 @deftypefnx {Loadable Function} {@var{data} =} dlmread (@var{file}, @var{sep}, @var{range})\n\
00164 @deftypefnx {Loadable Function} {@var{data} =} dlmread (@dots{}, \"emptyvalue\", @var{EMPTYVAL})\n\
00165 Read the matrix @var{data} from a text file.  If not defined the separator\n\
00166 between fields is determined from the file itself.  Otherwise the\n\
00167 separation character is defined by @var{sep}.\n\
00168 \n\
00169 Given two scalar arguments @var{r0} and @var{c0}, these define the starting\n\
00170 row and column of the data to be read.  These values are indexed from zero,\n\
00171 such that the first row corresponds to an index of zero.\n\
00172 \n\
00173 The @var{range} parameter may be a 4-element vector containing the upper\n\
00174 left and lower right corner @code{[@var{R0},@var{C0},@var{R1},@var{C1}]}\n\
00175 where the lowest index value is zero.  Alternatively, a spreadsheet style\n\
00176 range such as 'A2..Q15' or 'T1:AA5' can be used.  The lowest alphabetical\n\
00177 index 'A' refers to the first column.  The lowest row index is 1.\n\
00178 \n\
00179 @var{file} should be a file name or file id given by @code{fopen}.  In the\n\
00180 latter case, the file is read until end of file is reached.\n\
00181 \n\
00182 The \"emptyvalue\" option may be used to specify the value used to fill empty\n\
00183 fields.  The default is zero.\n\
00184 @seealso{csvread, textscan, textread, dlmwrite}\n\
00185 @end deftypefn")
00186 {
00187   octave_value_list retval;
00188 
00189   int nargin = args.length ();
00190 
00191   double empty_value = 0.0;
00192 
00193   if (nargin > 2 && args(nargin-2).is_string ()
00194       && args(nargin-2).string_value () == "emptyvalue")
00195     {
00196       empty_value = args(nargin-1).double_value ();
00197       if (error_state)
00198          return retval;
00199       nargin -= 2;
00200     }
00201 
00202   if (nargin < 1 || nargin > 4)
00203     {
00204       print_usage ();
00205       return retval;
00206     }
00207 
00208   std::istream *input = 0;
00209   std::ifstream input_file;
00210 
00211   if (args(0).is_string ())
00212     {
00213       // File name.
00214       std::string fname (args(0).string_value ());
00215       if (error_state)
00216          return retval;
00217 
00218       std::string tname = file_ops::tilde_expand (fname);
00219 
00220       input_file.open (tname.c_str (), std::ios::in);
00221 
00222       if (! input_file)
00223         {
00224           error ("dlmread: unable to open file '%s'", fname.c_str ());
00225           return retval;
00226         }
00227       else
00228         input = &input_file;
00229     }
00230   else if (args(0).is_scalar_type ())
00231     {
00232       octave_stream is = octave_stream_list::lookup (args(0), "dlmread");
00233 
00234       if (error_state)
00235          return retval;
00236 
00237       input = is.input_stream ();
00238 
00239       if (! input)
00240         {
00241           error ("dlmread: stream FILE not open for input");
00242           return retval;
00243         }
00244     }
00245   else
00246     {
00247       error ("dlmread: FILE argument must be a string or file id");
00248       return retval;
00249     }
00250 
00251   // Set default separator.
00252   std::string sep;
00253   if (nargin > 1)
00254     {
00255       if (args(1).is_sq_string ())
00256         sep = do_string_escapes (args(1).string_value ());
00257       else
00258         sep = args(1).string_value ();
00259 
00260       if (error_state)
00261         return retval;
00262     }
00263 
00264   // Take a subset if a range was given.
00265   octave_idx_type r0 = 0, c0 = 0, r1 = idx_max-1, c1 = idx_max-1;
00266   if (nargin > 2)
00267     {
00268       if (nargin == 3)
00269         {
00270           if (!parse_range_spec (args (2), r0, c0, r1, c1))
00271             error ("dlmread: error parsing RANGE");
00272         }
00273       else if (nargin == 4)
00274         {
00275           r0 = args(2).idx_type_value ();
00276           c0 = args(3).idx_type_value ();
00277 
00278           if (error_state)
00279             return retval;
00280         }
00281 
00282       if (r0 < 0 || c0 < 0)
00283         error ("dlmread: left & top must be positive");
00284     }
00285 
00286   if (!error_state)
00287     {
00288       octave_idx_type i = 0, j = 0, r = 1, c = 1, rmax = 0, cmax = 0;
00289 
00290       Matrix rdata;
00291       ComplexMatrix cdata;
00292 
00293       bool iscmplx = false;
00294       bool sepflag = false;
00295 
00296       std::string line;
00297 
00298       // Skip the r0 leading lines as these might be a header.
00299       for (octave_idx_type m = 0; m < r0; m++)
00300         getline (*input, line);
00301       r1 -= r0;
00302 
00303       std::istringstream tmp_stream;
00304 
00305       // Read in the data one field at a time, growing the data matrix
00306       // as needed.
00307       while (getline (*input, line))
00308         {
00309           // Skip blank lines for compatibility.
00310           if (line.find_first_not_of (" \t") == std::string::npos)
00311             continue;
00312 
00313           // To be compatible with matlab, blank separator should
00314           // correspond to whitespace as delimter.
00315           if (!sep.length ())
00316             {
00317               size_t n = line.find_first_of (",:; \t",
00318                                              line.find_first_of ("0123456789"));
00319               if (n == std::string::npos)
00320                 {
00321                   sep = " \t";
00322                   sepflag = true;
00323                 }
00324               else
00325                 {
00326                   char ch = line.at (n);
00327 
00328                   switch (line.at (n))
00329                     {
00330                     case ' ':
00331                     case '\t':
00332                       sepflag = true;
00333                       sep = " \t";
00334                       break;
00335 
00336                     default:
00337                       sep = ch;
00338                       break;
00339                     }
00340                 }
00341             }
00342 
00343           if (cmax == 0)
00344             {
00345               // Try to estimate the number of columns.  Skip leading
00346               // whitespace.
00347               size_t pos1 = line.find_first_not_of (" \t");
00348               do
00349                 {
00350                   size_t pos2 = line.find_first_of (sep, pos1);
00351 
00352                   if (sepflag && pos2 != std::string::npos)
00353                     // Treat consecutive separators as one.
00354                     {
00355                       pos2 = line.find_first_not_of (sep, pos2);
00356                       if (pos2 != std::string::npos)
00357                         pos2 -= 1;
00358                       else
00359                         pos2 = line.length () - 1;
00360                     }
00361 
00362                   cmax++;
00363 
00364                   if (pos2 != std::string::npos)
00365                     pos1 = pos2 + 1;
00366                   else
00367                     pos1 = std::string::npos;
00368 
00369                 }
00370               while (pos1 != std::string::npos);
00371 
00372               if (iscmplx)
00373                 cdata.resize (rmax, cmax);
00374               else
00375                 rdata.resize (rmax, cmax);
00376             }
00377 
00378           r = (r > i + 1 ? r : i + 1);
00379           j = 0;
00380           // Skip leading whitespace.
00381           size_t pos1 = line.find_first_not_of (" \t");
00382           do
00383             {
00384               octave_quit ();
00385 
00386               size_t pos2 = line.find_first_of (sep, pos1);
00387               std::string str = line.substr (pos1, pos2 - pos1);
00388 
00389               if (sepflag && pos2 != std::string::npos)
00390                 // Treat consecutive separators as one.
00391                 pos2 = line.find_first_not_of (sep, pos2) - 1;
00392 
00393               c = (c > j + 1 ? c : j + 1);
00394               if (r > rmax || c > cmax)
00395                 {
00396                   // Use resize_and_fill for the case of not-equal
00397                   // length rows.
00398                   rmax = 2*r;
00399                   cmax = c;
00400                   if (iscmplx)
00401                     cdata.resize (rmax, cmax);
00402                   else
00403                     rdata.resize (rmax, cmax);
00404                 }
00405 
00406               tmp_stream.str (str);
00407               tmp_stream.clear ();
00408 
00409               double x = octave_read_double (tmp_stream);
00410               if (tmp_stream)
00411                 {
00412                   if (tmp_stream.eof ())
00413                     {
00414                       if (iscmplx)
00415                         cdata(i,j++) = x;
00416                       else
00417                         rdata(i,j++) = x;
00418                     }
00419                   else if (std::toupper (tmp_stream.peek ()) == 'I')
00420                     {
00421                       // This is to allow pure imaginary numbers.
00422                       if (iscmplx)
00423                         cdata(i,j++) = x;
00424                       else
00425                         rdata(i,j++) = x;
00426                     }
00427                   else
00428                     {
00429                       double y = octave_read_double (tmp_stream);
00430 
00431                       if (!iscmplx && y != 0.)
00432                         {
00433                           iscmplx = true;
00434                           cdata = ComplexMatrix (rdata);
00435                         }
00436 
00437                       if (iscmplx)
00438                         cdata(i,j++) = Complex (x, y);
00439                       else
00440                         rdata(i,j++) = x;
00441                     }
00442                 }
00443               else if (iscmplx)
00444                 cdata(i,j++) = empty_value;
00445               else
00446                 rdata(i,j++) = empty_value;
00447 
00448               if (pos2 != std::string::npos)
00449                 pos1 = pos2 + 1;
00450               else
00451                 pos1 = std::string::npos;
00452 
00453             }
00454           while (pos1 != std::string::npos);
00455 
00456           if (i == r1)
00457             break;
00458 
00459           i++;
00460         }
00461 
00462       if (r1 >= r)
00463         r1 = r - 1;
00464       if (c1 >= c)
00465         c1 = c - 1;
00466 
00467       // Now take the subset of the matrix.
00468       if (iscmplx)
00469         cdata = cdata.extract (0, c0, r1, c1);
00470       else
00471         rdata = rdata.extract (0, c0, r1, c1);
00472 
00473       if (iscmplx)
00474         retval(0) = cdata;
00475       else
00476         retval(0) = rdata;
00477     }
00478 
00479   return retval;
00480 }
00481 
00482 /*
00483 
00484 %!shared file
00485 %! file = tmpnam ();
00486 %! fid = fopen (file, "wt");
00487 %! fwrite (fid, "1, 2, 3\n4, 5, 6\n7, 8, 9\n10, 11, 12");
00488 %! fclose (fid);
00489 
00490 %!assert (dlmread (file), [1, 2, 3; 4, 5, 6; 7, 8, 9;10, 11, 12]);
00491 %!assert (dlmread (file, ","), [1, 2, 3; 4, 5, 6; 7, 8, 9; 10, 11, 12]);
00492 %!assert (dlmread (file, ",", [1, 0, 2, 1]), [4, 5; 7, 8]);
00493 %!assert (dlmread (file, ",", "B1..C2"), [2, 3; 5, 6]);
00494 %!assert (dlmread (file, ",", "B1:C2"), [2, 3; 5, 6]);
00495 %!assert (dlmread (file, ",", "..C2"), [1, 2, 3; 4, 5, 6]);
00496 %!assert (dlmread (file, ",", 0, 1), [2, 3; 5, 6; 8, 9; 11, 12]);
00497 %!assert (dlmread (file, ",", "B1.."), [2, 3; 5, 6; 8, 9; 11, 12]);
00498 %!error (dlmread (file, ",", [0 1]))
00499 
00500 %!test
00501 %! unlink (file);
00502 
00503 %!shared file
00504 %! file = tmpnam ();
00505 %! fid = fopen (file, "wt");
00506 %! fwrite (fid, "1, 2, 3\n4+4i, 5, 6\n7, 8, 9\n10, 11, 12");
00507 %! fclose (fid);
00508 
00509 %!assert (dlmread (file), [1, 2, 3; 4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12]);
00510 %!assert (dlmread (file, ","), [1, 2, 3; 4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12]);
00511 %!assert (dlmread (file, ",", [1, 0, 2, 1]), [4 + 4i, 5; 7, 8]);
00512 %!assert (dlmread (file, ",", "A2..B3"), [4 + 4i, 5; 7, 8]);
00513 %!assert (dlmread (file, ",", "A2:B3"), [4 + 4i, 5; 7, 8]);
00514 %!assert (dlmread (file, ",", "..B3"), [1, 2; 4 + 4i, 5; 7, 8]);
00515 %!assert (dlmread (file, ",", 1, 0), [4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12]);
00516 %!assert (dlmread (file, ",", "A2.."), [4 + 4i, 5, 6; 7, 8, 9; 10, 11, 12]);
00517 %!error (dlmread (file, ",", [0 1]))
00518 
00519 %!test
00520 %! unlink (file);
00521 
00522 */
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines