ov-str-mat.cc

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (C) 1996-2012 John W. Eaton
00004 Copyright (C) 2009-2010 VZLU Prague
00005 
00006 This file is part of Octave.
00007 
00008 Octave is free software; you can redistribute it and/or modify it
00009 under the terms of the GNU General Public License as published by the
00010 Free Software Foundation; either version 3 of the License, or (at your
00011 option) any later version.
00012 
00013 Octave is distributed in the hope that it will be useful, but WITHOUT
00014 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00015 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00016 for more details.
00017 
00018 You should have received a copy of the GNU General Public License
00019 along with Octave; see the file COPYING.  If not, see
00020 <http://www.gnu.org/licenses/>.
00021 
00022 */
00023 
00024 #ifdef HAVE_CONFIG_H
00025 #include <config.h>
00026 #endif
00027 
00028 #include <cctype>
00029 
00030 #include <iostream>
00031 #include <vector>
00032 
00033 #include "data-conv.h"
00034 #include "lo-ieee.h"
00035 #include "mach-info.h"
00036 #include "mx-base.h"
00037 #include "oct-locbuf.h"
00038 
00039 #include "byte-swap.h"
00040 #include "defun.h"
00041 #include "gripes.h"
00042 #include "ls-ascii-helper.h"
00043 #include "ls-hdf5.h"
00044 #include "ls-oct-ascii.h"
00045 #include "ls-utils.h"
00046 #include "oct-obj.h"
00047 #include "oct-stream.h"
00048 #include "ops.h"
00049 #include "ov-scalar.h"
00050 #include "ov-re-mat.h"
00051 #include "ov-str-mat.h"
00052 #include "pr-output.h"
00053 #include "pt-mat.h"
00054 #include "utils.h"
00055 
// Per-class boilerplate for the two string value classes defined in
// this file.  NOTE(review): the macros are defined elsewhere;
// presumably they provide the custom allocator and the type-id
// bookkeeping for each class -- confirm against their definitions.
DEFINE_OCTAVE_ALLOCATOR (octave_char_matrix_str);
DEFINE_OCTAVE_ALLOCATOR (octave_char_matrix_sq_str);

// The two string literals are presumably the type name and the class
// name reported for each kind of string ("string" / "sq_string", both
// of class "char") -- TODO confirm.
DEFINE_OV_TYPEID_FUNCTIONS_AND_DATA (octave_char_matrix_str, "string", "char");
DEFINE_OV_TYPEID_FUNCTIONS_AND_DATA (octave_char_matrix_sq_str, "sq_string", "char");
00061 
00062 static octave_base_value *
00063 default_numeric_conversion_function (const octave_base_value& a)
00064 {
00065   octave_base_value *retval = 0;
00066 
00067   CAST_CONV_ARG (const octave_char_matrix_str&);
00068 
00069   NDArray nda = v.array_value (true);
00070 
00071   if (! error_state)
00072     {
00073       if (nda.numel () == 1)
00074         retval = new octave_scalar (nda(0));
00075       else
00076         retval = new octave_matrix (nda);
00077     }
00078 
00079   return retval;
00080 }
00081 
00082 octave_base_value::type_conv_info
00083 octave_char_matrix_str::numeric_conversion_function (void) const
00084 {
00085   return octave_base_value::type_conv_info (default_numeric_conversion_function,
00086                                             octave_matrix::static_type_id ());
00087 }
00088 
00089 octave_value
00090 octave_char_matrix_str::do_index_op_internal (const octave_value_list& idx,
00091                                               bool resize_ok, char type)
00092 {
00093   octave_value retval;
00094 
00095   octave_idx_type len = idx.length ();
00096 
00097   switch (len)
00098     {
00099     case 0:
00100       retval = octave_value (matrix, type);
00101       break;
00102 
00103     case 1:
00104       {
00105         idx_vector i = idx (0).index_vector ();
00106 
00107         if (! error_state)
00108           retval = octave_value (charNDArray (matrix.index (i, resize_ok)),
00109                                  type);
00110       }
00111       break;
00112 
00113     case 2:
00114       {
00115         idx_vector i = idx (0).index_vector ();
00116         idx_vector j = idx (1).index_vector ();
00117 
00118         if (! error_state)
00119           retval = octave_value (charNDArray (matrix.index (i, j, resize_ok)),
00120                                  type);
00121       }
00122       break;
00123 
00124     default:
00125       {
00126         Array<idx_vector> idx_vec (dim_vector (len, 1));
00127 
00128         for (octave_idx_type i = 0; i < len; i++)
00129           idx_vec(i) = idx(i).index_vector ();
00130 
00131         if (! error_state)
00132           retval = octave_value (charNDArray (matrix.index (idx_vec, resize_ok)),
00133                                  type);
00134       }
00135       break;
00136     }
00137 
00138   return retval;
00139 }
00140 
00141 octave_value
00142 octave_char_matrix_str::resize (const dim_vector& dv, bool fill) const
00143 {
00144   charNDArray retval (matrix);
00145   if (fill)
00146     retval.resize (dv, charNDArray::resize_fill_value ());
00147   else
00148     retval.resize (dv);
00149   return octave_value (retval, is_sq_string () ? '\'' : '"');
00150 }
00151 
// Shared body for the string -> numeric conversion methods of
// octave_char_matrix_str.  It must be expanded inside a function that
// has a bool named force_string_conv in scope.
//
//   T      result type of the conversion
//   INIT   initializer text appended to the declaration of the result
//          (for example "= 0"), or empty
//   TNAME  description of the target type used in the messages
//   FCN    name of the octave_char_matrix method that performs the
//          actual conversion
//
// Without force_string_conv the conversion is reported through
// gripe_invalid_conversion and the initial value of the result is
// returned.  Otherwise a warning with id "Octave:str-to-num" is
// issued and the result of octave_char_matrix::FCN () is returned.
#define CHAR_MATRIX_CONV(T, INIT, TNAME, FCN) \
  T retval INIT; \
 \
  if (! force_string_conv) \
    gripe_invalid_conversion ("string", TNAME); \
  else \
    { \
      warning_with_id ("Octave:str-to-num", \
                       "implicit conversion from %s to %s", \
                       "string", TNAME); \
 \
      retval = octave_char_matrix::FCN (); \
    } \
 \
  return retval
00167 
// Convert the string to a real scalar.  The whole body comes from
// CHAR_MATRIX_CONV: unless FORCE_STRING_CONV is true the conversion is
// reported as invalid and the initial value 0 is returned; otherwise
// a warning is issued and octave_char_matrix::double_value () is used.
double
octave_char_matrix_str::double_value (bool force_string_conv) const
{
  CHAR_MATRIX_CONV (double, = 0, "real scalar", double_value);
}
00173 
// Convert the string to a complex scalar.  The whole body comes from
// CHAR_MATRIX_CONV: unless FORCE_STRING_CONV is true the conversion is
// reported as invalid and the initial value 0 is returned; otherwise
// a warning is issued and octave_char_matrix::complex_value () is used.
Complex
octave_char_matrix_str::complex_value (bool force_string_conv) const
{
  CHAR_MATRIX_CONV (Complex, = 0, "complex scalar", complex_value);
}
00179 
// Convert the string to a real matrix.  The whole body comes from
// CHAR_MATRIX_CONV: unless FORCE_STRING_CONV is true the conversion is
// reported as invalid and a default-constructed Matrix is returned;
// otherwise a warning is issued and octave_char_matrix::matrix_value ()
// is used.
Matrix
octave_char_matrix_str::matrix_value (bool force_string_conv) const
{
  CHAR_MATRIX_CONV (Matrix, , "real matrix", matrix_value);
}
00185 
// Convert the string to a complex matrix.  The whole body comes from
// CHAR_MATRIX_CONV: unless FORCE_STRING_CONV is true the conversion is
// reported as invalid and a default-constructed ComplexMatrix is
// returned; otherwise a warning is issued and
// octave_char_matrix::complex_matrix_value () is used.
ComplexMatrix
octave_char_matrix_str::complex_matrix_value (bool force_string_conv) const
{
  CHAR_MATRIX_CONV (ComplexMatrix, , "complex matrix", complex_matrix_value);
}
00191 
// Convert the string to a real N-d array.  The whole body comes from
// CHAR_MATRIX_CONV: unless FORCE_STRING_CONV is true the conversion is
// reported as invalid and a default-constructed NDArray is returned;
// otherwise a warning is issued and octave_char_matrix::array_value ()
// is used.
NDArray
octave_char_matrix_str::array_value (bool force_string_conv) const
{
  CHAR_MATRIX_CONV (NDArray, , "real N-d array", array_value);
}
00197 
// Convert the string to a complex N-d array.  The whole body comes
// from CHAR_MATRIX_CONV: unless FORCE_STRING_CONV is true the
// conversion is reported as invalid and a default-constructed
// ComplexNDArray is returned; otherwise a warning is issued and
// octave_char_matrix::complex_array_value () is used.
ComplexNDArray
octave_char_matrix_str::complex_array_value (bool force_string_conv) const
{
  CHAR_MATRIX_CONV (ComplexNDArray, , "complex N-d array",
                    complex_array_value);
}
00204 
00205 string_vector
00206 octave_char_matrix_str::all_strings (bool) const
00207 {
00208   string_vector retval;
00209 
00210   if (matrix.ndims () == 2)
00211     {
00212       charMatrix chm = matrix.matrix_value ();
00213 
00214       octave_idx_type n = chm.rows ();
00215 
00216       retval.resize (n);
00217 
00218       for (octave_idx_type i = 0; i < n; i++)
00219         retval[i] = chm.row_as_string (i);
00220     }
00221   else
00222     error ("invalid conversion of charNDArray to string_vector");
00223 
00224   return retval;
00225 }
00226 
00227 std::string
00228 octave_char_matrix_str::string_value (bool) const
00229 {
00230   std::string retval;
00231 
00232   if (matrix.ndims () == 2)
00233     {
00234       charMatrix chm = matrix.matrix_value ();
00235 
00236       retval = chm.row_as_string (0);  // FIXME???
00237     }
00238   else
00239     error ("invalid conversion of charNDArray to string");
00240 
00241   return retval;
00242 }
00243 
00244 Array<std::string>
00245 octave_char_matrix_str::cellstr_value (void) const
00246 {
00247   Array<std::string> retval;
00248 
00249   if (matrix.ndims () == 2)
00250     {
00251       const charMatrix chm = matrix.matrix_value ();
00252       octave_idx_type nr = chm.rows ();
00253       retval.clear (nr, 1);
00254       for (octave_idx_type i = 0; i < nr; i++)
00255         retval.xelem(i) = chm.row_as_string (i);
00256     }
00257   else
00258     error ("cellstr: cannot convert multidimensional arrays");
00259 
00260   return retval;
00261 }
00262 
00263 void
00264 octave_char_matrix_str::print_raw (std::ostream& os, bool pr_as_read_syntax) const
00265 {
00266   octave_print_internal (os, matrix, pr_as_read_syntax,
00267                          current_print_indent_level (), true);
00268 }
00269 
00270 bool
00271 octave_char_matrix_str::save_ascii (std::ostream& os)
00272 {
00273   dim_vector d = dims ();
00274   if (d.length () > 2)
00275     {
00276       charNDArray tmp = char_array_value ();
00277       os << "# ndims: " << d.length () << "\n";
00278       for (int i=0; i < d.length (); i++)
00279         os << " " << d (i);
00280       os << "\n";
00281       os.write (tmp.fortran_vec (), d.numel ());
00282       os << "\n";
00283     }
00284   else
00285     {
00286       // Keep this case, rather than use generic code above for
00287       // backward compatiability. Makes load_ascii much more complex!!
00288       charMatrix chm = char_matrix_value ();
00289       octave_idx_type elements = chm.rows ();
00290       os << "# elements: " << elements << "\n";
00291       for (octave_idx_type i = 0; i < elements; i++)
00292         {
00293           unsigned len = chm.cols ();
00294           os << "# length: " << len << "\n";
00295           std::string tstr = chm.row_as_string (i);
00296           const char *tmp = tstr.data ();
00297           if (tstr.length () > len)
00298             panic_impossible ();
00299           os.write (tmp, len);
00300           os << "\n";
00301         }
00302     }
00303 
00304   return true;
00305 }
00306 
00307 bool
00308 octave_char_matrix_str::load_ascii (std::istream& is)
00309 {
00310   bool success = true;
00311 
00312   string_vector keywords(3);
00313 
00314   keywords[0] = "ndims";
00315   keywords[1] = "elements";
00316   keywords[2] = "length";
00317 
00318   std::string kw;
00319   int val = 0;
00320 
00321   if (extract_keyword (is, keywords, kw, val, true))
00322     {
00323       if (kw == "ndims")
00324         {
00325           int mdims = val;
00326 
00327           if (mdims >= 0)
00328             {
00329               dim_vector dv;
00330               dv.resize (mdims);
00331 
00332               for (int i = 0; i < mdims; i++)
00333                 is >> dv(i);
00334 
00335               if (is)
00336                 {
00337                   charNDArray tmp(dv);
00338 
00339                   if (tmp.is_empty ())
00340                     matrix = tmp;
00341                   else
00342                     {
00343                       char *ftmp = tmp.fortran_vec ();
00344 
00345                       skip_preceeding_newline (is);
00346 
00347                       if (! is.read (ftmp, dv.numel ()) || !is)
00348                         {
00349                           error ("load: failed to load string constant");
00350                           success = false;
00351                         }
00352                       else
00353                         matrix = tmp;
00354                     }
00355                 }
00356               else
00357                 {
00358                   error ("load: failed to read dimensions");
00359                   success = false;
00360                 }
00361             }
00362           else
00363             {
00364               error ("load: failed to extract matrix size");
00365               success = false;
00366             }
00367         }
00368       else if (kw == "elements")
00369         {
00370           int elements = val;
00371 
00372           if (elements >= 0)
00373             {
00374               // FIXME -- need to be able to get max length
00375               // before doing anything.
00376 
00377               charMatrix chm (elements, 0);
00378               int max_len = 0;
00379               for (int i = 0; i < elements; i++)
00380                 {
00381                   int len;
00382                   if (extract_keyword (is, "length", len) && len >= 0)
00383                     {
00384                       // Use this instead of a C-style character
00385                       // buffer so that we can properly handle
00386                       // embedded NUL characters.
00387                       charMatrix tmp (1, len);
00388                       char *ptmp = tmp.fortran_vec ();
00389 
00390                       if (len > 0 && ! is.read (ptmp, len))
00391                         {
00392                           error ("load: failed to load string constant");
00393                           success = false;
00394                           break;
00395                         }
00396                       else
00397                         {
00398                           if (len > max_len)
00399                             {
00400                               max_len = len;
00401                               chm.resize (elements, max_len, 0);
00402                             }
00403 
00404                           chm.insert (tmp, i, 0);
00405                         }
00406                     }
00407                   else
00408                     {
00409                       error ("load: failed to extract string length for element %d",
00410                              i+1);
00411                       success = false;
00412                     }
00413                 }
00414 
00415               if (! error_state)
00416                 matrix = chm;
00417             }
00418           else
00419             {
00420               error ("load: failed to extract number of string elements");
00421               success = false;
00422             }
00423         }
00424       else if (kw == "length")
00425         {
00426           int len = val;
00427 
00428           if (len >= 0)
00429             {
00430               // This is cruft for backward compatiability,
00431               // but relatively harmless.
00432 
00433               // Use this instead of a C-style character buffer so
00434               // that we can properly handle embedded NUL characters.
00435               charMatrix tmp (1, len);
00436               char *ptmp = tmp.fortran_vec ();
00437 
00438               if (len > 0 && ! is.read (ptmp, len))
00439                 {
00440                   error ("load: failed to load string constant");
00441                 }
00442               else
00443                 {
00444                   if (is)
00445                     matrix = tmp;
00446                   else
00447                     error ("load: failed to load string constant");
00448                 }
00449             }
00450         }
00451       else
00452         panic_impossible ();
00453     }
00454   else
00455     {
00456       error ("load: failed to extract number of rows and columns");
00457       success = false;
00458     }
00459 
00460   return success;
00461 }
00462 
00463 bool
00464 octave_char_matrix_str::save_binary (std::ostream& os,
00465                                      bool& /* save_as_floats */)
00466 {
00467   dim_vector d = dims ();
00468   if (d.length() < 1)
00469     return false;
00470 
00471   // Use negative value for ndims to differentiate with old format!!
00472   int32_t tmp = - d.length();
00473   os.write (reinterpret_cast<char *> (&tmp), 4);
00474   for (int i=0; i < d.length (); i++)
00475     {
00476       tmp = d(i);
00477       os.write (reinterpret_cast<char *> (&tmp), 4);
00478     }
00479 
00480   charNDArray m = char_array_value ();
00481   os.write (m.fortran_vec (), d.numel ());
00482   return true;
00483 }
00484 
00485 bool
00486 octave_char_matrix_str::load_binary (std::istream& is, bool swap,
00487                                      oct_mach_info::float_format /* fmt */)
00488 {
00489   int32_t elements;
00490   if (! is.read (reinterpret_cast<char *> (&elements), 4))
00491     return false;
00492   if (swap)
00493     swap_bytes<4> (&elements);
00494 
00495   if (elements < 0)
00496     {
00497       int32_t mdims = - elements;
00498       int32_t di;
00499       dim_vector dv;
00500       dv.resize (mdims);
00501 
00502       for (int i = 0; i < mdims; i++)
00503         {
00504           if (! is.read (reinterpret_cast<char *> (&di), 4))
00505             return false;
00506           if (swap)
00507             swap_bytes<4> (&di);
00508           dv(i) = di;
00509         }
00510 
00511       // Convert an array with a single dimension to be a row vector.
00512       // Octave should never write files like this, other software
00513       // might.
00514 
00515       if (mdims == 1)
00516         {
00517           mdims = 2;
00518           dv.resize (mdims);
00519           dv(1) = dv(0);
00520           dv(0) = 1;
00521         }
00522 
00523       charNDArray m(dv);
00524       char *tmp = m.fortran_vec ();
00525       is.read (tmp, dv.numel ());
00526 
00527       if (error_state || ! is)
00528         return false;
00529       matrix = m;
00530     }
00531   else
00532     {
00533       charMatrix chm (elements, 0);
00534       int max_len = 0;
00535       for (int i = 0; i < elements; i++)
00536         {
00537           int32_t len;
00538           if (! is.read (reinterpret_cast<char *> (&len), 4))
00539             return false;
00540           if (swap)
00541             swap_bytes<4> (&len);
00542           charMatrix btmp (1, len);
00543           char *pbtmp = btmp.fortran_vec ();
00544           if (! is.read (pbtmp, len))
00545             return false;
00546           if (len > max_len)
00547             {
00548               max_len = len;
00549               chm.resize (elements, max_len, 0);
00550             }
00551           chm.insert (btmp, i, 0);
00552         }
00553       matrix = chm;
00554     }
00555   return true;
00556 }
00557 
00558 #if defined (HAVE_HDF5)
00559 
00560 bool
00561 octave_char_matrix_str::save_hdf5 (hid_t loc_id, const char *name,
00562                                    bool /* save_as_floats */)
00563 {
00564   dim_vector dv = dims ();
00565   int empty = save_hdf5_empty (loc_id, name, dv);
00566   if (empty)
00567     return (empty > 0);
00568 
00569   int rank = dv.length ();
00570   hid_t space_hid = -1, data_hid = -1;
00571   bool retval = true;
00572   charNDArray m = char_array_value ();
00573 
00574   OCTAVE_LOCAL_BUFFER (hsize_t, hdims, rank);
00575 
00576   // Octave uses column-major, while HDF5 uses row-major ordering
00577   for (int i = 0; i < rank; i++)
00578     hdims[i] = dv (rank-i-1);
00579 
00580   space_hid = H5Screate_simple (rank, hdims, 0);
00581   if (space_hid < 0)
00582     return false;
00583 #if HAVE_HDF5_18
00584   data_hid = H5Dcreate (loc_id, name, H5T_NATIVE_CHAR, space_hid,
00585                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
00586 #else
00587   data_hid = H5Dcreate (loc_id, name, H5T_NATIVE_CHAR, space_hid,
00588                         H5P_DEFAULT);
00589 #endif
00590   if (data_hid < 0)
00591     {
00592       H5Sclose (space_hid);
00593       return false;
00594     }
00595 
00596   OCTAVE_LOCAL_BUFFER (char, s, dv.numel ());
00597 
00598   for (int i = 0; i < dv.numel (); ++i)
00599     s[i] = m(i);
00600 
00601   retval = H5Dwrite (data_hid, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL,
00602                      H5P_DEFAULT, s) >= 0;
00603 
00604   H5Dclose (data_hid);
00605   H5Sclose (space_hid);
00606 
00607   return retval;
00608 }
00609 
00610 bool
00611 octave_char_matrix_str::load_hdf5 (hid_t loc_id, const char *name)
00612 {
00613   bool retval = false;
00614 
00615   dim_vector dv;
00616   int empty = load_hdf5_empty (loc_id, name, dv);
00617   if (empty > 0)
00618     matrix.resize(dv);
00619   if (empty)
00620     return (empty > 0);
00621 
00622 #if HAVE_HDF5_18
00623   hid_t data_hid = H5Dopen (loc_id, name, H5P_DEFAULT);
00624 #else
00625   hid_t data_hid = H5Dopen (loc_id, name);
00626 #endif
00627   hid_t space_hid = H5Dget_space (data_hid);
00628   hsize_t rank = H5Sget_simple_extent_ndims (space_hid);
00629   hid_t type_hid = H5Dget_type (data_hid);
00630   hid_t type_class_hid = H5Tget_class (type_hid);
00631 
00632   if (type_class_hid == H5T_INTEGER)
00633     {
00634       if (rank < 1)
00635         {
00636           H5Tclose (type_hid);
00637           H5Sclose (space_hid);
00638           H5Dclose (data_hid);
00639           return false;
00640         }
00641 
00642       OCTAVE_LOCAL_BUFFER (hsize_t, hdims, rank);
00643       OCTAVE_LOCAL_BUFFER (hsize_t, maxdims, rank);
00644 
00645       H5Sget_simple_extent_dims (space_hid, hdims, maxdims);
00646 
00647       // Octave uses column-major, while HDF5 uses row-major ordering
00648       if (rank == 1)
00649         {
00650           dv.resize (2);
00651           dv(0) = 1;
00652           dv(1) = hdims[0];
00653         }
00654       else
00655         {
00656           dv.resize (rank);
00657           for (hsize_t i = 0, j = rank - 1; i < rank; i++, j--)
00658             dv(j) = hdims[i];
00659         }
00660 
00661       charNDArray m (dv);
00662       char *str = m.fortran_vec ();
00663       if (H5Dread (data_hid, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL,
00664                    H5P_DEFAULT, str) >= 0)
00665         {
00666           retval = true;
00667           matrix = m;
00668         }
00669 
00670       H5Tclose (type_hid);
00671       H5Sclose (space_hid);
00672       H5Dclose (data_hid);
00673       return true;
00674     }
00675   else
00676     {
00677       // This is cruft for backward compatiability and easy data
00678       // importation
00679       if (rank == 0)
00680         {
00681           // a single string:
00682           int slen = H5Tget_size (type_hid);
00683           if (slen < 0)
00684             {
00685               H5Tclose (type_hid);
00686               H5Sclose (space_hid);
00687               H5Dclose (data_hid);
00688               return false;
00689             }
00690           else
00691             {
00692               OCTAVE_LOCAL_BUFFER (char, s, slen);
00693               // create datatype for (null-terminated) string
00694               // to read into:
00695               hid_t st_id = H5Tcopy (H5T_C_S1);
00696               H5Tset_size (st_id, slen);
00697               if (H5Dread (data_hid, st_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, s) < 0)
00698                 {
00699                   H5Tclose (st_id);
00700                   H5Tclose (type_hid);
00701                   H5Sclose (space_hid);
00702                   H5Dclose (data_hid);
00703                   return false;
00704                 }
00705 
00706               matrix = charMatrix (s);
00707 
00708               H5Tclose (st_id);
00709               H5Tclose (type_hid);
00710               H5Sclose (space_hid);
00711               H5Dclose (data_hid);
00712               return true;
00713             }
00714         }
00715       else if (rank == 1)
00716         {
00717           // string vector
00718           hsize_t elements, maxdim;
00719           H5Sget_simple_extent_dims (space_hid, &elements, &maxdim);
00720           int slen = H5Tget_size (type_hid);
00721           if (slen < 0)
00722             {
00723               H5Tclose (type_hid);
00724               H5Sclose (space_hid);
00725               H5Dclose (data_hid);
00726               return false;
00727             }
00728           else
00729             {
00730               // hdf5 string arrays store strings of all the
00731               // same physical length (I think), which is
00732               // slightly wasteful, but oh well.
00733 
00734               OCTAVE_LOCAL_BUFFER (char, s, elements * slen);
00735 
00736               // create datatype for (null-terminated) string
00737               // to read into:
00738               hid_t st_id = H5Tcopy (H5T_C_S1);
00739               H5Tset_size (st_id, slen);
00740 
00741               if (H5Dread (data_hid, st_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, s) < 0)
00742                 {
00743                   H5Tclose (st_id);
00744                   H5Tclose (type_hid);
00745                   H5Sclose (space_hid);
00746                   H5Dclose (data_hid);
00747                   return false;
00748                 }
00749 
00750               charMatrix chm (elements, slen - 1);
00751               for (hsize_t i = 0; i < elements; ++i)
00752                 {
00753                   chm.insert (s + i*slen, i, 0);
00754                 }
00755 
00756               matrix = chm;
00757 
00758               H5Tclose (st_id);
00759               H5Tclose (type_hid);
00760               H5Sclose (space_hid);
00761               H5Dclose (data_hid);
00762               return true;
00763             }
00764         }
00765       else
00766         {
00767           H5Tclose (type_hid);
00768           H5Sclose (space_hid);
00769           H5Dclose (data_hid);
00770           return false;
00771         }
00772     }
00773 
00774   return retval;
00775 }
00776 
00777 #endif
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines