GNU Octave  6.2.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
oct-string.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2016-2021 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 # include "config.h"
28 #endif
29 
30 #include "oct-string.h"
31 
#include <algorithm>
#include <cctype>
#include <cstring>
#include <iomanip>
#include <limits>
#include <memory>
#include <string>
37 
38 #include "Array.h"
39 #include "lo-ieee.h"
40 #include "lo-mappers.h"
41 #include "uniconv-wrappers.h"
42 #include "unistr-wrappers.h"
43 #include "unwind-prot.h"
44 
// Compare the first N elements of the buffers A and B for exact equality.
// Returns true when every pair of elements matches (trivially true when N
// is not positive).
template <typename T>
static bool
str_data_cmp (const typename T::value_type *a, const typename T::value_type *b,
              const typename T::size_type n)
{
  typename T::size_type remaining = n;

  while (remaining > 0)
    {
      if (*a != *b)
        return false;

      ++a;
      ++b;
      --remaining;
    }

  return true;
}
55 
// Compare the first N elements of the buffers A and B, ignoring letter case
// as defined by std::tolower.  Returns true when every pair matches.
//
// Each element is converted to unsigned char before it is handed to
// std::tolower: passing a negative plain char (for example a byte of a
// multi-byte UTF-8 sequence) is undefined behavior.
template <typename T>
static bool
str_data_cmpi (const typename T::value_type *a, const typename T::value_type *b,
               const typename T::size_type n)
{
  for (typename T::size_type i = 0; i < n; ++i)
    {
      const int lower_a = std::tolower (static_cast<unsigned char> (a[i]));
      const int lower_b = std::tolower (static_cast<unsigned char> (b[i]));

      if (lower_a != lower_b)
        return false;
    }

  return true;
}
66 
67 
68 // Templates to handle std::basic_string, std::vector, Array, and char*.
// Number of elements in STR, for any container that provides size ().
template <typename T>
typename T::size_type
numel (const T& str)
{
  const typename T::size_type count = str.size ();

  return count;
}
75 
76 template <>
78 numel (const Array<char>& str)
79 {
80  return str.numel ();
81 }
82 
// Length of the NUL-terminated string STR, converted to the size type of
// the container type T.
template <typename T>
typename T::size_type
strlen (const typename T::value_type *str)
{
  const auto length = std::strlen (str);

  return length;
}
89 
// True when the two containers hold the same number of elements.
template <typename T>
bool
sizes_cmp (const T& str_a, const T& str_b)
{
  const auto len_a = str_a.size ();
  const auto len_b = str_b.size ();

  return len_a == len_b;
}
96 
97 template <>
98 bool
99 sizes_cmp (const Array<char>& str_a, const Array<char>& str_b)
100 {
101  return str_a.dims () == str_b.dims ();
102 }
103 
104 template <typename T>
105 bool
106 sizes_cmp (const T& str_a, const typename T::value_type *str_b)
107 {
108  return str_a.size () == strlen<T> (str_b);
109 }
110 
111 template <>
112 bool
113 sizes_cmp (const Array<char>& str_a, const char *str_b)
114 {
115  return (str_a.isvector () && str_a.rows () == 1
116  && str_a.numel () == strlen<Array<char>> (str_b));
117 }
118 
119 
120 template<typename T>
121 bool
122 octave::string::strcmp (const T& str_a, const T& str_b)
123 {
124  return (sizes_cmp (str_a, str_b)
125  && str_data_cmp<T> (str_a.data (), str_b.data (), numel (str_a)));
126 }
127 
128 template<typename T>
129 bool
130 octave::string::strcmp (const T& str_a, const typename T::value_type *str_b)
131 {
132  return (sizes_cmp (str_a, str_b)
133  && str_data_cmp<T> (str_a.data (), str_b, numel (str_a)));
134 }
135 
136 
137 template<typename T>
138 bool
139 octave::string::strcmpi (const T& str_a, const T& str_b)
140 {
141  return (sizes_cmp (str_a, str_b)
142  && str_data_cmpi<T> (str_a.data (), str_b.data (), numel (str_a)));
143 }
144 
145 template<typename T>
146 bool
147 octave::string::strcmpi (const T& str_a, const typename T::value_type *str_b)
148 {
149  return (sizes_cmp (str_a, str_b)
150  && str_data_cmpi<T> (str_a.data (), str_b, numel (str_a)));
151 }
152 
153 
154 template<typename T>
155 bool
156 octave::string::strncmp (const T& str_a, const T& str_b,
157  const typename T::size_type n)
158 {
159  typename T::size_type neff;
160  auto len_a = numel (str_a);
161  auto len_b = numel (str_b);
162  neff = std::min (std::max (len_a, len_b), n);
163 
164  return (len_a >= neff && len_b >= neff
165  && str_data_cmp<T> (str_a.data (), str_b.data (), neff));
166 }
167 
168 template<typename T>
169 bool
170 octave::string::strncmp (const T& str_a, const typename T::value_type *str_b,
171  const typename T::size_type n)
172 {
173  typename T::size_type neff;
174  auto len_a = numel (str_a);
175  auto len_b = strlen<T> (str_b);
176  neff = std::min (std::max (len_a, len_b), n);
177 
178  return (len_a >= neff && len_b >= neff
179  && str_data_cmp<T> (str_a.data (), str_b, neff));
180 }
181 
182 
183 template<typename T>
184 bool
185 octave::string::strncmpi (const T& str_a, const T& str_b,
186  const typename T::size_type n)
187 {
188  typename T::size_type neff;
189  auto len_a = numel (str_a);
190  auto len_b = numel (str_b);
191  neff = std::min (std::max (len_a, len_b), n);
192 
193  return (len_a >= neff && len_b >= neff
194  && str_data_cmpi<T> (str_a.data (), str_b.data (), neff));
195 }
196 
197 template<typename T>
198 bool
199 octave::string::strncmpi (const T& str_a, const typename T::value_type *str_b,
200  const typename T::size_type n)
201 {
202  typename T::size_type neff;
203  auto len_a = numel (str_a);
204  auto len_b = strlen<T> (str_b);
205  neff = std::min (std::max (len_a, len_b), n);
206 
207  return (len_a >= neff && len_b >= neff
208  && str_data_cmpi<T> (str_a.data (), str_b, neff));
209 }
210 
211 
// Instantiations we need
//
// INSTANTIATE_OCTAVE_STRING (T) expands to explicit instantiations of
// octave::string::strcmp, strcmpi, strncmp and strncmpi for the container
// type T, each in its (T, T) form and its (T, const T::value_type*) form.
#define INSTANTIATE_OCTAVE_STRING(T)                                    \
  template bool octave::string::strcmp<T> (const T&, const T&);         \
  template bool octave::string::strcmp<T> (const T&,                    \
                                           const typename T::value_type*); \
  template bool octave::string::strcmpi<T> (const T&, const T&);        \
  template bool octave::string::strcmpi<T> (const T&,                   \
                                            const typename T::value_type*); \
  template bool octave::string::strncmp<T> (const T&, const T&,         \
                                            const typename T::size_type); \
  template bool octave::string::strncmp<T> (const T&,                   \
                                            const typename T::value_type*, \
                                            const typename T::size_type); \
  template bool octave::string::strncmpi<T> (const T&, const T&,        \
                                             const typename T::size_type); \
  template bool octave::string::strncmpi<T> (const T&,                  \
                                             const typename T::value_type*, \
                                             const typename T::size_type);

// We could also instantiate std::vector<char> but would it be
// useful for anyone?
// NOTE(review): the extracted listing skips its lines 233-234 at this
// point; presumably they hold the INSTANTIATE_OCTAVE_STRING invocations
// (the helpers above are specialized for Array<char>) -- restore them from
// the upstream file.

#undef INSTANTIATE_OCTAVE_STRING
237 
// True if C is one of the lowercase imaginary-unit letters 'i' or 'j'.
static inline bool
is_imag_unit (int c)
{ return c == 'i' || c == 'j'; }
241 
242 static double
243 single_num (std::istringstream& is)
244 {
245  double num = 0.0;
246 
247  char c = is.peek ();
248 
249  // Skip spaces.
250  while (isspace (c))
251  {
252  is.get ();
253  c = is.peek ();
254  }
255 
256  if (std::toupper (c) == 'I')
257  {
258  // It's infinity.
259  is.get ();
260  char c1 = is.get ();
261  char c2 = is.get ();
262  if (std::tolower (c1) == 'n' && std::tolower (c2) == 'f')
263  {
265  is.peek (); // May set EOF bit.
266  }
267  else
268  is.setstate (std::ios::failbit); // indicate that read has failed.
269  }
270  else if (c == 'N')
271  {
272  // It's NA or NaN
273  is.get ();
274  char c1 = is.get ();
275  if (c1 == 'A')
276  {
277  num = octave_NA;
278  is.peek (); // May set EOF bit.
279  }
280  else
281  {
282  char c2 = is.get ();
283  if (c1 == 'a' && c2 == 'N')
284  {
286  is.peek (); // May set EOF bit.
287  }
288  else
289  is.setstate (std::ios::failbit); // indicate that read has failed.
290  }
291  }
292  else
293  is >> num;
294 
295  return num;
296 }
297 
298 static std::istringstream&
299 extract_num (std::istringstream& is, double& num, bool& imag, bool& have_sign)
300 {
301  have_sign = imag = false;
302 
303  char c = is.peek ();
304 
305  // Skip leading spaces.
306  while (isspace (c))
307  {
308  is.get ();
309  c = is.peek ();
310  }
311 
312  bool negative = false;
313 
314  // Accept leading sign.
315  if (c == '+' || c == '-')
316  {
317  have_sign = true;
318  negative = c == '-';
319  is.get ();
320  c = is.peek ();
321  }
322 
323  // Skip spaces after sign.
324  while (isspace (c))
325  {
326  is.get ();
327  c = is.peek ();
328  }
329 
330  // Imaginary number (i*num or just i), or maybe 'inf'.
331  if (c == 'i')
332  {
333  // possible infinity.
334  is.get ();
335  c = is.peek ();
336 
337  if (is.eof ())
338  {
339  // just 'i' and string is finished. Return immediately.
340  imag = true;
341  num = (negative ? -1.0 : 1.0);
342  return is;
343  }
344  else
345  {
346  if (std::tolower (c) != 'n')
347  imag = true;
348  is.unget ();
349  }
350  }
351  else if (c == 'j')
352  imag = true;
353 
354  // It's i*num or just i
355  if (imag)
356  {
357  is.get ();
358  c = is.peek ();
359  // Skip spaces after imaginary unit.
360  while (isspace (c))
361  {
362  is.get ();
363  c = is.peek ();
364  }
365 
366  if (c == '*')
367  {
368  // Multiplier follows, we extract it as a number.
369  is.get ();
370  num = single_num (is);
371  if (is.good ())
372  c = is.peek ();
373  }
374  else
375  num = 1.0;
376  }
377  else
378  {
379  // It's num, num*i, or numi.
380  num = single_num (is);
381  if (is.good ())
382  {
383  c = is.peek ();
384 
385  // Skip spaces after number.
386  while (isspace (c))
387  {
388  is.get ();
389  c = is.peek ();
390  }
391 
392  if (c == '*')
393  {
394  is.get ();
395  c = is.peek ();
396 
397  // Skip spaces after operator.
398  while (isspace (c))
399  {
400  is.get ();
401  c = is.peek ();
402  }
403 
404  if (is_imag_unit (c))
405  {
406  imag = true;
407  is.get ();
408  c = is.peek ();
409  }
410  else
411  is.setstate (std::ios::failbit); // indicate read has failed.
412  }
413  else if (is_imag_unit (c))
414  {
415  imag = true;
416  is.get ();
417  c = is.peek ();
418  }
419  }
420  }
421 
422  if (is.good ())
423  {
424  // Skip trailing spaces.
425  while (isspace (c))
426  {
427  is.get ();
428  c = is.peek ();
429  }
430  }
431 
432  if (negative)
433  num = -num;
434 
435  return is;
436 }
437 
438 static inline void
439 set_component (Complex& c, double num, bool imag)
440 {
441 #if defined (HAVE_CXX_COMPLEX_SETTERS)
442  if (imag)
443  c.imag (num);
444  else
445  c.real (num);
446 #elif defined (HAVE_CXX_COMPLEX_REFERENCE_ACCESSORS)
447  if (imag)
448  c.imag () = num;
449  else
450  c.real () = num;
451 #else
452  if (imag)
453  c = Complex (c.real (), num);
454  else
455  c = Complex (num, c.imag ());
456 #endif
457 }
458 
459 Complex
460 octave::string::str2double (const std::string& str_arg)
461 {
462  Complex val (0.0, 0.0);
463 
464  std::string str = str_arg;
465 
466  // FIXME: removing all commas doesn't allow actual parsing.
467  // Example: "1,23.45" is wrong, but passes Octave.
468  str.erase (std::remove (str.begin (), str.end(), ','), str.end ());
469  std::istringstream is (str);
470 
471  double num;
472  bool i1, i2, s1, s2;
473 
474  if (is.eof ())
476  else if (! extract_num (is, num, i1, s1))
478  else
479  {
480  set_component (val, num, i1);
481 
482  if (! is.eof ())
483  {
484  if (! extract_num (is, num, i2, s2) || i1 == i2 || ! s2)
486  else
487  set_component (val, num, i2);
488  }
489  }
490 
491  return val;
492 }
493 
494 std::string
495 octave::string::u8_to_encoding (const std::string& who,
496  const std::string& u8_string,
497  const std::string& encoding)
498 {
499  const uint8_t *src = reinterpret_cast<const uint8_t *>
500  (u8_string.c_str ());
501  size_t srclen = u8_string.length ();
502 
503  size_t length;
504  char *native_str = octave_u8_conv_to_encoding (encoding.c_str (), src,
505  srclen, &length);
506 
507  if (! native_str)
508  {
509  if (errno == ENOSYS)
510  (*current_liboctave_error_handler)
511  ("%s: iconv() is not supported. Installing GNU libiconv and then "
512  "re-compiling Octave could fix this.", who.c_str ());
513  else
515  ("%s: converting from UTF-8 to codepage '%s' failed: %s",
516  who.c_str (), encoding.c_str (), std::strerror (errno));
517  }
518 
520  frame.add_fcn (::free, static_cast<void *> (native_str));
521 
522  std::string retval = std::string (native_str, length);
523 
524  return retval;
525 }
526 
527 std::string
528 octave::string::u8_from_encoding (const std::string& who,
529  const std::string& native_string,
530  const std::string& encoding)
531 {
532  const char *src = native_string.c_str ();
533  size_t srclen = native_string.length ();
534 
535  size_t length;
536  uint8_t *utf8_str = octave_u8_conv_from_encoding (encoding.c_str (), src,
537  srclen, &length);
538  if (! utf8_str)
539  {
540  if (errno == ENOSYS)
541  (*current_liboctave_error_handler)
542  ("%s: iconv() is not supported. Installing GNU libiconv and then "
543  "re-compiling Octave could fix this.", who.c_str ());
544  else
546  ("%s: converting from codepage '%s' to UTF-8 failed: %s",
547  who.c_str (), encoding.c_str (), std::strerror (errno));
548  }
549 
551  frame.add_fcn (::free, static_cast<void *> (utf8_str));
552 
553  std::string retval = std::string (reinterpret_cast<char *> (utf8_str), length);
554 
555  return retval;
556 }
557 
558 unsigned int
559 octave::string::u8_validate (const std::string& who,
560  std::string& in_str,
562 {
563  std::string out_str;
564 
565  unsigned int num_replacements = 0;
566  const char *in_chr = in_str.c_str ();
567  const char *inv_utf8 = in_chr;
568  const char * const in_end = in_chr + in_str.length ();
569  while (inv_utf8 && in_chr < in_end)
570  {
571  inv_utf8 = reinterpret_cast<const char *>
572  (octave_u8_check_wrapper (reinterpret_cast<const uint8_t *> (in_chr),
573  in_end - in_chr));
574 
575  if (inv_utf8 == nullptr)
576  out_str.append (in_chr, in_end - in_chr);
577  else
578  {
579  num_replacements++;
580  out_str.append (in_chr, inv_utf8 - in_chr);
581  in_chr = inv_utf8 + 1;
582 
583  if (type == U8_REPLACEMENT_CHAR)
584  out_str.append ("\xef\xbf\xbd");
585  else if (type == U8_ISO_8859_1)
586  {
587  std::string fallback = "iso-8859-1";
588  size_t lengthp;
589  uint8_t *val_utf8 = octave_u8_conv_from_encoding
590  (fallback.c_str (), inv_utf8, 1, &lengthp);
591 
592  if (! val_utf8)
593  (*current_liboctave_error_handler)
594  ("%s: converting from codepage '%s' to UTF-8 failed: %s",
595  who.c_str (), fallback.c_str (), std::strerror (errno));
596 
598  frame.add_fcn (::free, static_cast<void *> (val_utf8));
599 
600  out_str.append (reinterpret_cast<const char *> (val_utf8),
601  lengthp);
602  }
603  }
604  }
605 
606  in_str = out_str;
607  return num_replacements;
608 }
609 
610 template <typename T>
611 std::string
612 rational_approx (T val, int len)
613 {
614  std::string s;
615 
616  if (len <= 0)
617  len = 10;
618 
619  if (octave::math::isinf (val))
620  {
621  if (val > 0)
622  s = "1/0";
623  else
624  s = "-1/0";
625  }
626  else if (octave::math::isnan (val))
627  s = "0/0";
628  else if (val < std::numeric_limits<int>::min ()
630  || octave::math::x_nint (val) == val)
631  {
632  std::ostringstream buf;
633  buf.flags (std::ios::fixed);
634  buf << std::setprecision (0) << octave::math::round (val);
635  s = buf.str ();
636  }
637  else
638  {
639  T lastn = 1;
640  T lastd = 0;
641  T n = octave::math::round (val);
642  T d = 1;
643  T frac = val - n;
644  int m = 0;
645 
646  std::ostringstream init_buf;
647  init_buf.flags (std::ios::fixed);
648  init_buf << std::setprecision (0) << static_cast<int> (n);
649  s = init_buf.str ();
650 
651  while (true)
652  {
653  T flip = 1 / frac;
654  T step = octave::math::round (flip);
655  T nextn = n;
656  T nextd = d;
657 
658  // Have we converged to 1/intmax ?
659  if (std::abs (flip) > static_cast<T> (std::numeric_limits<int>::max ()))
660  {
661  lastn = n;
662  lastd = d;
663  break;
664  }
665 
666  frac = flip - step;
667  n = step * n + lastn;
668  d = step * d + lastd;
669  lastn = nextn;
670  lastd = nextd;
671 
672  std::ostringstream buf;
673  buf.flags (std::ios::fixed);
674  buf << std::setprecision (0) << static_cast<int> (n)
675  << '/' << static_cast<int> (d);
676  m++;
677 
678  if (n < 0 && d < 0)
679  {
680  // Double negative, string can be two characters longer.
681  if (buf.str ().length () > static_cast<unsigned int> (len + 2))
682  break;
683  }
684  else
685  {
686  if (buf.str ().length () > static_cast<unsigned int> (len))
687  break;
688  }
689 
692  break;
693 
694  s = buf.str ();
695  }
696 
697  if (lastd < 0)
698  {
699  // Move negative sign from denominator to numerator
700  lastd = - lastd;
701  lastn = - lastn;
702  std::ostringstream buf;
703  buf.flags (std::ios::fixed);
704  buf << std::setprecision (0) << static_cast<int> (lastn)
705  << '/' << static_cast<int> (lastd);
706  s = buf.str ();
707  }
708  }
709 
710  return s;
711 }
712 
713 // instantiate the template for float and double
714 template std::string rational_approx <float> (float val, int len);
715 template std::string rational_approx <double> (double val, int len);
#define Inf
Definition: Faddeeva.cc:247
#define NaN
Definition: Faddeeva.cc:248
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:230
charNDArray min(char d, const charNDArray &m)
Definition: chNDArray.cc:207
octave_idx_type numel(void) const
Number of elements in the array.
Definition: Array.h:377
bool isvector(void) const
Size of the specified dimension.
Definition: Array.h:574
octave_idx_type rows(void) const
Definition: Array.h:415
const dim_vector & dims(void) const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:453
void add_fcn(void(*fcn)(Params...), Args &&... args)
ColumnVector imag(const ComplexColumnVector &a)
Definition: dColVector.cc:143
OCTAVE_NORETURN liboctave_error_handler current_liboctave_error_handler
Definition: lo-error.c:41
#define octave_NA
Definition: lo-ieee.h:41
F77_RET_T const F77_DBLE const F77_DBLE F77_DBLE * d
T octave_idx_type m
Definition: mx-inlines.cc:773
octave_idx_type n
Definition: mx-inlines.cc:753
T x_nint(T x)
Definition: lo-mappers.h:262
bool isnan(bool)
Definition: lo-mappers.h:178
bool isinf(double x)
Definition: lo-mappers.h:203
double round(double x)
Definition: lo-mappers.h:136
bool strncmp(const T &str_a, const T &str_b, const typename T::size_type n)
True if the first N characters are the same.
Definition: oct-string.cc:156
bool strncmpi(const T &str_a, const T &str_b, const typename T::size_type n)
True if the first N characters are the same, ignoring case.
Definition: oct-string.cc:185
OCTAVE_API std::string u8_to_encoding(const std::string &who, const std::string &u8_string, const std::string &encoding)
Definition: oct-string.cc:495
bool strcmp(const T &str_a, const T &str_b)
True if strings are the same.
Definition: oct-string.cc:122
bool strcmpi(const T &str_a, const T &str_b)
True if strings are the same, ignoring case.
Definition: oct-string.cc:139
OCTAVE_API std::string u8_from_encoding(const std::string &who, const std::string &native_string, const std::string &encoding)
Definition: oct-string.cc:528
OCTAVE_API Complex str2double(const std::string &str_arg)
Definition: oct-string.cc:460
OCTAVE_API unsigned int u8_validate(const std::string &who, std::string &in_string, const u8_fallback_type type=U8_REPLACEMENT_CHAR)
Definition: oct-string.cc:559
std::complex< double > Complex
Definition: oct-cmplx.h:33
template std::string rational_approx< double >(double val, int len)
template std::string rational_approx< float >(float val, int len)
static void set_component(Complex &c, double num, bool imag)
Definition: oct-string.cc:439
static double single_num(std::istringstream &is)
Definition: oct-string.cc:243
static bool str_data_cmp(const typename T::value_type *a, const typename T::value_type *b, const typename T::size_type n)
Definition: oct-string.cc:47
static bool is_imag_unit(int c)
Definition: oct-string.cc:239
static std::istringstream & extract_num(std::istringstream &is, double &num, bool &imag, bool &have_sign)
Definition: oct-string.cc:299
static bool str_data_cmpi(const typename T::value_type *a, const typename T::value_type *b, const typename T::size_type n)
Definition: oct-string.cc:58
#define INSTANTIATE_OCTAVE_STRING(T)
Definition: oct-string.cc:213
T::size_type numel(const T &str)
Definition: oct-string.cc:71
bool sizes_cmp(const T &str_a, const T &str_b)
Definition: oct-string.cc:92
T::size_type strlen(const typename T::value_type *str)
Definition: oct-string.cc:85
std::string rational_approx(T val, int len)
Definition: oct-string.cc:612
void free(void *)
octave_value::octave_value(const Array< char > &chm, char type) return retval
Definition: ov.cc:811
static T abs(T x)
Definition: pr-output.cc:1678
uint8_t * octave_u8_conv_from_encoding(const char *fromcode, const char *src, size_t srclen, size_t *lengthp)
char * octave_u8_conv_to_encoding(const char *tocode, const uint8_t *src, size_t srclen, size_t *lengthp)
const uint8_t * octave_u8_check_wrapper(const uint8_t *src, size_t n)
F77_RET_T len
Definition: xerbla.cc:61