GNU Octave  4.4.1
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
__ichol__.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2014-2018 Eduardo Ramos Fernández <eduradical951@gmail.com>
4 Copyright (C) 2013-2018 Kai T. Ohlhus <k.ohlhus@gmail.com>
5 
6 This file is part of Octave.
7 
8 Octave is free software: you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <https://www.gnu.org/licenses/>.
21 
22 */
23 
24 #if defined (HAVE_CONFIG_H)
25 # include "config.h"
26 #endif
27 
28 #include "oct-locbuf.h"
29 #include "oct-norm.h"
30 
31 #include "defun.h"
32 #include "error.h"
33 
34 #include "builtin-defun-decls.h"
35 
36 // Secondary functions for complex and real case used in ichol algorithms.
38 {
39 #if defined (HAVE_CXX_COMPLEX_SETTERS)
40  b.imag (-b.imag ());
41 #elif defined (HAVE_CXX_COMPLEX_REFERENCE_ACCESSORS)
42  b.imag () = -b.imag ();
43 #else
44  b = b.conj ();
45 #endif
46  return a * b;
47 }
48 
49 double ichol_mult_real (double a, double b)
50 {
51  return a * b;
52 }
53 
55 {
56  if (pivot.imag () != 0)
57  error ("ichol: non-real pivot encountered. The matrix must be Hermitian.");
58  else if (pivot.real () < 0)
59  error ("ichol: negative pivot encountered");
60 
61  return true;
62 }
63 
64 bool ichol_checkpivot_real (double pivot)
65 {
66  if (pivot < 0)
67  error ("ichol: negative pivot encountered");
68 
69  return true;
70 }
71 
72 template <typename octave_matrix_t, typename T, T (*ichol_mult) (T, T),
73  bool (*ichol_checkpivot) (T)>
74 void ichol_0 (octave_matrix_t& sm, const std::string michol = "off")
75 {
76  const octave_idx_type n = sm.cols ();
77  octave_idx_type j1, jend, j2, jrow, jjrow, j, jw, i, k, jj, r;
78  T tl;
79 
80  char opt;
81  enum {OFF, ON};
82  if (michol == "on")
83  opt = ON;
84  else
85  opt = OFF;
86 
87  // Input matrix pointers
88  octave_idx_type *cidx = sm.cidx ();
89  octave_idx_type *ridx = sm.ridx ();
90  T* data = sm.data ();
91 
92  // Working arrays
96  OCTAVE_LOCAL_BUFFER (T, dropsums, n);
97 
98  // Initialize working arrays
99  for (i = 0; i < n; i++)
100  {
101  iw[i] = -1;
102  Llist[i] = -1;
103  Lfirst[i] = -1;
104  dropsums[i] = 0;
105  }
106 
107  // Loop over all columns
108  for (k = 0; k < n; k++)
109  {
110  j1 = cidx[k];
111  j2 = cidx[k+1];
112  for (j = j1; j < j2; j++)
113  iw[ridx[j]] = j;
114 
115  jrow = Llist[k];
116  // Iterate over each non-zero element in the actual row.
117  while (jrow != -1)
118  {
119  jjrow = Lfirst[jrow];
120  jend = cidx[jrow+1];
121  for (jj = jjrow; jj < jend; jj++)
122  {
123  r = ridx[jj];
124  jw = iw[r];
125  tl = ichol_mult (data[jj], data[jjrow]);
126  if (jw != -1)
127  data[jw] -= tl;
128  else
129  // Because of the symmetry of the matrix, we know
130  // the drops in the column r are also in the column k.
131  if (opt == ON)
132  {
133  dropsums[r] -= tl;
134  dropsums[k] -= tl;
135  }
136  }
137  // Update the linked list and the first entry of the actual column.
138  if ((jjrow + 1) < jend)
139  {
140  Lfirst[jrow]++;
141  j = jrow;
142  jrow = Llist[jrow];
143  Llist[j] = Llist[ridx[Lfirst[j]]];
144  Llist[ridx[Lfirst[j]]] = j;
145  }
146  else
147  jrow = Llist[jrow];
148  }
149 
150  if (opt == ON)
151  data[j1] += dropsums[k];
152 
153  // Test for j1 == j2 must be first to avoid invalid ridx[j1] access
154  if (j1 == j2 || ridx[j1] != k)
155  error ("ichol: encountered a pivot equal to 0");
156 
157  if (! ichol_checkpivot (data[j1]))
158  break;
159 
160  data[cidx[k]] = std::sqrt (data[j1]);
161 
162  // Update Llist and Lfirst with the k-column information. Also,
163  // scale the column elements by the pivot and reset the working array iw.
164  if (k < (n - 1))
165  {
166  iw[ridx[j1]] = -1;
167  for (i = j1 + 1; i < j2; i++)
168  {
169  iw[ridx[i]] = -1;
170  data[i] /= data[j1];
171  }
172  Lfirst[k] = j1;
173  if ((Lfirst[k] + 1) < j2)
174  {
175  Lfirst[k]++;
176  jjrow = ridx[Lfirst[k]];
177  Llist[k] = Llist[jjrow];
178  Llist[jjrow] = k;
179  }
180  }
181  }
182 }
183 
184 DEFUN (__ichol0__, args, ,
185  doc: /* -*- texinfo -*-
186 @deftypefn {} {@var{L} =} __ichol0__ (@var{A}, @var{michol})
187 Undocumented internal function.
188 @end deftypefn */)
189 {
190  if (args.length () != 2)
191  print_usage ();
192 
193  std::string michol = args(1).string_value ();
194 
195  // In ICHOL0 algorithm the zero-pattern of the input matrix is preserved
196  // so its structure does not change during the algorithm. The same input
197  // matrix is used to build the output matrix due to that fact.
198  if (! args(0).iscomplex ())
199  {
200  SparseMatrix sm = Ftril (args(0))(0).sparse_matrix_value ();
202  ichol_checkpivot_real> (sm, michol);
203  return ovl (sm);
204  }
205  else
206  {
208  Ftril (args(0))(0).sparse_complex_matrix_value ();
210  ichol_checkpivot_complex> (sm, michol);
211  return ovl (sm);
212  }
213 }
214 
215 template <typename octave_matrix_t, typename T, T (*ichol_mult) (T, T),
216  bool (*ichol_checkpivot) (T)>
217 void ichol_t (const octave_matrix_t& sm, octave_matrix_t& L, const T* cols_norm,
218  const T droptol, const std::string michol = "off")
219 {
220 
221  const octave_idx_type n = sm.cols ();
222  octave_idx_type j, jrow, jend, jjrow, i, k, jj, total_len,
223  w_len, max_len, ind;
224  char opt;
225  enum {OFF, ON};
226  if (michol == "on")
227  opt = ON;
228  else
229  opt = OFF;
230 
231  // Input matrix pointers
232  octave_idx_type *cidx = sm.cidx ();
233  octave_idx_type *ridx = sm.ridx ();
234  T* data = sm.data ();
235 
236  // Output matrix data structures. Because the final zero pattern pattern of
237  // the output matrix is not known due to fill-in elements, a heuristic
238  // approach has been adopted for memory allocation. The size of ridx_out_l
239  // and data_out_l is incremented 10% of their actual size (nnz (A) in the
240  // beginning). If that amount is less than n, their size is just incremented
241  // in n elements. This way the number of reallocations decreases throughout
242  // the process, obtaining a good performance.
243  max_len = sm.nnz ();
244  max_len += (0.1 * max_len) > n ? 0.1 * max_len : n;
245  Array <octave_idx_type> cidx_out_l (dim_vector (n + 1, 1));
246  octave_idx_type *cidx_l = cidx_out_l.fortran_vec ();
247  Array <octave_idx_type> ridx_out_l (dim_vector (max_len ,1));
248  octave_idx_type *ridx_l = ridx_out_l.fortran_vec ();
249  Array <T> data_out_l (dim_vector (max_len, 1));
250  T* data_l = data_out_l.fortran_vec ();
251 
252  // Working arrays
253  OCTAVE_LOCAL_BUFFER (T, w_data, n);
256  OCTAVE_LOCAL_BUFFER (T, col_drops, n);
257  std::vector<octave_idx_type> vec (n, 0);
258  std::vector<bool> mark (n, false);
259 
260  T zero = T (0);
261  cidx_l[0] = cidx[0];
262  for (i = 0; i < n; i++)
263  {
264  Llist[i] = -1;
265  Lfirst[i] = -1;
266  w_data[i] = 0;
267  col_drops[i] = zero;
268  }
269 
270  total_len = 0;
271  for (k = 0; k < n; k++)
272  {
273  ind = 0;
274  for (j = cidx[k]; j < cidx[k+1]; j++)
275  {
276  w_data[ridx[j]] = data[j];
277  // Mark column index, intentionally outside the if-clause to ensure
278  // that mark[k] will be set to true as well.
279  mark[ridx[j]] = true;
280  if (ridx[j] != k)
281  {
282  vec[ind] = ridx[j];
283  ind++;
284  }
285  }
286  jrow = Llist[k];
287  while (jrow != -1)
288  {
289  jjrow = Lfirst[jrow];
290  jend = cidx_l[jrow+1];
291  for (jj = jjrow; jj < jend; jj++)
292  {
293  j = ridx_l[jj];
294  // If the element in the j position of the row is zero,
295  // then it will become non-zero, so we add it to the
296  // vector that tracks non-zero elements in the working row.
297  if (! mark[j])
298  {
299  mark[j] = true;
300  vec[ind] = j;
301  ind++;
302  }
303  w_data[j] -= ichol_mult (data_l[jj], data_l[jjrow]);
304  }
305  // Update the actual column first element and
306  // update the linked list of the jrow row.
307  if ((jjrow + 1) < jend)
308  {
309  Lfirst[jrow]++;
310  j = jrow;
311  jrow = Llist[jrow];
312  Llist[j] = Llist[ridx_l[Lfirst[j]]];
313  Llist[ridx_l[Lfirst[j]]] = j;
314  }
315  else
316  jrow = Llist[jrow];
317  }
318 
319  // Resizing output arrays
320  if ((max_len - total_len) < n)
321  {
322  max_len += (0.1 * max_len) > n ? 0.1 * max_len : n;
323  data_out_l.resize (dim_vector (max_len, 1));
324  data_l = data_out_l.fortran_vec ();
325  ridx_out_l.resize (dim_vector (max_len, 1));
326  ridx_l = ridx_out_l.fortran_vec ();
327  }
328 
329  // The sorting of the non-zero elements of the working column can be
330  // handled in a couple of ways. The most efficient two I found, are
331  // keeping the elements in an ordered binary search tree dynamically or
332  // keep them unsorted in a vector and at the end of the outer iteration
333  // order them. The last approach exhibits lower execution times.
334  std::sort (vec.begin (), vec.begin () + ind);
335 
336  data_l[total_len] = w_data[k];
337  ridx_l[total_len] = k;
338  w_len = 1;
339 
340  // Extract the non-zero elements of working column and
341  // drop the elements that are lower than droptol * cols_norm[k].
342  for (i = 0; i < ind ; i++)
343  {
344  jrow = vec[i];
345  if (w_data[jrow] != zero)
346  {
347  if (std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
348  {
349  if (opt == ON)
350  {
351  col_drops[k] += w_data[jrow];
352  col_drops[jrow] += w_data[jrow];
353  }
354  }
355  else
356  {
357  data_l[total_len + w_len] = w_data[jrow];
358  ridx_l[total_len + w_len] = jrow;
359  w_len++;
360  }
361  }
362  // Clear mark, vec, and w_data. However, mark[k] is not set to zero.
363  mark[jrow] = false;
364  vec[i] = 0;
365  w_data[jrow] = zero;
366  }
367 
368  // Compensate column sums --> michol option
369  if (opt == ON)
370  data_l[total_len] += col_drops[k];
371 
372  if (data_l[total_len] == zero)
373  error ("ichol: encountered a pivot equal to 0");
374 
375  if (! ichol_checkpivot (data_l[total_len]))
376  break;
377 
378  // Once elements are dropped and compensation of column sums are done,
379  // scale the elements by the pivot.
380  data_l[total_len] = std::sqrt (data_l[total_len]);
381  for (jj = total_len + 1; jj < (total_len + w_len); jj++)
382  data_l[jj] /= data_l[total_len];
383  total_len += w_len;
384  // Check if there are too many elements to be indexed with
385  // octave_idx_type type due to fill-in during the process.
386  if (total_len < 0)
387  error ("ichol: integer overflow. Too many fill-in elements in L");
388 
389  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len;
390 
391  // Update Llist and Lfirst with the k-column information.
392  if (k < (n - 1))
393  {
394  Lfirst[k] = cidx_l[k];
395  if ((Lfirst[k] + 1) < cidx_l[k+1])
396  {
397  Lfirst[k]++;
398  jjrow = ridx_l[Lfirst[k]];
399  Llist[k] = Llist[jjrow];
400  Llist[jjrow] = k;
401  }
402  }
403  }
404 
405  // Build the output matrices
406  L = octave_matrix_t (n, n, total_len);
407 
408  for (i = 0; i <= n; i++)
409  L.cidx (i) = cidx_l[i];
410 
411  for (i = 0; i < total_len; i++)
412  {
413  L.ridx (i) = ridx_l[i];
414  L.data (i) = data_l[i];
415  }
416 }
417 
418 DEFUN (__icholt__, args, ,
419  doc: /* -*- texinfo -*-
420 @deftypefn {} {@var{L} =} __icholt__ (@var{A}, @var{droptol}, @var{michol})
421 Undocumented internal function.
422 @end deftypefn */)
423 {
424  if (args.length () != 3)
425  print_usage ();
426 
427  double droptol = args(1).double_value ();
428  std::string michol = args(2).string_value ();
429 
430  if (! args(0).iscomplex ())
431  {
432  SparseMatrix L;
433  SparseMatrix sm_l = Ftril (args(0))(0).sparse_matrix_value ();
436  (sm_l, L, xcolnorms (sm_l, 1).fortran_vec (), droptol, michol);
437 
438  return ovl (L);
439  }
440  else
441  {
443  SparseComplexMatrix sm_l =
444  Ftril (args(0))(0).sparse_complex_matrix_value ();
445  Array <Complex> cols_norm = xcolnorms (sm_l, 1);
448  (sm_l, L, cols_norm.fortran_vec (),
449  Complex (droptol), michol);
450 
451  return ovl (L);
452  }
453 }
454 
455 /*
456 %!test <51736>
457 %! k = 4;
458 %! n = 2^k;
459 %! Afull = diag (ones (n,1)) / ...
460 %! 2+triu ([zeros(n,2), [n.^-[1;1;2]*ones(1,n-2);zeros(n-3,n-2)]], 1);
461 %! A = sparse (Afull + Afull.');
462 %! opts.type = "ict";
463 %! opts.droptol = 0;
464 %! L = ichol (A, opts);
465 %! assert (norm (A - L*L.'), 0, 2*eps);
466 */
Complex ichol_mult_complex(Complex a, Complex b)
Definition: __ichol__.cc:37
ind
Definition: sub2ind.cc:107
OCTINTERP_API void print_usage(void)
Definition: defun.cc:54
OCTAVE_EXPORT octave_value_list or N dimensional array whose elements are all equal to the IEEE symbol zero divided by zero($0/0$)
for large enough k
Definition: lu.cc:617
const T * fortran_vec(void) const
Definition: Array.h:584
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:53
void error(const char *fmt,...)
Definition: error.cc:578
static T abs(T x)
Definition: pr-output.cc:1696
OCTAVE_EXPORT octave_value_list Ftril(const octave_value_list &args, int) and setting all other elements to zero. The optional second argument specifies how many diagonals above or below the main diagonal should also be set to zero. The default value of ar
Definition: tril.cc:374
calling an anonymous function involves an overhead quite comparable to the overhead of an m file function Passing a handle to a built in function is because the interpreter is not involved in the internal loop For a
Definition: cellfun.cc:400
void ichol_t(const octave_matrix_t &sm, octave_matrix_t &L, const T *cols_norm, const T droptol, const std::string michol="off")
Definition: __ichol__.cc:217
OCTAVE_API RowVector xcolnorms(const Matrix &m, double p)
Definition: oct-norm.cc:593
void resize(const dim_vector &dv, const T &rfv)
Resizing (with fill).
Definition: Array.cc:1010
OCTAVE_EXPORT octave_value_list isa nd deftypefn *return ovl(args(0).isinteger())
bool ichol_checkpivot_complex(Complex pivot)
Definition: __ichol__.cc:54
double ichol_mult_real(double a, double b)
Definition: __ichol__.cc:49
b
Definition: cellfun.cc:400
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:41
for i
Definition: data.cc:5264
void ichol_0(octave_matrix_t &sm, const std::string michol="off")
Definition: __ichol__.cc:74
std::complex< double > Complex
Definition: oct-cmplx.h:31
bool ichol_checkpivot_real(double pivot)
Definition: __ichol__.cc:64
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:87
If this string is the system will ring the terminal sometimes it is useful to be able to print the original representation of the string
Definition: utils.cc:888