GNU Octave  3.8.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
oct-sort.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2003-2013 David Bateman
4 Copyright (C) 2008-2009 Jaroslav Hajek
5 Copyright (C) 2009-2010 VZLU Prague
6 
7 This file is part of Octave.
8 
9 Octave is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 Octave is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with Octave; see the file COPYING. If not, see
21 <http://www.gnu.org/licenses/>.
22 
23 Code stolen in large part from Python's, listobject.c, which itself had
24 no license header. However, thanks to Tim Peters for the parts of the
25 code I ripped-off.
26 
27 As required in the Python license the short description of the changes
28 made are
29 
30 * convert the sorting code in listobject.cc into a generic class,
31  replacing PyObject* with the type of the class T.
32 
33 * replaced usages of malloc, free, memcpy and memmove by standard C++
34  new [], delete [] and std::copy and std::copy_backward. Note that replacing
35  memmove by std::copy is possible if the destination starts before the source.
36  If not, std::copy_backward needs to be used.
37 
38 * templatize comparison operator in most methods, provide possible dispatch
39 
40 * duplicate methods to avoid by-the-way indexed sorting
41 
42 * add methods for verifying sortedness of array
43 
44 * row sorting via breadth-first tree subsorting
45 
46 * binary lookup and sequential binary lookup optimized for dense downsampling.
47 
48 * NOTE: the memory management routines rely on the fact that delete [] silently
49  ignores null pointers. Don't gripe about the missing checks - they're there.
50 
51 
52 The Python license is
53 
54  PSF LICENSE AGREEMENT FOR PYTHON 2.3
55  --------------------------------------
56 
57  1. This LICENSE AGREEMENT is between the Python Software Foundation
58  ("PSF"), and the Individual or Organization ("Licensee") accessing and
59  otherwise using Python 2.3 software in source or binary form and its
60  associated documentation.
61 
62  2. Subject to the terms and conditions of this License Agreement, PSF
63  hereby grants Licensee a nonexclusive, royalty-free, world-wide
64  license to reproduce, analyze, test, perform and/or display publicly,
65  prepare derivative works, distribute, and otherwise use Python 2.3
66  alone or in any derivative version, provided, however, that PSF's
67  License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
68  2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are
69  retained in Python 2.3 alone or in any derivative version prepared by
70  Licensee.
71 
72  3. In the event Licensee prepares a derivative work that is based on
73  or incorporates Python 2.3 or any part thereof, and wants to make
74  the derivative work available to others as provided herein, then
75  Licensee hereby agrees to include in any such work a brief summary of
76  the changes made to Python 2.3.
77 
78  4. PSF is making Python 2.3 available to Licensee on an "AS IS"
79  basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
80  IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
81  DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
82  FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
83  INFRINGE ANY THIRD PARTY RIGHTS.
84 
85  5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
86  2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
87  A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
88  OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
89 
90  6. This License Agreement will automatically terminate upon a material
91  breach of its terms and conditions.
92 
93  7. Nothing in this License Agreement shall be deemed to create any
94  relationship of agency, partnership, or joint venture between PSF and
95  Licensee. This License Agreement does not grant permission to use PSF
96  trademarks or trade name in a trademark sense to endorse or promote
97  products or services of Licensee, or any third party.
98 
99  8. By copying, installing or otherwise using Python 2.3, Licensee
100  agrees to be bound by the terms and conditions of this License
101  Agreement.
102 */
103 
104 #ifdef HAVE_CONFIG_H
105 #include <config.h>
106 #endif
107 
108 #include <cassert>
109 #include <algorithm>
110 #include <functional>
111 #include <cstring>
112 #include <stack>
113 
114 #include "lo-mappers.h"
115 #include "quit.h"
116 #include "oct-sort.h"
117 #include "oct-locbuf.h"
118 
119 template <class T>
121  compare (ascending_compare), ms (0)
122 {
123 }
124 
125 template <class T>
126 octave_sort<T>::octave_sort (compare_fcn_type comp)
127  : compare (comp), ms (0)
128 {
129 }
130 
131 template <class T>
133 {
134  delete ms;
135 }
136 
137 template <class T>
138 void
140 {
141  if (mode == ASCENDING)
142  compare = ascending_compare;
143  else if (mode == DESCENDING)
144  compare = descending_compare;
145  else
146  compare = 0;
147 }
148 
149 template <class T>
150 template <class Comp>
151 void
153  octave_idx_type start, Comp comp)
154 {
155  if (start == 0)
156  ++start;
157 
158  for (; start < nel; ++start)
159  {
160  /* set l to where *start belongs */
161  octave_idx_type l = 0, r = start;
162  T pivot = data[start];
163  /* Invariants:
164  * pivot >= all in [lo, l).
165  * pivot < all in [r, start).
166  * The second is vacuously true at the start.
167  */
168  do
169  {
170  octave_idx_type p = l + ((r - l) >> 1);
171  if (comp (pivot, data[p]))
172  r = p;
173  else
174  l = p+1;
175  }
176  while (l < r);
177  /* The invariants still hold, so pivot >= all in [lo, l) and
178  pivot < all in [l, start), so pivot belongs at l. Note
179  that if there are elements equal to pivot, l points to the
180  first slot after them -- that's why this sort is stable.
181  Slide over to make room.
182  Caution: using memmove is much slower under MSVC 5;
183  we're not usually moving many slots. */
184  // NOTE: using swap and going upwards appears to be faster.
185  for (octave_idx_type p = l; p < start; p++)
186  std::swap (pivot, data[p]);
187  data[start] = pivot;
188  }
189 
190  return;
191 }
192 
193 template <class T>
194 template <class Comp>
195 void
197  octave_idx_type start, Comp comp)
198 {
199  if (start == 0)
200  ++start;
201 
202  for (; start < nel; ++start)
203  {
204  /* set l to where *start belongs */
205  octave_idx_type l = 0, r = start;
206  T pivot = data[start];
207  /* Invariants:
208  * pivot >= all in [lo, l).
209  * pivot < all in [r, start).
210  * The second is vacuously true at the start.
211  */
212  do
213  {
214  octave_idx_type p = l + ((r - l) >> 1);
215  if (comp (pivot, data[p]))
216  r = p;
217  else
218  l = p+1;
219  }
220  while (l < r);
221  /* The invariants still hold, so pivot >= all in [lo, l) and
222  pivot < all in [l, start), so pivot belongs at l. Note
223  that if there are elements equal to pivot, l points to the
224  first slot after them -- that's why this sort is stable.
225  Slide over to make room.
226  Caution: using memmove is much slower under MSVC 5;
227  we're not usually moving many slots. */
228  // NOTE: using swap and going upwards appears to be faster.
229  for (octave_idx_type p = l; p < start; p++)
230  std::swap (pivot, data[p]);
231  data[start] = pivot;
232  octave_idx_type ipivot = idx[start];
233  for (octave_idx_type p = l; p < start; p++)
234  std::swap (ipivot, idx[p]);
235  idx[start] = ipivot;
236  }
237 
238  return;
239 }
240 
241 /*
242 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi
243 is required on entry. "A run" is the longest ascending sequence, with
244 
245  lo[0] <= lo[1] <= lo[2] <= ...
246 
247 or the longest descending sequence, with
248 
249  lo[0] > lo[1] > lo[2] > ...
250 
251 DESCENDING is set to false in the former case, or to true in the latter.
252 For its intended use in a stable mergesort, the strictness of the defn of
253 "descending" is needed so that the caller can safely reverse a descending
254 sequence without violating stability (strict > ensures there are no equal
255 elements to get out of order).
256 
257 Returns -1 in case of error.
258 */
259 template <class T>
260 template <class Comp>
262 octave_sort<T>::count_run (T *lo, octave_idx_type nel, bool& descending,
263  Comp comp)
264 {
265  octave_idx_type n;
266  T *hi = lo + nel;
267 
268  descending = false;
269  ++lo;
270  if (lo == hi)
271  return 1;
272 
273  n = 2;
274 
275  if (comp (*lo, *(lo-1)))
276  {
277  descending = true;
278  for (lo = lo+1; lo < hi; ++lo, ++n)
279  {
280  if (comp (*lo, *(lo-1)))
281  ;
282  else
283  break;
284  }
285  }
286  else
287  {
288  for (lo = lo+1; lo < hi; ++lo, ++n)
289  {
290  if (comp (*lo, *(lo-1)))
291  break;
292  }
293  }
294 
295  return n;
296 }
297 
298 /*
299 Locate the proper position of key in a sorted vector; if the vector contains
300 an element equal to key, return the position immediately to the left of
301 the leftmost equal element. [gallop_right() does the same except returns
302 the position to the right of the rightmost equal element (if any).]
303 
304 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0.
305 
306 "hint" is an index at which to begin the search, 0 <= hint < n. The closer
307 hint is to the final result, the faster this runs.
308 
309 The return value is the int k in 0..n such that
310 
311  a[k-1] < key <= a[k]
312 
313 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW,
314 key belongs at index k; or, IOW, the first k elements of a should precede
315 key, and the last n-k should follow key.
316 
317 Returns -1 on error. See listsort.txt for info on the method.
318 */
319 template <class T>
320 template <class Comp>
323  octave_idx_type hint,
324  Comp comp)
325 {
326  octave_idx_type ofs;
327  octave_idx_type lastofs;
328  octave_idx_type k;
329 
330  a += hint;
331  lastofs = 0;
332  ofs = 1;
333  if (comp (*a, key))
334  {
335  /* a[hint] < key -- gallop right, until
336  * a[hint + lastofs] < key <= a[hint + ofs]
337  */
338  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
339  while (ofs < maxofs)
340  {
341  if (comp (a[ofs], key))
342  {
343  lastofs = ofs;
344  ofs = (ofs << 1) + 1;
345  if (ofs <= 0) /* int overflow */
346  ofs = maxofs;
347  }
348  else /* key <= a[hint + ofs] */
349  break;
350  }
351  if (ofs > maxofs)
352  ofs = maxofs;
353  /* Translate back to offsets relative to &a[0]. */
354  lastofs += hint;
355  ofs += hint;
356  }
357  else
358  {
359  /* key <= a[hint] -- gallop left, until
360  * a[hint - ofs] < key <= a[hint - lastofs]
361  */
362  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
363  while (ofs < maxofs)
364  {
365  if (comp (*(a-ofs), key))
366  break;
367  /* key <= a[hint - ofs] */
368  lastofs = ofs;
369  ofs = (ofs << 1) + 1;
370  if (ofs <= 0) /* int overflow */
371  ofs = maxofs;
372  }
373  if (ofs > maxofs)
374  ofs = maxofs;
375  /* Translate back to positive offsets relative to &a[0]. */
376  k = lastofs;
377  lastofs = hint - ofs;
378  ofs = hint - k;
379  }
380  a -= hint;
381 
382  /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
383  * right of lastofs but no farther right than ofs. Do a binary
384  * search, with invariant a[lastofs-1] < key <= a[ofs].
385  */
386  ++lastofs;
387  while (lastofs < ofs)
388  {
389  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
390 
391  if (comp (a[m], key))
392  lastofs = m+1; /* a[m] < key */
393  else
394  ofs = m; /* key <= a[m] */
395  }
396 
397  return ofs;
398 }
399 
400 /*
401 Exactly like gallop_left(), except that if key already exists in a[0:n],
402 finds the position immediately to the right of the rightmost equal value.
403 
404 The return value is the int k in 0..n such that
405 
406  a[k-1] <= key < a[k]
407 
408 or -1 if error.
409 
410 The code duplication is massive, but this is enough different given that
411 we're sticking to "<" comparisons that it's much harder to follow if
412 written as one routine with yet another "left or right?" flag.
413 */
414 template <class T>
415 template <class Comp>
418  octave_idx_type hint,
419  Comp comp)
420 {
421  octave_idx_type ofs;
422  octave_idx_type lastofs;
423  octave_idx_type k;
424 
425  a += hint;
426  lastofs = 0;
427  ofs = 1;
428  if (comp (key, *a))
429  {
430  /* key < a[hint] -- gallop left, until
431  * a[hint - ofs] <= key < a[hint - lastofs]
432  */
433  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
434  while (ofs < maxofs)
435  {
436  if (comp (key, *(a-ofs)))
437  {
438  lastofs = ofs;
439  ofs = (ofs << 1) + 1;
440  if (ofs <= 0) /* int overflow */
441  ofs = maxofs;
442  }
443  else /* a[hint - ofs] <= key */
444  break;
445  }
446  if (ofs > maxofs)
447  ofs = maxofs;
448  /* Translate back to positive offsets relative to &a[0]. */
449  k = lastofs;
450  lastofs = hint - ofs;
451  ofs = hint - k;
452  }
453  else
454  {
455  /* a[hint] <= key -- gallop right, until
456  * a[hint + lastofs] <= key < a[hint + ofs]
457  */
458  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
459  while (ofs < maxofs)
460  {
461  if (comp (key, a[ofs]))
462  break;
463  /* a[hint + ofs] <= key */
464  lastofs = ofs;
465  ofs = (ofs << 1) + 1;
466  if (ofs <= 0) /* int overflow */
467  ofs = maxofs;
468  }
469  if (ofs > maxofs)
470  ofs = maxofs;
471  /* Translate back to offsets relative to &a[0]. */
472  lastofs += hint;
473  ofs += hint;
474  }
475  a -= hint;
476 
477  /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
478  * right of lastofs but no farther right than ofs. Do a binary
479  * search, with invariant a[lastofs-1] <= key < a[ofs].
480  */
481  ++lastofs;
482  while (lastofs < ofs)
483  {
484  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
485 
486  if (comp (key, a[m]))
487  ofs = m; /* key < a[m] */
488  else
489  lastofs = m+1; /* a[m] <= key */
490  }
491 
492  return ofs;
493 }
494 
495 static inline octave_idx_type
497 {
498  unsigned int nbits = 3;
499  octave_idx_type n2 = static_cast<octave_idx_type> (n) >> 8;
500 
501  /* Round up:
502  * If n < 256, to a multiple of 8.
503  * If n < 2048, to a multiple of 64.
504  * If n < 16384, to a multiple of 512.
505  * If n < 131072, to a multiple of 4096.
506  * If n < 1048576, to a multiple of 32768.
507  * If n < 8388608, to a multiple of 262144.
508  * If n < 67108864, to a multiple of 2097152.
509  * If n < 536870912, to a multiple of 16777216.
510  * ...
511  * If n < 2**(5+3*i), to a multiple of 2**(3*i).
512  *
513  * This over-allocates proportional to the list size, making room
514  * for additional growth. The over-allocation is mild, but is
515  * enough to give linear-time amortized behavior over a long
516  * sequence of appends() in the presence of a poorly-performing
517  * system realloc() (which is a reality, e.g., across all flavors
518  * of Windows, with Win9x behavior being particularly bad -- and
519  * we've still got address space fragmentation problems on Win9x
520  * even with this scheme, although it requires much longer lists to
521  * provoke them than it used to).
522  */
523  while (n2)
524  {
525  n2 >>= 3;
526  nbits += 3;
527  }
528 
529  return ((n >> nbits) + 1) << nbits;
530 }
531 
532 /* Ensure enough temp memory for 'need' array slots is available.
533  * Returns 0 on success and -1 if the memory can't be gotten.
534  */
535 template <class T>
536 void
538 {
539  if (need <= alloced)
540  return;
541 
542  need = roundupsize (need);
543  /* Don't realloc! That can cost cycles to copy the old data, but
544  * we don't care what's in the block.
545  */
546  delete [] a;
547  delete [] ia; // Must do this or fool possible next getmemi.
548  a = new T [need];
549  alloced = need;
550 
551 }
552 
553 template <class T>
554 void
556 {
557  if (ia && need <= alloced)
558  return;
559 
560  need = roundupsize (need);
561  /* Don't realloc! That can cost cycles to copy the old data, but
562  * we don't care what's in the block.
563  */
564  delete [] a;
565  delete [] ia;
566 
567  a = new T [need];
568  ia = new octave_idx_type [need];
569  alloced = need;
570 }
571 
572 /* Merge the na elements starting at pa with the nb elements starting at pb
573  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
574  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
575  * merge, and should have na <= nb. See listsort.txt for more info.
576  * Return 0 if successful, -1 if error.
577  */
578 template <class T>
579 template <class Comp>
580 int
582  T *pb, octave_idx_type nb,
583  Comp comp)
584 {
585  octave_idx_type k;
586  T *dest;
587  int result = -1; /* guilty until proved innocent */
588  octave_idx_type min_gallop = ms->min_gallop;
589 
590  ms->getmem (na);
591 
592  std::copy (pa, pa + na, ms->a);
593  dest = pa;
594  pa = ms->a;
595 
596  *dest++ = *pb++;
597  --nb;
598  if (nb == 0)
599  goto Succeed;
600  if (na == 1)
601  goto CopyB;
602 
603  for (;;)
604  {
605  octave_idx_type acount = 0; /* # of times A won in a row */
606  octave_idx_type bcount = 0; /* # of times B won in a row */
607 
608  /* Do the straightforward thing until (if ever) one run
609  * appears to win consistently.
610  */
611  for (;;)
612  {
613 
614  // FIXME: these loops are candidates for further optimizations.
615  // Rather than testing everything in each cycle, it may be more
616  // efficient to do it in hunks.
617  if (comp (*pb, *pa))
618  {
619  *dest++ = *pb++;
620  ++bcount;
621  acount = 0;
622  --nb;
623  if (nb == 0)
624  goto Succeed;
625  if (bcount >= min_gallop)
626  break;
627  }
628  else
629  {
630  *dest++ = *pa++;
631  ++acount;
632  bcount = 0;
633  --na;
634  if (na == 1)
635  goto CopyB;
636  if (acount >= min_gallop)
637  break;
638  }
639  }
640 
641  /* One run is winning so consistently that galloping may
642  * be a huge win. So try that, and continue galloping until
643  * (if ever) neither run appears to be winning consistently
644  * anymore.
645  */
646  ++min_gallop;
647  do
648  {
649  min_gallop -= min_gallop > 1;
650  ms->min_gallop = min_gallop;
651  k = gallop_right (*pb, pa, na, 0, comp);
652  acount = k;
653  if (k)
654  {
655  if (k < 0)
656  goto Fail;
657  dest = std::copy (pa, pa + k, dest);
658  pa += k;
659  na -= k;
660  if (na == 1)
661  goto CopyB;
662  /* na==0 is impossible now if the comparison
663  * function is consistent, but we can't assume
664  * that it is.
665  */
666  if (na == 0)
667  goto Succeed;
668  }
669  *dest++ = *pb++;
670  --nb;
671  if (nb == 0)
672  goto Succeed;
673 
674  k = gallop_left (*pa, pb, nb, 0, comp);
675  bcount = k;
676  if (k)
677  {
678  if (k < 0)
679  goto Fail;
680  dest = std::copy (pb, pb + k, dest);
681  pb += k;
682  nb -= k;
683  if (nb == 0)
684  goto Succeed;
685  }
686  *dest++ = *pa++;
687  --na;
688  if (na == 1)
689  goto CopyB;
690  }
691  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
692 
693  ++min_gallop; /* penalize it for leaving galloping mode */
694  ms->min_gallop = min_gallop;
695  }
696 
697 Succeed:
698  result = 0;
699 
700 Fail:
701  if (na)
702  std::copy (pa, pa + na, dest);
703  return result;
704 
705 CopyB:
706  /* The last element of pa belongs at the end of the merge. */
707  std::copy (pb, pb + nb, dest);
708  dest[nb] = *pa;
709 
710  return 0;
711 }
712 
713 template <class T>
714 template <class Comp>
715 int
717  T *pb, octave_idx_type *ipb, octave_idx_type nb,
718  Comp comp)
719 {
720  octave_idx_type k;
721  T *dest;
722  octave_idx_type *idest;
723  int result = -1; /* guilty until proved innocent */
724  octave_idx_type min_gallop = ms->min_gallop;
725 
726  ms->getmemi (na);
727 
728  std::copy (pa, pa + na, ms->a);
729  std::copy (ipa, ipa + na, ms->ia);
730  dest = pa; idest = ipa;
731  pa = ms->a; ipa = ms->ia;
732 
733  *dest++ = *pb++; *idest++ = *ipb++;
734  --nb;
735  if (nb == 0)
736  goto Succeed;
737  if (na == 1)
738  goto CopyB;
739 
740  for (;;)
741  {
742  octave_idx_type acount = 0; /* # of times A won in a row */
743  octave_idx_type bcount = 0; /* # of times B won in a row */
744 
745  /* Do the straightforward thing until (if ever) one run
746  * appears to win consistently.
747  */
748  for (;;)
749  {
750 
751  if (comp (*pb, *pa))
752  {
753  *dest++ = *pb++; *idest++ = *ipb++;
754  ++bcount;
755  acount = 0;
756  --nb;
757  if (nb == 0)
758  goto Succeed;
759  if (bcount >= min_gallop)
760  break;
761  }
762  else
763  {
764  *dest++ = *pa++; *idest++ = *ipa++;
765  ++acount;
766  bcount = 0;
767  --na;
768  if (na == 1)
769  goto CopyB;
770  if (acount >= min_gallop)
771  break;
772  }
773  }
774 
775  /* One run is winning so consistently that galloping may
776  * be a huge win. So try that, and continue galloping until
777  * (if ever) neither run appears to be winning consistently
778  * anymore.
779  */
780  ++min_gallop;
781  do
782  {
783  min_gallop -= min_gallop > 1;
784  ms->min_gallop = min_gallop;
785  k = gallop_right (*pb, pa, na, 0, comp);
786  acount = k;
787  if (k)
788  {
789  if (k < 0)
790  goto Fail;
791  dest = std::copy (pa, pa + k, dest);
792  idest = std::copy (ipa, ipa + k, idest);
793  pa += k; ipa += k;
794  na -= k;
795  if (na == 1)
796  goto CopyB;
797  /* na==0 is impossible now if the comparison
798  * function is consistent, but we can't assume
799  * that it is.
800  */
801  if (na == 0)
802  goto Succeed;
803  }
804  *dest++ = *pb++; *idest++ = *ipb++;
805  --nb;
806  if (nb == 0)
807  goto Succeed;
808 
809  k = gallop_left (*pa, pb, nb, 0, comp);
810  bcount = k;
811  if (k)
812  {
813  if (k < 0)
814  goto Fail;
815  dest = std::copy (pb, pb + k, dest);
816  idest = std::copy (ipb, ipb + k, idest);
817  pb += k; ipb += k;
818  nb -= k;
819  if (nb == 0)
820  goto Succeed;
821  }
822  *dest++ = *pa++; *idest++ = *ipa++;
823  --na;
824  if (na == 1)
825  goto CopyB;
826  }
827  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
828 
829  ++min_gallop; /* penalize it for leaving galloping mode */
830  ms->min_gallop = min_gallop;
831  }
832 
833 Succeed:
834  result = 0;
835 
836 Fail:
837  if (na)
838  {
839  std::copy (pa, pa + na, dest);
840  std::copy (ipa, ipa + na, idest);
841  }
842  return result;
843 
844 CopyB:
845  /* The last element of pa belongs at the end of the merge. */
846  std::copy (pb, pb + nb, dest);
847  std::copy (ipb, ipb + nb, idest);
848  dest[nb] = *pa;
849  idest[nb] = *ipa;
850 
851  return 0;
852 }
853 
854 /* Merge the na elements starting at pa with the nb elements starting at pb
855  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
856  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
857  * merge, and should have na >= nb. See listsort.txt for more info.
858  * Return 0 if successful, -1 if error.
859  */
860 template <class T>
861 template <class Comp>
862 int
864  T *pb, octave_idx_type nb,
865  Comp comp)
866 {
867  octave_idx_type k;
868  T *dest;
869  int result = -1; /* guilty until proved innocent */
870  T *basea, *baseb;
871  octave_idx_type min_gallop = ms->min_gallop;
872 
873  ms->getmem (nb);
874 
875  dest = pb + nb - 1;
876  std::copy (pb, pb + nb, ms->a);
877  basea = pa;
878  baseb = ms->a;
879  pb = ms->a + nb - 1;
880  pa += na - 1;
881 
882  *dest-- = *pa--;
883  --na;
884  if (na == 0)
885  goto Succeed;
886  if (nb == 1)
887  goto CopyA;
888 
889  for (;;)
890  {
891  octave_idx_type acount = 0; /* # of times A won in a row */
892  octave_idx_type bcount = 0; /* # of times B won in a row */
893 
894  /* Do the straightforward thing until (if ever) one run
895  * appears to win consistently.
896  */
897  for (;;)
898  {
899  if (comp (*pb, *pa))
900  {
901  *dest-- = *pa--;
902  ++acount;
903  bcount = 0;
904  --na;
905  if (na == 0)
906  goto Succeed;
907  if (acount >= min_gallop)
908  break;
909  }
910  else
911  {
912  *dest-- = *pb--;
913  ++bcount;
914  acount = 0;
915  --nb;
916  if (nb == 1)
917  goto CopyA;
918  if (bcount >= min_gallop)
919  break;
920  }
921  }
922 
923  /* One run is winning so consistently that galloping may
924  * be a huge win. So try that, and continue galloping until
925  * (if ever) neither run appears to be winning consistently
926  * anymore.
927  */
928  ++min_gallop;
929  do
930  {
931  min_gallop -= min_gallop > 1;
932  ms->min_gallop = min_gallop;
933  k = gallop_right (*pb, basea, na, na-1, comp);
934  if (k < 0)
935  goto Fail;
936  k = na - k;
937  acount = k;
938  if (k)
939  {
940  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
941  pa -= k;
942  na -= k;
943  if (na == 0)
944  goto Succeed;
945  }
946  *dest-- = *pb--;
947  --nb;
948  if (nb == 1)
949  goto CopyA;
950 
951  k = gallop_left (*pa, baseb, nb, nb-1, comp);
952  if (k < 0)
953  goto Fail;
954  k = nb - k;
955  bcount = k;
956  if (k)
957  {
958  dest -= k;
959  pb -= k;
960  std::copy (pb+1, pb+1 + k, dest+1);
961  nb -= k;
962  if (nb == 1)
963  goto CopyA;
964  /* nb==0 is impossible now if the comparison
965  * function is consistent, but we can't assume
966  * that it is.
967  */
968  if (nb == 0)
969  goto Succeed;
970  }
971  *dest-- = *pa--;
972  --na;
973  if (na == 0)
974  goto Succeed;
975  } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
976  ++min_gallop; /* penalize it for leaving galloping mode */
977  ms->min_gallop = min_gallop;
978  }
979 
980 Succeed:
981  result = 0;
982 
983 Fail:
984  if (nb)
985  std::copy (baseb, baseb + nb, dest-(nb-1));
986  return result;
987 
988 CopyA:
989  /* The first element of pb belongs at the front of the merge. */
990  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
991  pa -= na;
992  *dest = *pb;
993 
994  return 0;
995 }
996 
997 template <class T>
998 template <class Comp>
999 int
1001  T *pb, octave_idx_type *ipb, octave_idx_type nb,
1002  Comp comp)
1003 {
1004  octave_idx_type k;
1005  T *dest;
1006  octave_idx_type *idest;
1007  int result = -1; /* guilty until proved innocent */
1008  T *basea, *baseb;
1009  octave_idx_type *ibaseb;
1010  octave_idx_type min_gallop = ms->min_gallop;
1011 
1012  ms->getmemi (nb);
1013 
1014  dest = pb + nb - 1;
1015  idest = ipb + nb - 1;
1016  std::copy (pb, pb + nb, ms->a);
1017  std::copy (ipb, ipb + nb, ms->ia);
1018  basea = pa;
1019  baseb = ms->a; ibaseb = ms->ia;
1020  pb = ms->a + nb - 1; ipb = ms->ia + nb - 1;
1021  pa += na - 1; ipa += na - 1;
1022 
1023  *dest-- = *pa--; *idest-- = *ipa--;
1024  --na;
1025  if (na == 0)
1026  goto Succeed;
1027  if (nb == 1)
1028  goto CopyA;
1029 
1030  for (;;)
1031  {
1032  octave_idx_type acount = 0; /* # of times A won in a row */
1033  octave_idx_type bcount = 0; /* # of times B won in a row */
1034 
1035  /* Do the straightforward thing until (if ever) one run
1036  * appears to win consistently.
1037  */
1038  for (;;)
1039  {
1040  if (comp (*pb, *pa))
1041  {
1042  *dest-- = *pa--; *idest-- = *ipa--;
1043  ++acount;
1044  bcount = 0;
1045  --na;
1046  if (na == 0)
1047  goto Succeed;
1048  if (acount >= min_gallop)
1049  break;
1050  }
1051  else
1052  {
1053  *dest-- = *pb--; *idest-- = *ipb--;
1054  ++bcount;
1055  acount = 0;
1056  --nb;
1057  if (nb == 1)
1058  goto CopyA;
1059  if (bcount >= min_gallop)
1060  break;
1061  }
1062  }
1063 
1064  /* One run is winning so consistently that galloping may
1065  * be a huge win. So try that, and continue galloping until
1066  * (if ever) neither run appears to be winning consistently
1067  * anymore.
1068  */
1069  ++min_gallop;
1070  do
1071  {
1072  min_gallop -= min_gallop > 1;
1073  ms->min_gallop = min_gallop;
1074  k = gallop_right (*pb, basea, na, na-1, comp);
1075  if (k < 0)
1076  goto Fail;
1077  k = na - k;
1078  acount = k;
1079  if (k)
1080  {
1081  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
1082  idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1;
1083  pa -= k; ipa -= k;
1084  na -= k;
1085  if (na == 0)
1086  goto Succeed;
1087  }
1088  *dest-- = *pb--; *idest-- = *ipb--;
1089  --nb;
1090  if (nb == 1)
1091  goto CopyA;
1092 
1093  k = gallop_left (*pa, baseb, nb, nb-1, comp);
1094  if (k < 0)
1095  goto Fail;
1096  k = nb - k;
1097  bcount = k;
1098  if (k)
1099  {
1100  dest -= k; idest -= k;
1101  pb -= k; ipb -= k;
1102  std::copy (pb+1, pb+1 + k, dest+1);
1103  std::copy (ipb+1, ipb+1 + k, idest+1);
1104  nb -= k;
1105  if (nb == 1)
1106  goto CopyA;
1107  /* nb==0 is impossible now if the comparison
1108  * function is consistent, but we can't assume
1109  * that it is.
1110  */
1111  if (nb == 0)
1112  goto Succeed;
1113  }
1114  *dest-- = *pa--; *idest-- = *ipa--;
1115  --na;
1116  if (na == 0)
1117  goto Succeed;
1118  } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
1119  ++min_gallop; /* penalize it for leaving galloping mode */
1120  ms->min_gallop = min_gallop;
1121  }
1122 
1123 Succeed:
1124  result = 0;
1125 
1126 Fail:
1127  if (nb)
1128  {
1129  std::copy (baseb, baseb + nb, dest-(nb-1));
1130  std::copy (ibaseb, ibaseb + nb, idest-(nb-1));
1131  }
1132  return result;
1133 
1134 CopyA:
1135  /* The first element of pb belongs at the front of the merge. */
1136  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
1137  idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1;
1138  pa -= na; ipa -= na;
1139  *dest = *pb; *idest = *ipb;
1140 
1141  return 0;
1142 }
1143 
1144 /* Merge the two runs at stack indices i and i+1.
1145  * Returns 0 on success, -1 on error.
1146  */
1147 template <class T>
1148 template <class Comp>
1149 int
1151  Comp comp)
1152 {
1153  T *pa, *pb;
1154  octave_idx_type na, nb;
1155  octave_idx_type k;
1156 
1157  pa = data + ms->pending[i].base;
1158  na = ms->pending[i].len;
1159  pb = data + ms->pending[i+1].base;
1160  nb = ms->pending[i+1].len;
1161 
1162  /* Record the length of the combined runs; if i is the 3rd-last
1163  * run now, also slide over the last run (which isn't involved
1164  * in this merge). The current run i+1 goes away in any case.
1165  */
1166  ms->pending[i].len = na + nb;
1167  if (i == ms->n - 3)
1168  ms->pending[i+1] = ms->pending[i+2];
1169  ms->n--;
1170 
1171  /* Where does b start in a? Elements in a before that can be
1172  * ignored (already in place).
1173  */
1174  k = gallop_right (*pb, pa, na, 0, comp);
1175  if (k < 0)
1176  return -1;
1177  pa += k;
1178  na -= k;
1179  if (na == 0)
1180  return 0;
1181 
1182  /* Where does a end in b? Elements in b after that can be
1183  * ignored (already in place).
1184  */
1185  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1186  if (nb <= 0)
1187  return nb;
1188 
1189  /* Merge what remains of the runs, using a temp array with
1190  * min (na, nb) elements.
1191  */
1192  if (na <= nb)
1193  return merge_lo (pa, na, pb, nb, comp);
1194  else
1195  return merge_hi (pa, na, pb, nb, comp);
1196 }
1197 
1198 template <class T>
1199 template <class Comp>
1200 int
1202  Comp comp)
1203 {
1204  T *pa, *pb;
1205  octave_idx_type *ipa, *ipb;
1206  octave_idx_type na, nb;
1207  octave_idx_type k;
1208 
1209  pa = data + ms->pending[i].base;
1210  ipa = idx + ms->pending[i].base;
1211  na = ms->pending[i].len;
1212  pb = data + ms->pending[i+1].base;
1213  ipb = idx + ms->pending[i+1].base;
1214  nb = ms->pending[i+1].len;
1215 
1216  /* Record the length of the combined runs; if i is the 3rd-last
1217  * run now, also slide over the last run (which isn't involved
1218  * in this merge). The current run i+1 goes away in any case.
1219  */
1220  ms->pending[i].len = na + nb;
1221  if (i == ms->n - 3)
1222  ms->pending[i+1] = ms->pending[i+2];
1223  ms->n--;
1224 
1225  /* Where does b start in a? Elements in a before that can be
1226  * ignored (already in place).
1227  */
1228  k = gallop_right (*pb, pa, na, 0, comp);
1229  if (k < 0)
1230  return -1;
1231  pa += k; ipa += k;
1232  na -= k;
1233  if (na == 0)
1234  return 0;
1235 
1236  /* Where does a end in b? Elements in b after that can be
1237  * ignored (already in place).
1238  */
1239  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1240  if (nb <= 0)
1241  return nb;
1242 
1243  /* Merge what remains of the runs, using a temp array with
1244  * min (na, nb) elements.
1245  */
1246  if (na <= nb)
1247  return merge_lo (pa, ipa, na, pb, ipb, nb, comp);
1248  else
1249  return merge_hi (pa, ipa, na, pb, ipb, nb, comp);
1250 }
1251 
1252 /* Examine the stack of runs waiting to be merged, merging adjacent runs
1253  * until the stack invariants are re-established:
1254  *
1255  * 1. len[-3] > len[-2] + len[-1]
1256  * 2. len[-2] > len[-1]
1257  *
1258  * See listsort.txt for more info.
1259  *
1260  * Returns 0 on success, -1 on error.
1261  */
1262 template <class T>
1263 template <class Comp>
1264 int
1266 {
1267  struct s_slice *p = ms->pending;
1268 
1269  while (ms->n > 1)
1270  {
1271  octave_idx_type n = ms->n - 2;
1272  if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len)
1273  {
1274  if (p[n-1].len < p[n+1].len)
1275  --n;
1276  if (merge_at (n, data, comp) < 0)
1277  return -1;
1278  }
1279  else if (p[n].len <= p[n+1].len)
1280  {
1281  if (merge_at (n, data, comp) < 0)
1282  return -1;
1283  }
1284  else
1285  break;
1286  }
1287 
1288  return 0;
1289 }
1290 
1291 template <class T>
1292 template <class Comp>
1293 int
1295 {
1296  struct s_slice *p = ms->pending;
1297 
1298  while (ms->n > 1)
1299  {
1300  octave_idx_type n = ms->n - 2;
1301  if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len)
1302  {
1303  if (p[n-1].len < p[n+1].len)
1304  --n;
1305  if (merge_at (n, data, idx, comp) < 0)
1306  return -1;
1307  }
1308  else if (p[n].len <= p[n+1].len)
1309  {
1310  if (merge_at (n, data, idx, comp) < 0)
1311  return -1;
1312  }
1313  else
1314  break;
1315  }
1316 
1317  return 0;
1318 }
1319 
1320 /* Regardless of invariants, merge all runs on the stack until only one
1321  * remains. This is used at the end of the mergesort.
1322  *
1323  * Returns 0 on success, -1 on error.
1324  */
1325 template <class T>
1326 template <class Comp>
1327 int
1329 {
1330  struct s_slice *p = ms->pending;
1331 
1332  while (ms->n > 1)
1333  {
1334  octave_idx_type n = ms->n - 2;
1335  if (n > 0 && p[n-1].len < p[n+1].len)
1336  --n;
1337  if (merge_at (n, data, comp) < 0)
1338  return -1;
1339  }
1340 
1341  return 0;
1342 }
1343 
1344 template <class T>
1345 template <class Comp>
1346 int
1348 {
1349  struct s_slice *p = ms->pending;
1350 
1351  while (ms->n > 1)
1352  {
1353  octave_idx_type n = ms->n - 2;
1354  if (n > 0 && p[n-1].len < p[n+1].len)
1355  --n;
1356  if (merge_at (n, data, idx, comp) < 0)
1357  return -1;
1358  }
1359 
1360  return 0;
1361 }
1362 
1363 /* Compute a good value for the minimum run length; natural runs shorter
1364  * than this are boosted artificially via binary insertion.
1365  *
1366  * If n < 64, return n (it's too small to bother with fancy stuff).
1367  * Else if n is an exact power of 2, return 32.
1368  * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
1369  * strictly less than, an exact power of 2.
1370  *
1371  * See listsort.txt for more info.
1372  */
1373 template <class T>
1376 {
1377  octave_idx_type r = 0; /* becomes 1 if any 1 bits are shifted off */
1378 
1379  while (n >= 64)
1380  {
1381  r |= n & 1;
1382  n >>= 1;
1383  }
1384 
1385  return n + r;
1386 }
1387 
1388 template <class T>
1389 template <class Comp>
1390 void
1391 octave_sort<T>::sort (T *data, octave_idx_type nel, Comp comp)
1392 {
1393  /* Re-initialize the Mergestate as this might be the second time called */
1394  if (! ms) ms = new MergeState;
1395 
1396  ms->reset ();
1397  ms->getmem (1024);
1398 
1399  if (nel > 1)
1400  {
1401  octave_idx_type nremaining = nel;
1402  octave_idx_type lo = 0;
1403 
1404  /* March over the array once, left to right, finding natural runs,
1405  * and extending short natural runs to minrun elements.
1406  */
1407  octave_idx_type minrun = merge_compute_minrun (nremaining);
1408  do
1409  {
1410  bool descending;
1411  octave_idx_type n;
1412 
1413  /* Identify next run. */
1414  n = count_run (data + lo, nremaining, descending, comp);
1415  if (n < 0)
1416  goto fail;
1417  if (descending)
1418  std::reverse (data + lo, data + lo + n);
1419  /* If short, extend to min (minrun, nremaining). */
1420  if (n < minrun)
1421  {
1422  const octave_idx_type force = nremaining <= minrun ? nremaining
1423  : minrun;
1424  binarysort (data + lo, force, n, comp);
1425  n = force;
1426  }
1427  /* Push run onto pending-runs stack, and maybe merge. */
1428  assert (ms->n < MAX_MERGE_PENDING);
1429  ms->pending[ms->n].base = lo;
1430  ms->pending[ms->n].len = n;
1431  ms->n++;
1432  if (merge_collapse (data, comp) < 0)
1433  goto fail;
1434  /* Advance to find next run. */
1435  lo += n;
1436  nremaining -= n;
1437  }
1438  while (nremaining);
1439 
1440  merge_force_collapse (data, comp);
1441  }
1442 
1443 fail:
1444  return;
1445 }
1446 
1447 template <class T>
1448 template <class Comp>
1449 void
1451  Comp comp)
1452 {
1453  /* Re-initialize the Mergestate as this might be the second time called */
1454  if (! ms) ms = new MergeState;
1455 
1456  ms->reset ();
1457  ms->getmemi (1024);
1458 
1459  if (nel > 1)
1460  {
1461  octave_idx_type nremaining = nel;
1462  octave_idx_type lo = 0;
1463 
1464  /* March over the array once, left to right, finding natural runs,
1465  * and extending short natural runs to minrun elements.
1466  */
1467  octave_idx_type minrun = merge_compute_minrun (nremaining);
1468  do
1469  {
1470  bool descending;
1471  octave_idx_type n;
1472 
1473  /* Identify next run. */
1474  n = count_run (data + lo, nremaining, descending, comp);
1475  if (n < 0)
1476  goto fail;
1477  if (descending)
1478  {
1479  std::reverse (data + lo, data + lo + n);
1480  std::reverse (idx + lo, idx + lo + n);
1481  }
1482  /* If short, extend to min (minrun, nremaining). */
1483  if (n < minrun)
1484  {
1485  const octave_idx_type force = nremaining <= minrun ? nremaining
1486  : minrun;
1487  binarysort (data + lo, idx + lo, force, n, comp);
1488  n = force;
1489  }
1490  /* Push run onto pending-runs stack, and maybe merge. */
1491  assert (ms->n < MAX_MERGE_PENDING);
1492  ms->pending[ms->n].base = lo;
1493  ms->pending[ms->n].len = n;
1494  ms->n++;
1495  if (merge_collapse (data, idx, comp) < 0)
1496  goto fail;
1497  /* Advance to find next run. */
1498  lo += n;
1499  nremaining -= n;
1500  }
1501  while (nremaining);
1502 
1503  merge_force_collapse (data, idx, comp);
1504  }
1505 
1506 fail:
1507  return;
1508 }
1509 
1510 template <class T>
1511 void
1513 {
1514 #ifdef INLINE_ASCENDING_SORT
1515  if (compare == ascending_compare)
1516  sort (data, nel, std::less<T> ());
1517  else
1518 #endif
1519 #ifdef INLINE_DESCENDING_SORT
1520  if (compare == descending_compare)
1521  sort (data, nel, std::greater<T> ());
1522  else
1523 #endif
1524  if (compare)
1525  sort (data, nel, compare);
1526 }
1527 
1528 template <class T>
1529 void
1531 {
1532 #ifdef INLINE_ASCENDING_SORT
1533  if (compare == ascending_compare)
1534  sort (data, idx, nel, std::less<T> ());
1535  else
1536 #endif
1537 #ifdef INLINE_DESCENDING_SORT
1538  if (compare == descending_compare)
1539  sort (data, idx, nel, std::greater<T> ());
1540  else
1541 #endif
1542  if (compare)
1543  sort (data, idx, nel, compare);
1544 }
1545 
1546 template <class T> template <class Comp>
1547 bool
1548 octave_sort<T>::is_sorted (const T *data, octave_idx_type nel, Comp comp)
1549 {
1550  const T *end = data + nel;
1551  if (data != end)
1552  {
1553  const T *next = data;
1554  while (++next != end)
1555  {
1556  if (comp (*next, *data))
1557  break;
1558  data = next;
1559  }
1560  data = next;
1561  }
1562 
1563  return data == end;
1564 }
1565 
1566 template <class T>
1567 bool
1569 {
1570  bool retval = false;
1571 #ifdef INLINE_ASCENDING_SORT
1572  if (compare == ascending_compare)
1573  retval = is_sorted (data, nel, std::less<T> ());
1574  else
1575 #endif
1576 #ifdef INLINE_DESCENDING_SORT
1577  if (compare == descending_compare)
1578  retval = is_sorted (data, nel, std::greater<T> ());
1579  else
1580 #endif
1581  if (compare)
1582  retval = is_sorted (data, nel, compare);
1583 
1584  return retval;
1585 }
1586 
1587 // FIXME: is there really no way to make this local to the following function?
1589 {
1591  : col (c), ofs (o), nel (n) { }
1593 };
1594 
1595 
1596 template <class T> template <class Comp>
1597 void
1599  octave_idx_type rows, octave_idx_type cols,
1600  Comp comp)
1601 {
1602  OCTAVE_LOCAL_BUFFER (T, buf, rows);
1603  for (octave_idx_type i = 0; i < rows; i++)
1604  idx[i] = i;
1605 
1606  if (cols == 0 || rows <= 1)
1607  return;
1608 
1609 
1610  // This is a breadth-first traversal.
1611  typedef sortrows_run_t run_t;
1612  std::stack<run_t> runs;
1613 
1614  runs.push (run_t (0, 0, rows));
1615 
1616  while (! runs.empty ())
1617  {
1618  octave_idx_type col = runs.top ().col;
1619  octave_idx_type ofs = runs.top ().ofs;
1620  octave_idx_type nel = runs.top ().nel;
1621  runs.pop ();
1622  assert (nel > 1);
1623 
1624  T *lbuf = buf + ofs;
1625  const T *ldata = data + rows*col;
1626  octave_idx_type *lidx = idx + ofs;
1627 
1628  // Gather.
1629  for (octave_idx_type i = 0; i < nel; i++)
1630  lbuf[i] = ldata[lidx[i]];
1631 
1632  // Sort.
1633  sort (lbuf, lidx, nel, comp);
1634 
1635  // Identify constant runs and schedule subsorts.
1636  if (col < cols-1)
1637  {
1638  octave_idx_type lst = 0;
1639  for (octave_idx_type i = 0; i < nel; i++)
1640  {
1641  if (comp (lbuf[lst], lbuf[i]))
1642  {
1643  if (i > lst + 1)
1644  runs.push (run_t (col+1, ofs + lst, i - lst));
1645  lst = i;
1646  }
1647  }
1648  if (nel > lst + 1)
1649  runs.push (run_t (col+1, ofs + lst, nel - lst));
1650  }
1651  }
1652 }
1653 
1654 template <class T>
1655 void
1657  octave_idx_type rows, octave_idx_type cols)
1658 {
1659 #ifdef INLINE_ASCENDING_SORT
1660  if (compare == ascending_compare)
1661  sort_rows (data, idx, rows, cols, std::less<T> ());
1662  else
1663 #endif
1664 #ifdef INLINE_DESCENDING_SORT
1665  if (compare == descending_compare)
1666  sort_rows (data, idx, rows, cols, std::greater<T> ());
1667  else
1668 #endif
1669  if (compare)
1670  sort_rows (data, idx, rows, cols, compare);
1671 }
1672 
1673 template <class T> template <class Comp>
1674 bool
1676  octave_idx_type cols, Comp comp)
1677 {
1678  if (rows <= 1 || cols == 0)
1679  return true;
1680 
1681  // This is a breadth-first traversal.
1682  const T *lastrow = data + rows*(cols - 1);
1683  typedef std::pair<const T *, octave_idx_type> run_t;
1684  std::stack<run_t> runs;
1685 
1686  bool sorted = true;
1687  runs.push (run_t (data, rows));
1688  while (sorted && ! runs.empty ())
1689  {
1690  const T *lo = runs.top ().first;
1691  octave_idx_type n = runs.top ().second;
1692  runs.pop ();
1693  if (lo < lastrow)
1694  {
1695  // Not the final column.
1696  assert (n > 1);
1697  const T *hi = lo + n, *lst = lo;
1698  for (lo++; lo < hi; lo++)
1699  {
1700  if (comp (*lst, *lo))
1701  {
1702  if (lo > lst + 1)
1703  runs.push (run_t (lst + rows, lo - lst));
1704  lst = lo;
1705  }
1706  else if (comp (*lo, *lst))
1707  break;
1708 
1709  }
1710  if (lo == hi)
1711  {
1712  if (lo > lst + 1)
1713  runs.push (run_t (lst + rows, lo - lst));
1714  }
1715  else
1716  {
1717  sorted = false;
1718  break;
1719  }
1720  }
1721  else
1722  // The final column - use fast code.
1723  sorted = is_sorted (lo, n, comp);
1724  }
1725 
1726  return sorted;
1727 }
1728 
1729 template <class T>
1730 bool
1732  octave_idx_type cols)
1733 {
1734  bool retval = false;
1735 
1736 #ifdef INLINE_ASCENDING_SORT
1737  if (compare == ascending_compare)
1738  retval = is_sorted_rows (data, rows, cols, std::less<T> ());
1739  else
1740 #endif
1741 #ifdef INLINE_DESCENDING_SORT
1742  if (compare == descending_compare)
1743  retval = is_sorted_rows (data, rows, cols, std::greater<T> ());
1744  else
1745 #endif
1746  if (compare)
1747  retval = is_sorted_rows (data, rows, cols, compare);
1748 
1749  return retval;
1750 }
1751 
1752 // The simple binary lookup.
1753 
1754 template <class T> template <class Comp>
1757  const T& value, Comp comp)
1758 {
1759  octave_idx_type lo = 0, hi = nel;
1760 
1761  while (lo < hi)
1762  {
1763  octave_idx_type mid = lo + ((hi-lo) >> 1);
1764  if (comp (value, data[mid]))
1765  hi = mid;
1766  else
1767  lo = mid + 1;
1768  }
1769 
1770  return lo;
1771 }
1772 
1773 template <class T>
1776  const T& value)
1777 {
1778  octave_idx_type retval = 0;
1779 
1780 #ifdef INLINE_ASCENDING_SORT
1781  if (compare == ascending_compare)
1782  retval = lookup (data, nel, value, std::less<T> ());
1783  else
1784 #endif
1785 #ifdef INLINE_DESCENDING_SORT
1786  if (compare == descending_compare)
1787  retval = lookup (data, nel, value, std::greater<T> ());
1788  else
1789 #endif
1790  if (compare)
1791  retval = lookup (data, nel, value, std::ptr_fun (compare));
1792 
1793  return retval;
1794 }
1795 
1796 template <class T> template <class Comp>
1797 void
1799  const T *values, octave_idx_type nvalues,
1800  octave_idx_type *idx, Comp comp)
1801 {
1802  // Use a sequence of binary lookups.
1803  // TODO: Can this be sped up generally? The sorted merge case is dealt with
1804  // elsewhere.
1805  for (octave_idx_type j = 0; j < nvalues; j++)
1806  idx[j] = lookup (data, nel, values[j], comp);
1807 }
1808 
1809 template <class T>
1810 void
1812  const T* values, octave_idx_type nvalues,
1813  octave_idx_type *idx)
1814 {
1815 #ifdef INLINE_ASCENDING_SORT
1816  if (compare == ascending_compare)
1817  lookup (data, nel, values, nvalues, idx, std::less<T> ());
1818  else
1819 #endif
1820 #ifdef INLINE_DESCENDING_SORT
1821  if (compare == descending_compare)
1822  lookup (data, nel, values, nvalues, idx, std::greater<T> ());
1823  else
1824 #endif
1825  if (compare)
1826  lookup (data, nel, values, nvalues, idx, std::ptr_fun (compare));
1827 }
1828 
1829 template <class T> template <class Comp>
1830 void
1832  const T *values, octave_idx_type nvalues,
1833  octave_idx_type *idx, bool rev, Comp comp)
1834 {
1835  if (rev)
1836  {
1837  octave_idx_type i = 0, j = nvalues - 1;
1838 
1839  if (nvalues > 0 && nel > 0)
1840  {
1841  while (true)
1842  {
1843  if (comp (values[j], data[i]))
1844  {
1845  idx[j] = i;
1846  if (--j < 0)
1847  break;
1848  }
1849  else if (++i == nel)
1850  break;
1851  }
1852  }
1853 
1854  for (; j >= 0; j--)
1855  idx[j] = i;
1856  }
1857  else
1858  {
1859  octave_idx_type i = 0, j = 0;
1860 
1861  if (nvalues > 0 && nel > 0)
1862  {
1863  while (true)
1864  {
1865  if (comp (values[j], data[i]))
1866  {
1867  idx[j] = i;
1868  if (++j == nvalues)
1869  break;
1870  }
1871  else if (++i == nel)
1872  break;
1873  }
1874  }
1875 
1876  for (; j != nvalues; j++)
1877  idx[j] = i;
1878  }
1879 }
1880 
1881 template <class T>
1882 void
1884  const T* values, octave_idx_type nvalues,
1885  octave_idx_type *idx, bool rev)
1886 {
1887 #ifdef INLINE_ASCENDING_SORT
1888  if (compare == ascending_compare)
1889  lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ());
1890  else
1891 #endif
1892 #ifdef INLINE_DESCENDING_SORT
1893  if (compare == descending_compare)
1894  lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ());
1895  else
1896 #endif
1897  if (compare)
1898  lookup_sorted (data, nel, values, nvalues, idx, rev,
1899  std::ptr_fun (compare));
1900 }
1901 
1902 template <class T> template <class Comp>
1903 void
1906  Comp comp)
1907 {
1908  // Simply wrap the STL algorithms.
1909  // FIXME: this will fail if we attempt to inline <,> for Complex.
1910  if (up == lo+1)
1911  std::nth_element (data, data + lo, data + nel, comp);
1912  else if (lo == 0)
1913  std::partial_sort (data, data + up, data + nel, comp);
1914  else
1915  {
1916  std::nth_element (data, data + lo, data + nel, comp);
1917  if (up == lo + 2)
1918  {
1919  // Finding two subsequent elements.
1920  std::swap (data[lo+1],
1921  *std::min_element (data + lo + 1, data + nel, comp));
1922  }
1923  else
1924  std::partial_sort (data + lo + 1, data + up, data + nel, comp);
1925  }
1926 }
1927 
1928 template <class T>
1929 void
1932 {
1933  if (up < 0)
1934  up = lo + 1;
1935 #ifdef INLINE_ASCENDING_SORT
1936  if (compare == ascending_compare)
1937  nth_element (data, nel, lo, up, std::less<T> ());
1938  else
1939 #endif
1940 #ifdef INLINE_DESCENDING_SORT
1941  if (compare == descending_compare)
1942  nth_element (data, nel, lo, up, std::greater<T> ());
1943  else
1944 #endif
1945  if (compare)
1946  nth_element (data, nel, lo, up, std::ptr_fun (compare));
1947 }
1948 
1949 template <class T>
1950 bool
1952  typename ref_param<T>::type y)
1953 {
1954  return x < y;
1955 }
1956 
1957 template <class T>
1958 bool
1960  typename ref_param<T>::type y)
1961 {
1962  return x > y;
1963 }