...

Source file src/sort/sort.go

Documentation: sort

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run gen_sort_variants.go
     6  
     7  // Package sort provides primitives for sorting slices and user-defined collections.
     8  package sort
     9  
    10  import "math/bits"
    11  
    12  // An implementation of Interface can be sorted by the routines in this package.
    13  // The methods refer to elements of the underlying collection by integer index.
    14  type Interface interface {
    15  	// Len is the number of elements in the collection.
    16  	Len() int
    17  
    18  	// Less reports whether the element with index i
    19  	// must sort before the element with index j.
    20  	//
    21  	// If both Less(i, j) and Less(j, i) are false,
    22  	// then the elements at index i and j are considered equal.
    23  	// Sort may place equal elements in any order in the final result,
    24  	// while Stable preserves the original input order of equal elements.
    25  	//
    26  	// Less must describe a transitive ordering:
    27  	//  - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well.
    28  	//  - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well.
    29  	//
    30  	// Note that floating-point comparison (the < operator on float32 or float64 values)
    31  	// is not a transitive ordering when not-a-number (NaN) values are involved.
    32  	// See Float64Slice.Less for a correct implementation for floating-point values.
    33  	Less(i, j int) bool
    34  
    35  	// Swap swaps the elements with indexes i and j.
    36  	Swap(i, j int)
    37  }
    38  
    39  // Sort sorts data in ascending order as determined by the Less method.
    40  // It makes one call to data.Len to determine n and O(n*log(n)) calls to
    41  // data.Less and data.Swap. The sort is not guaranteed to be stable.
    42  //
    43  // Note: in many situations, the newer [slices.SortFunc] function is more
    44  // ergonomic and runs faster.
    45  func Sort(data Interface) {
    46  	n := data.Len()
    47  	if n <= 1 {
    48  		return
    49  	}
    50  	limit := bits.Len(uint(n))
    51  	pdqsort(data, 0, n, limit)
    52  }
    53  
    54  type sortedHint int // hint for pdqsort when choosing the pivot
    55  
    56  const (
    57  	unknownHint sortedHint = iota
    58  	increasingHint
    59  	decreasingHint
    60  )
    61  
    62  // xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
    63  type xorshift uint64
    64  
    65  func (r *xorshift) Next() uint64 {
    66  	*r ^= *r << 13
    67  	*r ^= *r >> 17
    68  	*r ^= *r << 5
    69  	return uint64(*r)
    70  }
    71  
    72  func nextPowerOfTwo(length int) uint {
    73  	shift := uint(bits.Len(uint(length)))
    74  	return uint(1 << shift)
    75  }
    76  
    77  // lessSwap is a pair of Less and Swap function for use with the
    78  // auto-generated func-optimized variant of sort.go in
    79  // zfuncversion.go.
    80  type lessSwap struct {
    81  	Less func(i, j int) bool
    82  	Swap func(i, j int)
    83  }
    84  
    85  type reverse struct {
    86  	// This embedded Interface permits Reverse to use the methods of
    87  	// another Interface implementation.
    88  	Interface
    89  }
    90  
    91  // Less returns the opposite of the embedded implementation's Less method.
    92  func (r reverse) Less(i, j int) bool {
    93  	return r.Interface.Less(j, i)
    94  }
    95  
    96  // Reverse returns the reverse order for data.
    97  func Reverse(data Interface) Interface {
    98  	return &reverse{data}
    99  }
   100  
   101  // IsSorted reports whether data is sorted.
   102  //
   103  // Note: in many situations, the newer [slices.IsSortedFunc] function is more
   104  // ergonomic and runs faster.
   105  func IsSorted(data Interface) bool {
   106  	n := data.Len()
   107  	for i := n - 1; i > 0; i-- {
   108  		if data.Less(i, i-1) {
   109  			return false
   110  		}
   111  	}
   112  	return true
   113  }
   114  
   115  // Convenience types for common cases
   116  
   117  // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   118  type IntSlice []int
   119  
   120  func (x IntSlice) Len() int           { return len(x) }
   121  func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] }
   122  func (x IntSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   123  
   124  // Sort is a convenience method: x.Sort() calls Sort(x).
   125  func (x IntSlice) Sort() { Sort(x) }
   126  
   127  // Float64Slice implements Interface for a []float64, sorting in increasing order,
   128  // with not-a-number (NaN) values ordered before other values.
   129  type Float64Slice []float64
   130  
   131  func (x Float64Slice) Len() int { return len(x) }
   132  
   133  // Less reports whether x[i] should be ordered before x[j], as required by the sort Interface.
   134  // Note that floating-point comparison by itself is not a transitive relation: it does not
   135  // report a consistent ordering for not-a-number (NaN) values.
   136  // This implementation of Less places NaN values before any others, by using:
   137  //
   138  //	x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j]))
   139  func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) }
   140  func (x Float64Slice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   141  
   142  // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   143  func isNaN(f float64) bool {
   144  	return f != f
   145  }
   146  
   147  // Sort is a convenience method: x.Sort() calls Sort(x).
   148  func (x Float64Slice) Sort() { Sort(x) }
   149  
   150  // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   151  type StringSlice []string
   152  
   153  func (x StringSlice) Len() int           { return len(x) }
   154  func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] }
   155  func (x StringSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   156  
   157  // Sort is a convenience method: x.Sort() calls Sort(x).
   158  func (x StringSlice) Sort() { Sort(x) }
   159  
   160  // Convenience wrappers for common cases
   161  
   162  // Ints sorts a slice of ints in increasing order.
   163  //
   164  // Note: as of Go 1.22, this function simply calls [slices.Sort].
   165  func Ints(x []int) { intsImpl(x) }
   166  
   167  // Float64s sorts a slice of float64s in increasing order.
   168  // Not-a-number (NaN) values are ordered before other values.
   169  //
   170  // Note: as of Go 1.22, this function simply calls [slices.Sort].
   171  func Float64s(x []float64) { float64sImpl(x) }
   172  
   173  // Strings sorts a slice of strings in increasing order.
   174  //
   175  // Note: as of Go 1.22, this function simply calls [slices.Sort].
   176  func Strings(x []string) { stringsImpl(x) }
   177  
   178  // IntsAreSorted reports whether the slice x is sorted in increasing order.
   179  //
   180  // Note: as of Go 1.22, this function simply calls [slices.IsSorted].
   181  func IntsAreSorted(x []int) bool { return intsAreSortedImpl(x) }
   182  
   183  // Float64sAreSorted reports whether the slice x is sorted in increasing order,
   184  // with not-a-number (NaN) values before any other values.
   185  //
   186  // Note: as of Go 1.22, this function simply calls [slices.IsSorted].
   187  func Float64sAreSorted(x []float64) bool { return float64sAreSortedImpl(x) }
   188  
   189  // StringsAreSorted reports whether the slice x is sorted in increasing order.
   190  //
   191  // Note: as of Go 1.22, this function simply calls [slices.IsSorted].
   192  func StringsAreSorted(x []string) bool { return stringsAreSortedImpl(x) }
   193  
   194  // Notes on stable sorting:
   195  // The used algorithms are simple and provable correct on all input and use
   196  // only logarithmic additional stack space. They perform well if compared
   197  // experimentally to other stable in-place sorting algorithms.
   198  //
   199  // Remarks on other algorithms evaluated:
   200  //  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   201  //    Not faster.
   202  //  - GCC's __rotate for block rotations: Not faster.
   203  //  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   204  //    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   205  //    The given algorithms are in-place, number of Swap and Assignments
   206  //    grow as n log n but the algorithm is not stable.
   207  //  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   208  //    V. Raman in Algorithmica (1996) 16, 115-160:
   209  //    This algorithm either needs additional 2n bits or works only if there
   210  //    are enough different elements available to encode some permutations
   211  //    which have to be undone later (so not stable on any input).
   212  //  - All the optimal in-place sorting/merging algorithms I found are either
   213  //    unstable or rely on enough different elements in each step to encode the
   214  //    performed block rearrangements. See also "In-Place Merging Algorithms",
   215  //    Denham Coates-Evely, Department of Computer Science, Kings College,
   216  //    January 2004 and the references in there.
   217  //  - Often "optimal" algorithms are optimal in the number of assignments
   218  //    but Interface has only Swap as operation.
   219  
   220  // Stable sorts data in ascending order as determined by the Less method,
   221  // while keeping the original order of equal elements.
   222  //
   223  // It makes one call to data.Len to determine n, O(n*log(n)) calls to
   224  // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   225  //
   226  // Note: in many situations, the newer slices.SortStableFunc function is more
   227  // ergonomic and runs faster.
   228  func Stable(data Interface) {
   229  	stable(data, data.Len())
   230  }
   231  
   232  /*
   233  Complexity of Stable Sorting
   234  
   235  
   236  Complexity of block swapping rotation
   237  
   238  Each Swap puts one new element into its correct, final position.
   239  Elements which reach their final position are no longer moved.
   240  Thus block swapping rotation needs |u|+|v| calls to Swaps.
   241  This is best possible as each element might need a move.
   242  
   243  Pay attention when comparing to other optimal algorithms which
   244  typically count the number of assignments instead of swaps:
   245  E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   246  rotations uses O(u + v + gcd(u,v)) assignments which is
   247  better than our O(3 * (u+v)) as gcd(u,v) <= u.
   248  
   249  
   250  Stable sorting by SymMerge and BlockSwap rotations
   251  
   252  SymMerg complexity for same size input M = N:
   253  Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   254  Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   255  
   256  (The following argument does not fuzz over a missing -1 or
   257  other stuff which does not impact the final result).
   258  
   259  Let n = data.Len(). Assume n = 2^k.
   260  
   261  Plain merge sort performs log(n) = k iterations.
   262  On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   263  
   264  Thus iteration i of merge sort performs:
   265  Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   266  Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   267  
   268  In total k = log(n) iterations are performed; so in total:
   269  Calls to Less O(log(n) * n)
   270  Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   271     = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   272  
   273  
   274  Above results should generalize to arbitrary n = 2^k + p
   275  and should not be influenced by the initial insertion sort phase:
   276  Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   277  size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   278  Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   279  Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   280     = O(n * log(n))
   281  Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   282  
   283  */
   284  

View as plain text