Source file
src/sort/gen_sort_variants.go
Documentation: sort
1
2
3
4
5
6
7
8
9
10
11
12 package main
13
14 import (
15 "bytes"
16 "flag"
17 "fmt"
18 "go/format"
19 "log"
20 "os"
21 "text/template"
22 )
23
24 type Variant struct {
25
26 Name string
27
28
29
30 Path string
31
32
33 Package string
34
35
36 Imports string
37
38
39
40 FuncSuffix string
41
42
43
44 DataType string
45
46
47 TypeParam string
48
49
50
51 ExtraParam string
52
53
54
55 ExtraArg string
56
57
58
59
60
61
62
63
64
65
66
67 Funcs template.FuncMap
68 }
69
70 var (
71 traditionalVariants = []Variant{
72 Variant{
73 Name: "interface",
74 Path: "zsortinterface.go",
75 Package: "sort",
76 Imports: "",
77 FuncSuffix: "",
78 TypeParam: "",
79 ExtraParam: "",
80 ExtraArg: "",
81 DataType: "Interface",
82 Funcs: template.FuncMap{
83 "Less": func(name, i, j string) string {
84 return fmt.Sprintf("%s.Less(%s, %s)", name, i, j)
85 },
86 "Swap": func(name, i, j string) string {
87 return fmt.Sprintf("%s.Swap(%s, %s)", name, i, j)
88 },
89 },
90 },
91 Variant{
92 Name: "func",
93 Path: "zsortfunc.go",
94 Package: "sort",
95 Imports: "",
96 FuncSuffix: "_func",
97 TypeParam: "",
98 ExtraParam: "",
99 ExtraArg: "",
100 DataType: "lessSwap",
101 Funcs: template.FuncMap{
102 "Less": func(name, i, j string) string {
103 return fmt.Sprintf("%s.Less(%s, %s)", name, i, j)
104 },
105 "Swap": func(name, i, j string) string {
106 return fmt.Sprintf("%s.Swap(%s, %s)", name, i, j)
107 },
108 },
109 },
110 }
111
112 genericVariants = []Variant{
113 Variant{
114 Name: "generic_ordered",
115 Path: "zsortordered.go",
116 Package: "slices",
117 Imports: "import \"cmp\"\n",
118 FuncSuffix: "Ordered",
119 TypeParam: "[E cmp.Ordered]",
120 ExtraParam: "",
121 ExtraArg: "",
122 DataType: "[]E",
123 Funcs: template.FuncMap{
124 "Less": func(name, i, j string) string {
125 return fmt.Sprintf("cmp.Less(%s[%s], %s[%s])", name, i, name, j)
126 },
127 "Swap": func(name, i, j string) string {
128 return fmt.Sprintf("%s[%s], %s[%s] = %s[%s], %s[%s]", name, i, name, j, name, j, name, i)
129 },
130 },
131 },
132 Variant{
133 Name: "generic_func",
134 Path: "zsortanyfunc.go",
135 Package: "slices",
136 FuncSuffix: "CmpFunc",
137 TypeParam: "[E any]",
138 ExtraParam: ", cmp func(a, b E) int",
139 ExtraArg: ", cmp",
140 DataType: "[]E",
141 Funcs: template.FuncMap{
142 "Less": func(name, i, j string) string {
143 return fmt.Sprintf("(cmp(%s[%s], %s[%s]) < 0)", name, i, name, j)
144 },
145 "Swap": func(name, i, j string) string {
146 return fmt.Sprintf("%s[%s], %s[%s] = %s[%s], %s[%s]", name, i, name, j, name, j, name, i)
147 },
148 },
149 },
150 }
151
152 expVariants = []Variant{
153 Variant{
154 Name: "exp_ordered",
155 Path: "zsortordered.go",
156 Package: "slices",
157 Imports: "import \"golang.org/x/exp/constraints\"\n",
158 FuncSuffix: "Ordered",
159 TypeParam: "[E constraints.Ordered]",
160 ExtraParam: "",
161 ExtraArg: "",
162 DataType: "[]E",
163 Funcs: template.FuncMap{
164 "Less": func(name, i, j string) string {
165 return fmt.Sprintf("cmpLess(%s[%s], %s[%s])", name, i, name, j)
166 },
167 "Swap": func(name, i, j string) string {
168 return fmt.Sprintf("%s[%s], %s[%s] = %s[%s], %s[%s]", name, i, name, j, name, j, name, i)
169 },
170 },
171 },
172 Variant{
173 Name: "exp_func",
174 Path: "zsortanyfunc.go",
175 Package: "slices",
176 FuncSuffix: "CmpFunc",
177 TypeParam: "[E any]",
178 ExtraParam: ", cmp func(a, b E) int",
179 ExtraArg: ", cmp",
180 DataType: "[]E",
181 Funcs: template.FuncMap{
182 "Less": func(name, i, j string) string {
183 return fmt.Sprintf("(cmp(%s[%s], %s[%s]) < 0)", name, i, name, j)
184 },
185 "Swap": func(name, i, j string) string {
186 return fmt.Sprintf("%s[%s], %s[%s] = %s[%s], %s[%s]", name, i, name, j, name, j, name, i)
187 },
188 },
189 },
190 }
191 )
192
193 func main() {
194 genGeneric := flag.Bool("generic", false, "generate generic versions")
195 genExp := flag.Bool("exp", false, "generate x/exp/slices versions")
196 flag.Parse()
197
198 var variants []Variant
199 if *genExp {
200 variants = expVariants
201 } else if *genGeneric {
202 variants = genericVariants
203 } else {
204 variants = traditionalVariants
205 }
206 for i := range variants {
207 generate(&variants[i])
208 }
209 }
210
211
212 func generate(v *Variant) {
213
214
215 tmpl, err := template.New("gen").Funcs(v.Funcs).Parse(templateCode)
216 if err != nil {
217 log.Fatal("template Parse:", err)
218 }
219
220 var out bytes.Buffer
221 err = tmpl.Execute(&out, v)
222 if err != nil {
223 log.Fatal("template Execute:", err)
224 }
225
226 formatted, err := format.Source(out.Bytes())
227 if err != nil {
228 log.Fatal("format:", err)
229 }
230
231 if err := os.WriteFile(v.Path, formatted, 0644); err != nil {
232 log.Fatal("WriteFile:", err)
233 }
234 }
235
236 var templateCode = `// Code generated by gen_sort_variants.go; DO NOT EDIT.
237
238 // Copyright 2022 The Go Authors. All rights reserved.
239 // Use of this source code is governed by a BSD-style
240 // license that can be found in the LICENSE file.
241
242 package {{.Package}}
243
244 {{.Imports}}
245
246 // insertionSort{{.FuncSuffix}} sorts data[a:b] using insertion sort.
247 func insertionSort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) {
248 for i := a + 1; i < b; i++ {
249 for j := i; j > a && {{Less "data" "j" "j-1"}}; j-- {
250 {{Swap "data" "j" "j-1"}}
251 }
252 }
253 }
254
255 // siftDown{{.FuncSuffix}} implements the heap property on data[lo:hi].
256 // first is an offset into the array where the root of the heap lies.
257 func siftDown{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, lo, hi, first int {{.ExtraParam}}) {
258 root := lo
259 for {
260 child := 2*root + 1
261 if child >= hi {
262 break
263 }
264 if child+1 < hi && {{Less "data" "first+child" "first+child+1"}} {
265 child++
266 }
267 if !{{Less "data" "first+root" "first+child"}} {
268 return
269 }
270 {{Swap "data" "first+root" "first+child"}}
271 root = child
272 }
273 }
274
275 func heapSort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) {
276 first := a
277 lo := 0
278 hi := b - a
279
280 // Build heap with greatest element at top.
281 for i := (hi - 1) / 2; i >= 0; i-- {
282 siftDown{{.FuncSuffix}}(data, i, hi, first {{.ExtraArg}})
283 }
284
285 // Pop elements, largest first, into end of data.
286 for i := hi - 1; i >= 0; i-- {
287 {{Swap "data" "first" "first+i"}}
288 siftDown{{.FuncSuffix}}(data, lo, i, first {{.ExtraArg}})
289 }
290 }
291
292 // pdqsort{{.FuncSuffix}} sorts data[a:b].
293 // The algorithm based on pattern-defeating quicksort(pdqsort), but without the optimizations from BlockQuicksort.
294 // pdqsort paper: https://arxiv.org/pdf/2106.05123.pdf
295 // C++ implementation: https://github.com/orlp/pdqsort
296 // Rust implementation: https://docs.rs/pdqsort/latest/pdqsort/
297 // limit is the number of allowed bad (very unbalanced) pivots before falling back to heapsort.
298 func pdqsort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, limit int {{.ExtraParam}}) {
299 const maxInsertion = 12
300
301 var (
302 wasBalanced = true // whether the last partitioning was reasonably balanced
303 wasPartitioned = true // whether the slice was already partitioned
304 )
305
306 for {
307 length := b - a
308
309 if length <= maxInsertion {
310 insertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
311 return
312 }
313
314 // Fall back to heapsort if too many bad choices were made.
315 if limit == 0 {
316 heapSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
317 return
318 }
319
320 // If the last partitioning was imbalanced, we need to breaking patterns.
321 if !wasBalanced {
322 breakPatterns{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
323 limit--
324 }
325
326 pivot, hint := choosePivot{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
327 if hint == decreasingHint {
328 reverseRange{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
329 // The chosen pivot was pivot-a elements after the start of the array.
330 // After reversing it is pivot-a elements before the end of the array.
331 // The idea came from Rust's implementation.
332 pivot = (b - 1) - (pivot - a)
333 hint = increasingHint
334 }
335
336 // The slice is likely already sorted.
337 if wasBalanced && wasPartitioned && hint == increasingHint {
338 if partialInsertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}}) {
339 return
340 }
341 }
342
343 // Probably the slice contains many duplicate elements, partition the slice into
344 // elements equal to and elements greater than the pivot.
345 if a > 0 && !{{Less "data" "a-1" "pivot"}} {
346 mid := partitionEqual{{.FuncSuffix}}(data, a, b, pivot {{.ExtraArg}})
347 a = mid
348 continue
349 }
350
351 mid, alreadyPartitioned := partition{{.FuncSuffix}}(data, a, b, pivot {{.ExtraArg}})
352 wasPartitioned = alreadyPartitioned
353
354 leftLen, rightLen := mid-a, b-mid
355 balanceThreshold := length / 8
356 if leftLen < rightLen {
357 wasBalanced = leftLen >= balanceThreshold
358 pdqsort{{.FuncSuffix}}(data, a, mid, limit {{.ExtraArg}})
359 a = mid + 1
360 } else {
361 wasBalanced = rightLen >= balanceThreshold
362 pdqsort{{.FuncSuffix}}(data, mid+1, b, limit {{.ExtraArg}})
363 b = mid
364 }
365 }
366 }
367
368 // partition{{.FuncSuffix}} does one quicksort partition.
369 // Let p = data[pivot]
370 // Moves elements in data[a:b] around, so that data[i]<p and data[j]>=p for i<newpivot and j>newpivot.
371 // On return, data[newpivot] = p
372 func partition{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivot int {{.ExtraParam}}) (newpivot int, alreadyPartitioned bool) {
373 {{Swap "data" "a" "pivot"}}
374 i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
375
376 for i <= j && {{Less "data" "i" "a"}} {
377 i++
378 }
379 for i <= j && !{{Less "data" "j" "a"}} {
380 j--
381 }
382 if i > j {
383 {{Swap "data" "j" "a"}}
384 return j, true
385 }
386 {{Swap "data" "i" "j"}}
387 i++
388 j--
389
390 for {
391 for i <= j && {{Less "data" "i" "a"}} {
392 i++
393 }
394 for i <= j && !{{Less "data" "j" "a"}} {
395 j--
396 }
397 if i > j {
398 break
399 }
400 {{Swap "data" "i" "j"}}
401 i++
402 j--
403 }
404 {{Swap "data" "j" "a"}}
405 return j, false
406 }
407
408 // partitionEqual{{.FuncSuffix}} partitions data[a:b] into elements equal to data[pivot] followed by elements greater than data[pivot].
409 // It assumed that data[a:b] does not contain elements smaller than the data[pivot].
410 func partitionEqual{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, pivot int {{.ExtraParam}}) (newpivot int) {
411 {{Swap "data" "a" "pivot"}}
412 i, j := a+1, b-1 // i and j are inclusive of the elements remaining to be partitioned
413
414 for {
415 for i <= j && !{{Less "data" "a" "i"}} {
416 i++
417 }
418 for i <= j && {{Less "data" "a" "j"}} {
419 j--
420 }
421 if i > j {
422 break
423 }
424 {{Swap "data" "i" "j"}}
425 i++
426 j--
427 }
428 return i
429 }
430
431 // partialInsertionSort{{.FuncSuffix}} partially sorts a slice, returns true if the slice is sorted at the end.
432 func partialInsertionSort{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) bool {
433 const (
434 maxSteps = 5 // maximum number of adjacent out-of-order pairs that will get shifted
435 shortestShifting = 50 // don't shift any elements on short arrays
436 )
437 i := a + 1
438 for j := 0; j < maxSteps; j++ {
439 for i < b && !{{Less "data" "i" "i-1"}} {
440 i++
441 }
442
443 if i == b {
444 return true
445 }
446
447 if b-a < shortestShifting {
448 return false
449 }
450
451 {{Swap "data" "i" "i-1"}}
452
453 // Shift the smaller one to the left.
454 if i-a >= 2 {
455 for j := i - 1; j >= 1; j-- {
456 if !{{Less "data" "j" "j-1"}} {
457 break
458 }
459 {{Swap "data" "j" "j-1"}}
460 }
461 }
462 // Shift the greater one to the right.
463 if b-i >= 2 {
464 for j := i + 1; j < b; j++ {
465 if !{{Less "data" "j" "j-1"}} {
466 break
467 }
468 {{Swap "data" "j" "j-1"}}
469 }
470 }
471 }
472 return false
473 }
474
475 // breakPatterns{{.FuncSuffix}} scatters some elements around in an attempt to break some patterns
476 // that might cause imbalanced partitions in quicksort.
477 func breakPatterns{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) {
478 length := b - a
479 if length >= 8 {
480 random := xorshift(length)
481 modulus := nextPowerOfTwo(length)
482
483 for idx := a + (length/4)*2 - 1; idx <= a + (length/4)*2 + 1; idx++ {
484 other := int(uint(random.Next()) & (modulus - 1))
485 if other >= length {
486 other -= length
487 }
488 {{Swap "data" "idx" "a+other"}}
489 }
490 }
491 }
492
493 // choosePivot{{.FuncSuffix}} chooses a pivot in data[a:b].
494 //
495 // [0,8): chooses a static pivot.
496 // [8,shortestNinther): uses the simple median-of-three method.
497 // [shortestNinther,∞): uses the Tukey ninther method.
498 func choosePivot{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) (pivot int, hint sortedHint) {
499 const (
500 shortestNinther = 50
501 maxSwaps = 4 * 3
502 )
503
504 l := b - a
505
506 var (
507 swaps int
508 i = a + l/4*1
509 j = a + l/4*2
510 k = a + l/4*3
511 )
512
513 if l >= 8 {
514 if l >= shortestNinther {
515 // Tukey ninther method, the idea came from Rust's implementation.
516 i = medianAdjacent{{.FuncSuffix}}(data, i, &swaps {{.ExtraArg}})
517 j = medianAdjacent{{.FuncSuffix}}(data, j, &swaps {{.ExtraArg}})
518 k = medianAdjacent{{.FuncSuffix}}(data, k, &swaps {{.ExtraArg}})
519 }
520 // Find the median among i, j, k and stores it into j.
521 j = median{{.FuncSuffix}}(data, i, j, k, &swaps {{.ExtraArg}})
522 }
523
524 switch swaps {
525 case 0:
526 return j, increasingHint
527 case maxSwaps:
528 return j, decreasingHint
529 default:
530 return j, unknownHint
531 }
532 }
533
534 // order2{{.FuncSuffix}} returns x,y where data[x] <= data[y], where x,y=a,b or x,y=b,a.
535 func order2{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int, swaps *int {{.ExtraParam}}) (int, int) {
536 if {{Less "data" "b" "a"}} {
537 *swaps++
538 return b, a
539 }
540 return a, b
541 }
542
543 // median{{.FuncSuffix}} returns x where data[x] is the median of data[a],data[b],data[c], where x is a, b, or c.
544 func median{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, c int, swaps *int {{.ExtraParam}}) int {
545 a, b = order2{{.FuncSuffix}}(data, a, b, swaps {{.ExtraArg}})
546 b, c = order2{{.FuncSuffix}}(data, b, c, swaps {{.ExtraArg}})
547 a, b = order2{{.FuncSuffix}}(data, a, b, swaps {{.ExtraArg}})
548 return b
549 }
550
551 // medianAdjacent{{.FuncSuffix}} finds the median of data[a - 1], data[a], data[a + 1] and stores the index into a.
552 func medianAdjacent{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a int, swaps *int {{.ExtraParam}}) int {
553 return median{{.FuncSuffix}}(data, a-1, a, a+1, swaps {{.ExtraArg}})
554 }
555
556 func reverseRange{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b int {{.ExtraParam}}) {
557 i := a
558 j := b - 1
559 for i < j {
560 {{Swap "data" "i" "j"}}
561 i++
562 j--
563 }
564 }
565
566 func swapRange{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, b, n int {{.ExtraParam}}) {
567 for i := 0; i < n; i++ {
568 {{Swap "data" "a+i" "b+i"}}
569 }
570 }
571
572 func stable{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, n int {{.ExtraParam}}) {
573 blockSize := 20 // must be > 0
574 a, b := 0, blockSize
575 for b <= n {
576 insertionSort{{.FuncSuffix}}(data, a, b {{.ExtraArg}})
577 a = b
578 b += blockSize
579 }
580 insertionSort{{.FuncSuffix}}(data, a, n {{.ExtraArg}})
581
582 for blockSize < n {
583 a, b = 0, 2*blockSize
584 for b <= n {
585 symMerge{{.FuncSuffix}}(data, a, a+blockSize, b {{.ExtraArg}})
586 a = b
587 b += 2 * blockSize
588 }
589 if m := a + blockSize; m < n {
590 symMerge{{.FuncSuffix}}(data, a, m, n {{.ExtraArg}})
591 }
592 blockSize *= 2
593 }
594 }
595
596 // symMerge{{.FuncSuffix}} merges the two sorted subsequences data[a:m] and data[m:b] using
597 // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
598 // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
599 // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
600 // Computer Science, pages 714-723. Springer, 2004.
601 //
602 // Let M = m-a and N = b-n. Wolog M < N.
603 // The recursion depth is bound by ceil(log(N+M)).
604 // The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
605 // The algorithm needs O((M+N)*log(M)) calls to data.Swap.
606 //
607 // The paper gives O((M+N)*log(M)) as the number of assignments assuming a
608 // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
609 // in the paper carries through for Swap operations, especially as the block
610 // swapping rotate uses only O(M+N) Swaps.
611 //
612 // symMerge assumes non-degenerate arguments: a < m && m < b.
613 // Having the caller check this condition eliminates many leaf recursion calls,
614 // which improves performance.
615 func symMerge{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, m, b int {{.ExtraParam}}) {
616 // Avoid unnecessary recursions of symMerge
617 // by direct insertion of data[a] into data[m:b]
618 // if data[a:m] only contains one element.
619 if m-a == 1 {
620 // Use binary search to find the lowest index i
621 // such that data[i] >= data[a] for m <= i < b.
622 // Exit the search loop with i == b in case no such index exists.
623 i := m
624 j := b
625 for i < j {
626 h := int(uint(i+j) >> 1)
627 if {{Less "data" "h" "a"}} {
628 i = h + 1
629 } else {
630 j = h
631 }
632 }
633 // Swap values until data[a] reaches the position before i.
634 for k := a; k < i-1; k++ {
635 {{Swap "data" "k" "k+1"}}
636 }
637 return
638 }
639
640 // Avoid unnecessary recursions of symMerge
641 // by direct insertion of data[m] into data[a:m]
642 // if data[m:b] only contains one element.
643 if b-m == 1 {
644 // Use binary search to find the lowest index i
645 // such that data[i] > data[m] for a <= i < m.
646 // Exit the search loop with i == m in case no such index exists.
647 i := a
648 j := m
649 for i < j {
650 h := int(uint(i+j) >> 1)
651 if !{{Less "data" "m" "h"}} {
652 i = h + 1
653 } else {
654 j = h
655 }
656 }
657 // Swap values until data[m] reaches the position i.
658 for k := m; k > i; k-- {
659 {{Swap "data" "k" "k-1"}}
660 }
661 return
662 }
663
664 mid := int(uint(a+b) >> 1)
665 n := mid + m
666 var start, r int
667 if m > mid {
668 start = n - b
669 r = mid
670 } else {
671 start = a
672 r = m
673 }
674 p := n - 1
675
676 for start < r {
677 c := int(uint(start+r) >> 1)
678 if !{{Less "data" "p-c" "c"}} {
679 start = c + 1
680 } else {
681 r = c
682 }
683 }
684
685 end := n - start
686 if start < m && m < end {
687 rotate{{.FuncSuffix}}(data, start, m, end {{.ExtraArg}})
688 }
689 if a < start && start < mid {
690 symMerge{{.FuncSuffix}}(data, a, start, mid {{.ExtraArg}})
691 }
692 if mid < end && end < b {
693 symMerge{{.FuncSuffix}}(data, mid, end, b {{.ExtraArg}})
694 }
695 }
696
697 // rotate{{.FuncSuffix}} rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data:
698 // Data of the form 'x u v y' is changed to 'x v u y'.
699 // rotate performs at most b-a many calls to data.Swap,
700 // and it assumes non-degenerate arguments: a < m && m < b.
701 func rotate{{.FuncSuffix}}{{.TypeParam}}(data {{.DataType}}, a, m, b int {{.ExtraParam}}) {
702 i := m - a
703 j := b - m
704
705 for i != j {
706 if i > j {
707 swapRange{{.FuncSuffix}}(data, m-i, m, j {{.ExtraArg}})
708 i -= j
709 } else {
710 swapRange{{.FuncSuffix}}(data, m-i, m+j-i, i {{.ExtraArg}})
711 j -= i
712 }
713 }
714 // i == j
715 swapRange{{.FuncSuffix}}(data, m-i, m, i {{.ExtraArg}})
716 }
717 `
718
View as plain text