Quicksort
#| Recursive, single-thread, random pivot, single-pass, quicksort implementation
multi quicksort(\a where a.elems < 2) { a }
multi quicksort(\a, \pivot = a.pick) {
my %prt{Order} is default([]) = a.classify: * cmp pivot;
|samewith(%prt{Less}), |%prt{Same}, |samewith(%prt{More})
}
concurrent implementation
The partitions can be sorted in parallel.
#| Recursive, parallel, random pivot, single-pass, quicksort implementation
multi quicksort-parallel-naive(\a where a.elems < 2) { a }
multi quicksort-parallel-naive(\a, \pivot = a.pick) {
my %prt{Order} is default([]) = a.classify: * cmp pivot;
my Promise $less = start { samewith(%prt{Less}) }
my $more = samewith(%prt{More});
await $less andthen |$less.result, |%prt{Same}, |$more;
}
Let's tune the parallel execution by applying a minimum batch size in order to spawn a new thread.
#| Recursive, parallel, batch tuned, single-pass, quicksort implementation
sub quicksort-parallel(@a, $batch = 2**9) {
return @a if @a.elems < 2;
# separate unsorted input into Order Less, Same and More compared to a random $pivot
my $pivot = @a.pick;
my %prt{Order} is default([]) = @a.classify( * cmp $pivot );
# decide if we sort the Less partition on a new thread
my $less = %prt{Less}.elems >= $batch
?? start { samewith(%prt{Less}, $batch) }
!! samewith(%prt{Less}, $batch);
# meanwhile use current thread for sorting the More partition
my $more = samewith(%prt{More}, $batch);
# if we went parallel, we need to await the result
await $less andthen $less = $less.result if $less ~~ Promise;
# concat all sorted partitions into a list and return
|$less, |%prt{Same}, |$more;
}
testing
Let's run some tests.
say "x" x 10 ~ " Testing " ~ "x" x 10;
use Test;
my @functions-under-test = &quicksort, &quicksort-parallel-naive, &quicksort-parallel;
my @testcases =
() => (),
<a>.List => <a>.List,
<a a> => <a a>,
("b", "a", 3) => (3, "a", "b"),
<h b a c d f e g> => <a b c d e f g h>,
<a 🎮 3 z 4 🐧> => <a 🎮 3 z 4 🐧>.sort
;
plan @testcases.elems * @functions-under-test.elems;
for @functions-under-test -> &fun {
say &fun.name;
is-deeply &fun(.key), .value, .key ~ " => " ~ .value for @testcases;
}
done-testing;
Output:
xxxxxxxxxx Testing xxxxxxxxxx
1..18
quicksort
ok 1 - =>
ok 2 - a => a
ok 3 - a a => a a
ok 4 - b a 3 => 3 a b
ok 5 - h b a c d f e g => a b c d e f g h
ok 6 - a 🎮 3 z 4 🐧 => 3 4 a z 🎮 🐧
quicksort-parallel-naive
ok 7 - =>
ok 8 - a => a
ok 9 - a a => a a
ok 10 - b a 3 => 3 a b
ok 11 - h b a c d f e g => a b c d e f g h
ok 12 - a 🎮 3 z 4 🐧 => 3 4 a z 🎮 🐧
quicksort-parallel
ok 13 - =>
ok 14 - a => a
ok 15 - a a => a a
ok 16 - b a 3 => 3 a b
ok 17 - h b a c d f e g => a b c d e f g h
ok 18 - a 🎮 3 z 4 🐧 => 3 4 a z 🎮 🐧
benchmarking
and some benchmarking
say "x" x 11 ~ " Benchmarking " ~ "x" x 11;
use Benchmark;
my $runs = 5;
my $elems = 10 * Kernel.cpu-cores * 2**10;
my @unsorted of Str = ('a'..'z').roll(8).join xx $elems;
my UInt $l-batch = 2**13;
my UInt $m-batch = 2**11;
my UInt $s-batch = 2**9;
my UInt $t-batch = 2**7;
say "elements: $elems, runs: $runs, cpu-cores: {Kernel.cpu-cores}, large/medium/small/tiny-batch: $l-batch/$m-batch/$s-batch/$t-batch";
my %results = timethese $runs, {
single-thread => { quicksort(@unsorted) },
parallel-naive => { quicksort-parallel-naive(@unsorted) },
parallel-tiny-batch => { quicksort-parallel(@unsorted, $t-batch) },
parallel-small-batch => { quicksort-parallel(@unsorted, $s-batch) },
parallel-medium-batch => { quicksort-parallel(@unsorted, $m-batch) },
parallel-large-batch => { quicksort-parallel(@unsorted, $l-batch) },
}, :statistics;
my @metrics = <mean median sd>;
my $msg-row = "%.4f\t" x @metrics.elems ~ '%s';
say @metrics.join("\t");
for %results.kv -> $name, %m {
say sprintf($msg-row, %m{@metrics}, $name);
}
Output:
xxxxxxxxxxx Benchmarking xxxxxxxxxxx
elements: 40960, runs: 5, cpu-cores: 4, large/medium/small/tiny-batch: 8192/2048/512/128
mean median sd
2.9503 2.8907 0.2071 parallel-small-batch
3.2054 3.1727 0.2078 parallel-tiny-batch
5.6524 5.0980 1.2628 parallel-naive
3.4717 3.3353 0.3622 parallel-medium-batch
4.6275 4.7793 0.4930 parallel-large-batch
6.5401 6.2832 0.5585 single-thread
Last updated