From d0840bfc5db83a8fe3a459c3ea30a7a7b374c116 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Tue, 10 Nov 2020 00:53:43 +0300 Subject: [PATCH] Move miniselect to submodule and replace quantile exact with better algorithm --- .gitmodules | 3 + contrib/miniselect | 1 + contrib/miniselect/.clang-format | 1 - contrib/miniselect/.gitignore | 100 -- contrib/miniselect/.travis.yml | 140 --- contrib/miniselect/AUTHORS | 2 - contrib/miniselect/CMakeLists.txt | 52 - contrib/miniselect/CONTRIBUTORS | 1 - contrib/miniselect/LICENSE_1_0.txt | 23 - contrib/miniselect/README.md | 272 ----- contrib/miniselect/benches/bench_common.h | 170 ---- .../miniselect/benches/benchmark_select.cpp | 46 - contrib/miniselect/benches/benchmark_sort.cpp | 46 - contrib/miniselect/examples/example.cpp | 18 - contrib/miniselect/fuzz/CMakeLists.txt | 38 - .../miniselect/fuzz/build_like_oss_fuzz.sh | 22 - contrib/miniselect/fuzz/fuzz_select.cpp | 66 -- contrib/miniselect/fuzz/fuzz_sort.cpp | 69 -- .../miniselect/fuzz/fuzz_string_select.cpp | 70 -- contrib/miniselect/fuzz/fuzz_string_sort.cpp | 73 -- contrib/miniselect/fuzz/main.cpp | 22 - contrib/miniselect/fuzz/ossfuzz.sh | 23 - .../include/miniselect/floyd_rivest_select.h | 120 --- .../include/miniselect/median_of_3_random.h | 69 -- .../include/miniselect/median_of_medians.h | 71 -- .../include/miniselect/median_of_ninthers.h | 190 ---- .../miniselect/include/miniselect/pdqselect.h | 935 ------------------ .../miniselect/private/median_common.h | 437 -------- contrib/miniselect/testing/test_common.h | 180 ---- contrib/miniselect/testing/test_select.cpp | 231 ----- contrib/miniselect/testing/test_sort.cpp | 161 --- src/AggregateFunctions/QuantileExact.h | 13 +- src/AggregateFunctions/QuantileTiming.h | 5 +- src/Columns/ColumnDecimal.cpp | 4 +- 34 files changed, 16 insertions(+), 3658 deletions(-) create mode 160000 contrib/miniselect delete mode 100644 contrib/miniselect/.clang-format delete mode 100644 contrib/miniselect/.gitignore delete mode 
100644 contrib/miniselect/.travis.yml delete mode 100644 contrib/miniselect/AUTHORS delete mode 100644 contrib/miniselect/CMakeLists.txt delete mode 100644 contrib/miniselect/CONTRIBUTORS delete mode 100644 contrib/miniselect/LICENSE_1_0.txt delete mode 100644 contrib/miniselect/README.md delete mode 100644 contrib/miniselect/benches/bench_common.h delete mode 100644 contrib/miniselect/benches/benchmark_select.cpp delete mode 100644 contrib/miniselect/benches/benchmark_sort.cpp delete mode 100644 contrib/miniselect/examples/example.cpp delete mode 100644 contrib/miniselect/fuzz/CMakeLists.txt delete mode 100755 contrib/miniselect/fuzz/build_like_oss_fuzz.sh delete mode 100644 contrib/miniselect/fuzz/fuzz_select.cpp delete mode 100644 contrib/miniselect/fuzz/fuzz_sort.cpp delete mode 100644 contrib/miniselect/fuzz/fuzz_string_select.cpp delete mode 100644 contrib/miniselect/fuzz/fuzz_string_sort.cpp delete mode 100644 contrib/miniselect/fuzz/main.cpp delete mode 100755 contrib/miniselect/fuzz/ossfuzz.sh delete mode 100644 contrib/miniselect/include/miniselect/floyd_rivest_select.h delete mode 100644 contrib/miniselect/include/miniselect/median_of_3_random.h delete mode 100644 contrib/miniselect/include/miniselect/median_of_medians.h delete mode 100644 contrib/miniselect/include/miniselect/median_of_ninthers.h delete mode 100644 contrib/miniselect/include/miniselect/pdqselect.h delete mode 100644 contrib/miniselect/include/miniselect/private/median_common.h delete mode 100644 contrib/miniselect/testing/test_common.h delete mode 100644 contrib/miniselect/testing/test_select.cpp delete mode 100644 contrib/miniselect/testing/test_sort.cpp diff --git a/.gitmodules b/.gitmodules index fdd48fcce01..0e4291eac9e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -190,3 +190,6 @@ path = contrib/croaring url = https://github.com/RoaringBitmap/CRoaring branch = v0.2.66 +[submodule "contrib/miniselect"] + path = contrib/miniselect + url = https://github.com/danlark1/miniselect diff 
--git a/contrib/miniselect b/contrib/miniselect new file mode 160000 index 00000000000..be0af6bd0b6 --- /dev/null +++ b/contrib/miniselect @@ -0,0 +1 @@ +Subproject commit be0af6bd0b6eb044d1acc4f754b229972d99903a diff --git a/contrib/miniselect/.clang-format b/contrib/miniselect/.clang-format deleted file mode 100644 index f6cb8ad931f..00000000000 --- a/contrib/miniselect/.clang-format +++ /dev/null @@ -1 +0,0 @@ -BasedOnStyle: Google diff --git a/contrib/miniselect/.gitignore b/contrib/miniselect/.gitignore deleted file mode 100644 index f80f36759c8..00000000000 --- a/contrib/miniselect/.gitignore +++ /dev/null @@ -1,100 +0,0 @@ -# eclipse project files -.cproject -.project -.settings - -# emacs temp files -*~ - -# vim temp files -.*.swp - -# XCode -^build/ -*.pbxuser -!default.pbxuser -*.mode1v3 -!default.mode1v3 -*.mode2v3 -!default.mode2v3 -*.perspectivev3 -!default.perspectivev3 -xcuserdata -*.xccheckout -*.moved-aside -DerivedData -*.hmap -*.ipa -*.xcuserstate -*.DS_Store - -# IDE specific folder for JetBrains IDEs -.idea/ -cmake-build-debug/ -cmake-build-release/ - -# Visual Studio Code artifacts -.vscode/* -.history/ - -# Visual Studio artifacts -/VS/ - -# C/C++ build outputs -.build/ -bins -gens -libs -objs - -# C++ ignore from https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore - -# Prerequisites -*.d - -# Compiled Object files -*.slo -*.lo -*.o -*.obj - -# Precompiled Headers -*.gch -*.pch - -# Compiled Dynamic libraries -*.so -*.dylib -*.dll - -# Fortran module files -*.mod -*.smod - -# Compiled Static libraries -*.lai -*.la -*.a -*.lib - -# Executables -*.exe -*.out -*.app - - -# CMake files that may be specific to our installation - -# Build outputs -/build*/ -/visual_studio/ -/benchmark/ - -# Fuzzer outputs generated by instructions in fuzz/Fuzzing.md -/corpus.zip -/ossfuzz-out/ -/out/ - -# Generated docs -/doc/api -*.orig diff --git a/contrib/miniselect/.travis.yml b/contrib/miniselect/.travis.yml deleted file mode 100644 index 
a5036caf365..00000000000 --- a/contrib/miniselect/.travis.yml +++ /dev/null @@ -1,140 +0,0 @@ -language: cpp - -dist: bionic - -matrix: - include: - - os: linux - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-8 - env: - - COMPILER="CC=gcc-8 && CXX=g++-8" - compiler: gcc-8 - - - os: linux - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-9 - env: - - COMPILER="CC=gcc-9 && CXX=g++-9" - compiler: gcc-9 - - - os: linux - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-10 - env: - - COMPILER="CC=gcc-10 && CXX=g++-10" - compiler: gcc-10 - - - os: linux - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-10 - env: - - COMPILER="CC=gcc-10 && CXX=g++-10" - - SANITIZE="on" - compiler: gcc-10-sanitize - - - os: linux - addons: - apt: - sources: - - llvm-toolchain-bionic-6.0 - packages: - - clang-6.0 - env: - - COMPILER="CC=clang-6.0 && CXX=clang++-6.0" - compiler: clang-6 - - - os: linux - addons: - apt: - sources: - - llvm-toolchain-bionic-7 - packages: - - clang-7 - env: - - COMPILER="CC=clang-7 && CXX=clang++-7" - compiler: clang-7 - - - os: linux - addons: - apt: - sources: - - llvm-toolchain-bionic-8 - packages: - - clang-8 - env: - - COMPILER="CC=clang-8 && CXX=clang++-8" - compiler: clang-8 - - - os: linux - addons: - apt: - sources: - - llvm-toolchain-bionic-9 - packages: - - clang-9 - env: - - COMPILER="CC=clang-9 && CXX=clang++-9" - compiler: clang-9 - - - os: linux - addons: - apt: - packages: - - clang-10 - sources: - - ubuntu-toolchain-r-test - - sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main' - key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' - env: - - COMPILER="CC=clang-10 && CXX=clang++-10" - compiler: clang-10 - - - os: linux - addons: - apt: - packages: - - clang-10 - sources: - - ubuntu-toolchain-r-test - - sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main' - key_url: 
'https://apt.llvm.org/llvm-snapshot.gpg.key' - env: - - COMPILER="CC=clang-10 && CXX=clang++-10" - - SANITIZE="on" - compiler: clang-10-sanitize - -before_install: - - eval "${COMPILER}" - - git clone https://github.com/google/benchmark.git - - git clone https://github.com/google/googletest.git benchmark/googletest - -install: - - export CMAKE_FLAGS="-DMINISELECT_TESTING=on -DCMAKE_BUILD_TYPE=RelWithDebInfo"; - - if [[ "${SANITIZE}" == "on" ]]; then - export CMAKE_FLAGS="${CMAKE_FLAGS} -DMINISELECT_SANITIZE=on"; - fi - - export CTEST_FLAGS="-j4 --output-on-failure -E checkperf" - -script: - - mkdir build - - cd build - - cmake $CMAKE_FLAGS .. - - cmake --build . -- -j2 - - ctest $CTEST_FLAGS diff --git a/contrib/miniselect/AUTHORS b/contrib/miniselect/AUTHORS deleted file mode 100644 index 896a8046a73..00000000000 --- a/contrib/miniselect/AUTHORS +++ /dev/null @@ -1,2 +0,0 @@ -# List of authors for copyright purposes, in no particular order -Danila Kutenin diff --git a/contrib/miniselect/CMakeLists.txt b/contrib/miniselect/CMakeLists.txt deleted file mode 100644 index 09e92031784..00000000000 --- a/contrib/miniselect/CMakeLists.txt +++ /dev/null @@ -1,52 +0,0 @@ -cmake_minimum_required(VERSION 3.7) -project(miniselect) - -option(MINISELECT_TESTING "Building the tests." OFF) -option(MINISELECT_SANITIZE "Building the library with sanitizers." OFF) -option(MINISELECT_BUILD_LIBCXX "Building the library with libcxx." OFF) -option(MINISELECT_ENABLE_FUZZING "Building the library with fuzzing." 
OFF) - -include_directories(include) - -if (MINISELECT_TESTING) - enable_testing() - set(CMAKE_CXX_STANDARD 17) - if (NOT CMAKE_BUILD_TYPE) - message(STATUS "No build type selected, default to Release") - set(CMAKE_BUILD_TYPE "Release") - endif() - if (MINISELECT_SANITIZE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all") - endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wall -Wextra -Wpedantic -Wno-gnu-zero-variadic-macro-arguments") - - if (MINISELECT_BUILD_LIBCXX AND "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - message(STATUS "Using libcxx as a default standard C++ library") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") - endif() - - add_subdirectory(benchmark) - include_directories(testing) - include_directories(benches) - - add_executable(benchmark_sort benches/benchmark_sort.cpp) - target_link_libraries(benchmark_sort benchmark::benchmark gtest) - add_executable(benchmark_select benches/benchmark_select.cpp) - target_link_libraries(benchmark_select benchmark::benchmark gtest) - - set(TEST_SOURCES testing/test_select.cpp) - add_executable(test_select ${TEST_SOURCES}) - target_link_libraries(test_select gtest gmock gtest_main) - add_test(NAME test_select COMMAND test_select) - - set(TEST_SOURCES testing/test_sort.cpp) - add_executable(test_sort ${TEST_SOURCES}) - target_link_libraries(test_sort gtest gmock gtest_main) - add_test(NAME test_sort COMMAND test_sort) -endif() - -if(MINISELECT_ENABLE_FUZZING) - add_subdirectory(benchmark) - include_directories(testing) - add_subdirectory(fuzz) -endif() diff --git a/contrib/miniselect/CONTRIBUTORS b/contrib/miniselect/CONTRIBUTORS deleted file mode 100644 index 75d47387e67..00000000000 --- a/contrib/miniselect/CONTRIBUTORS +++ /dev/null @@ -1 +0,0 @@ -# contributors (in no particular order) diff --git a/contrib/miniselect/LICENSE_1_0.txt b/contrib/miniselect/LICENSE_1_0.txt deleted file mode 100644 index 
36b7cd93cdf..00000000000 --- a/contrib/miniselect/LICENSE_1_0.txt +++ /dev/null @@ -1,23 +0,0 @@ -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
diff --git a/contrib/miniselect/README.md b/contrib/miniselect/README.md deleted file mode 100644 index cbe576ddba8..00000000000 --- a/contrib/miniselect/README.md +++ /dev/null @@ -1,272 +0,0 @@ -[![Build Status](https://travis-ci.com/danlark1/miniselect.svg?branch=main)](https://travis-ci.com/danlark1/miniselect) -[![License](https://img.shields.io/badge/License-Boost%201.0-lightblue.svg)](https://www.boost.org/LICENSE_1_0.txt) - -miniselect : Generic selection and partial ordering algorithms -============================================================== - -`miniselect` is a C++ header-only library that contains various generic selection -and partial sorting algorithms with the ease of use, testing, advice on usage and -benchmarking. - -Sorting is everywhere and there are many outstanding sorting algorithms that -compete in speed, comparison count and cache friendliness. However selection -algorithms are always a bit outside of the competition scope, however they are -pretty important, for example, in databases ORDER BY LIMIT N is used extremely -often which can benefit from more optimal selection and partial sorting -algorithms. This library tries to solve this problem with Modern C++. - -* **Easy:** First-class, easy to use dependency and carefully documented APIs and algorithm properties. -* **Fast:** We do care about speed of the algorithms and provide reasonable implementations. -* **Standard compliant:** We provide C++11 compatible APIs that are compliant to the standard [`std::nth_element`](https://en.cppreference.com/w/cpp/algorithm/nth_element) and [`std::partial_sort`](https://en.cppreference.com/w/cpp/algorithm/partial_sort) functions including custom comparators and order guarantees. Just replace the names of the functions in your project and it should work! -* **Well tested:** We test all algorithms with a unified framework, under sanitizers and fuzzing. 
-* **Benchmarked:** We gather benchmarks for all implementations to better understand good and bad spots. - -Table of Contents ------------------ - -* [Quick Start](#quick-start) -* [Testing](#testing) -* [Documentation](#documentation) -* [Performance results](#performance-results) -* [Real-world usage](#real-world-usage) -* [Contributing](#contributing) -* [Motivation](#motivation) -* [License](#license) - -Quick Start ------------ - -You can either include this project as a cmake dependency and then use the -headers that are provided in the [include](./include) folder or just pass the -[include](./include) folder to your compiler. - -```cpp -#include -#include - -#include "miniselect/median_of_ninthers.h" - -int main() { - std::vector v = {1, 8, 4, 3, 2, 9, 0, 7, 6, 5}; - miniselect::median_of_ninthers_select(v.begin(), v.begin() + 5, v.end()); - for (const int i : v) { - std::cout << i << ' '; - } - return 0; -} -// Compile it `clang++/g++ -I$DIRECTORY/miniselect/include/ example.cpp -std=c++11 -O3 -o example -// Possible output: 0 1 4 3 2 5 8 7 6 9 -``` - -Examples can be found in [examples](./examples). - -We support all compilers starting from GCC 7 and Clang 6. We are also planning -to support Windows, for now it is best effort but no issues are known so far. - -More on which algorithms are available, see [documentation](#documentation). - -Testing -------- - -To test and benchmark, we use [Google benchmark](https://github.com/google/benchmark) library. -Simply do in the root directory: - -```console -# Check out the library. -$ git clone https://github.com/google/benchmark.git -# Benchmark requires Google Test as a dependency. Add the source tree as a subdirectory. -$ git clone https://github.com/google/googletest.git benchmark/googletest -$ mkdir build && cd build -$ cmake -DMINISELECT_TESTING=on .. 
-$ make -j -$ ctest -j4 --output-on-failure -``` - -It will create two tests and two benchmarks `test_sort`, `test_select`, -`benchmark_sort`, `benchmark_select`. Use them to validate or contribute. You -can also use `ctest` - -Documentation -------------- - -There are several selection algorithms available, further ![\large n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+n) is the number -of elements in the array, ![\large k](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+k) is the selection element that is needed to be found (all algorithms are deterministic and not stable unless otherwise is specified): - - -| Name | Average | Best Case | Worst Case | Comparisons | Memory | -|------------------------- |--------------------------------------------------------------------------------------------------------- |--------------------------------------------------------------------------------------------------------- |----------------------------------------------------------------------------------------------------------------------- |---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |--------------------------------------------------------------------------------------------------------------------------------- | -| [pdqselect](./include/miniselect/pdqselect.h) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n\log n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%5Clog+n%29) | At least ![\large 
2n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+2n). Random data ![\large 2.5n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+2.5n) | ![\large O(1)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%281%29) | -| [Floyd-Rivest](./include/miniselect/floyd_rivest_select.h) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n^2 )](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%5E2+%29) | Avg: ![\large n + \min(k, n - k) + O(\sqrt{n \log n})](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+n+%2B+%5Cmin%28k%2C+n+-+k%29+%2B+O%28%5Csqrt%7Bn+%5Clog+n%7D%29) | ![\large O(\log \log n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28%5Clog+%5Clog+n%29) | -| [Median Of Medians](./include/miniselect/median_of_medians.h) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | Between ![\large 2n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+2n) and ![\large 22n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+22n). 
Random data ![\large 2.5n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+2.5n) | ![\large O(\log n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28%5Clog+n%29) | -| [Median Of Ninthers](./include/miniselect/median_of_ninthers.h) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | Between ![\large 2n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+2n) and ![\large 12n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+12.5n). Random data ![\large 2n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+2n) | ![\large O(\log n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28%5Clog+n%29) | -| [Median Of 3 Random](./include/miniselect/median_of_3_random.h) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n^2 )](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%5E2+%29) | At least ![\large 2n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+2n). 
Random data ![\large 3n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+3n) | ![\large O(\log n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28%5Clog+n%29) | -| [libstdc++ (introselect)](https://github.com/gcc-mirror/gcc/blob/e0af865ab9d9d5b6b3ac7fdde26cf9bbf635b6b4/libstdc%2B%2B-v3/include/bits/stl_algo.h#L4748) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n\log n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%5Clog+n%29) | At least ![\large 2n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+2n). Random data ![\large 3n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+3n) | ![\large O(1)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%281%29) | -| [libc++ (median of 3)](https://github.com/llvm/llvm-project/blob/3ed89b51da38f081fedb57727076262abb81d149/libcxx/include/algorithm#L5159) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n^2 )](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%5E2+%29) | At least ![\large 2n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+2n). 
Random data ![\large 3n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+3n) | ![\large O(1)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%281%29) | - -For sorting the situation is similar except every line adds ![\large O(k\log k)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28k%5Clog+k%29) comparisons and pdqselect is using ![\large O(\log n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28%5Clog+n%29) memory with one more general exception called partial sorting in C++ standard library. - -| Name | Average | Best Case | Worst Case | Comparisons | Memory | -|-------------------|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------- | -| [std::partial_sort](https://github.com/llvm/llvm-project/blob/3ed89b51da38f081fedb57727076262abb81d149/libcxx/include/algorithm#L5074) | ![\large O(n\log k)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%5Clog+k%29) | ![\large O(n)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%29) | ![\large O(n\log k)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%28n%5Clog+k%29) | ![\large n\log k](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+n%5Clog+k) on average, for some data patterns 
might be better | ![\large O(1)](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+O%281%29) | - -## API - -All functions end either in `select`, either in `partial_sort` and -their behavior is exactly the same as for -[`std::nth_element`](https://en.cppreference.com/w/cpp/algorithm/nth_element) -and [`std::partial_sort`](https://en.cppreference.com/w/cpp/algorithm/partial_sort) -respectively, i.e. they accept 3 arguments as `first`, `middle`, `end` iterators -and an optional comparator. Several notes: - -* You should not throw exceptions from `Compare` function. Standard library -also does not specify the behavior in that matter. -* We don't support ParallelSTL for now. -* C++20 constexpr specifiers might be added but currently we don't have them -because of some floating point math in several algorithms. -* All functions are in the `miniselect` namespace. See the example for that. - -- pdqselect - - This algorithm is based on [`pdqsort`](https://github.com/orlp/pdqsort) which is acknowledged as one of the fastest generic sort algorithms. - - **Location:** [`miniselect/pdqselect.h`](./include/miniselect/pdqselect.h). - - **Functions:** `pdqselect`, `pdqselect_branchless`, `pdqpartial_sort`, `pdqpartial_sort_branchless`. Branchless version uses branchless partition algorithm provided by [`pdqsort`](https://github.com/orlp/pdqsort). Use it if your comparison function is branchless, it might give performance for very big ranges. - - **Performance advice:** Use it when you need to sort a big chunk so that ![\large k](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+k) is close to ![\large n](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+n). - -

- -- Floyd-Rivest - - This algorithm is based on [Floyd-Rivest algorithm](https://en.wikipedia.org/wiki/Floyd%E2%80%93Rivest_algorithm). - - **Location:** [`miniselect/floyd_rivest_select.h`](./include/miniselect/floyd_rivest_select.h). - - **Functions:** `floyd_rivest_select`, `floyd_rivest_partial_sort`. - - **Performance advice:** Given that this algorithm performs as one of the best on average case in terms of comparisons and speed, we highly advise to - at least try this in your project. Especially it is good for small ![\large k](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+k) or types that are expensive to compare (for example, strings). But even for median the benchmarks show it outperforms others. It is not easy for this algorithm to build a reasonable worst case but one of examples when this algorithm does not perform well is when there are lots of similar values of linear size (random01 dataset showed some moderate penalties). - -We present here two gifs, for median and for ![\large k = n / 10](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+k+%3D+n+%2F+10) order statistic. - -

- - -

- -- Median Of Medians - - This algorithm is based on [Median of Medians](https://en.wikipedia.org/wiki/Median_of_medians) algorithm, one of the first deterministic linear time worst case median algorithm - - **Location:** [`miniselect/median_of_medians.h`](./include/miniselect/median_of_medians.h). - - **Functions:** `median_of_medians_select`, `median_of_medians_partial_sort`. - - **Performance advice:** This algorithm does not show advantages over others, implemented for historical reasons and for bechmarking. - -

- -- Median Of Ninthers - - This algorithm is based on [Fast Deterministic Selection](https://erdani.com/research/sea2017.pdf) paper by Andrei Alexandrescu, one of the latest and fastest deterministic linear time worst case median algorithms - - **Location:** [`miniselect/median_of_ninthers.h`](./include/miniselect/median_of_ninthers.h). - - **Functions:** `median_of_ninthers_select`, `median_of_ninthers_partial_sort`. - - **Performance advice:** Use this algorithm if you absolutely need linear time worst case scenario for selection algorithm. This algorithm shows some strengths over other deterministic [`PICK`](https://en.wikipedia.org/wiki/Median_of_medians) algorithms and has lower constanst than MedianOfMedians. - -

- -- Median Of 3 Random - - This algorithm is based on QuickSelect with the random median of 3 pivot choice algorithm (it chooses random 3 elements in the range and takes the middle value). It is a rando - - **Location:** [`miniselect/median_of_3_random.h`](./include/miniselect/median_of_3_random.h). - - **Functions:** `median_of_3_random_select`, `median_of_3_random_partial_sort`. - - **Performance advice:** This is a randomized algorithm and also it did not show any strengths against Median Of Ninthers. - -

- -- Introselect - - This algorithm is based on [Introselect](https://en.wikipedia.org/wiki/Introselect) algorithm, it is used in libstdc++ in `std::nth_element`, however instead of falling back to MedianOfMedians it is using HeapSelect which adds logarithm to its worst complexity. - - **Location:** ``. - - **Functions:** `std::nth_element`. - - **Performance advice:** This algorithm is used in standard library and is not recommended to use if you are looking for performance. - -

- -- Median Of 3 - - This algorithm is based on QuickSelect with median of 3 pivot choice algorithm (the middle value between begin, mid and end values), it is used in libc++ in `std::nth_element`. - - **Location:** ``. - - **Functions:** `std::nth_element`. - - **Performance advice:** This algorithm is used in standard library and is not recommended to use if you are looking for performance. - -

- -- `std::partial_sort` - - This algorithm has [heap-based solutions](https://en.wikipedia.org/wiki/Partial_sorting) both in libc++ and libstdc++, from the first ![\large k](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+k) elements the max heap is built, then one by one the elements are trying to be pushed to that heap with HeapSort in the end. - - **Location:** ``. - - **Functions:** `std::partial_sort`. - - **Performance advice:** This algorithm is very good for random data and small ![\large k](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+k) and might outperform all selection+sort algorithms. However, for descending data it starts to significantly degrade and is not recommended for use if you have such patterns in real data. - -

- -## Other algorithms to come - -* Kiwiel modification of FloydRivest algorithm which is described in [On Floyd and Rivest’s SELECT algorithm](https://core.ac.uk/download/pdf/82672439.pdf) with ternary and quintary pivots. -* Combination of FloydRivest and pdqsort pivot strategies, currently all experiments did not show any boost. - -Performance results -------------------- - -We use 10 datasets and 8 algorithms with 10000000 elements to find median and -other ![\large k](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+k) on `Intel(R) Core(TM) i5-4200H CPU @ 2.80GHz` for `std::vector`, -for median the benchmarks are the following: - -![median](benches/plots/result_10000000_5000000.png) - -![median](benches/plots/result_comparisons_10000000_5000000.png) - -For smaller ![\large k](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+k), -for example, 1000, the results are the following - -![k equals 1000](benches/plots/result_10000000_1000.png) - -![k equals 1000](benches/plots/result_comparisons_10000000_1000.png) - -Other benchmarks can be found [here](https://drive.google.com/drive/folders/1DHEaeXgZuX6AJ9eByeZ8iQVQv0ueP8XM). - -The benchmarks for number of swaps will be later. - -Real-world usage ----------------- - -- [Yandex ClickHouse](https://github.com/yandex/ClickHouse) - -If you are planning to use miniselect in your product, please work from one of -our releases and if you wish, you can write the acknowledgment in this section -for visibility. - -Contributing ------------- - -Patches are welcome with new algorithms! You should add the selection algorithm -together with the partial sorting algorithm in [include](./include), add -tests in [testing](./testing) and ideally run benchmarks to see how it performs. -If you also have some data cases to test against, we would be more than happy -to merge them. 
- -Motivation ----------- - -Firstly the author was interested if any research had been done for small ![\large k](https://render.githubusercontent.com/render/math?math=%5Cdisplaystyle+%5Clarge+k) -in selection algorithms and was struggling to find working implementations to -compare different approaches from standard library and quickselect algorithms. -After that it turned out that the problem is much more interesting than it looks -like and after reading The Art of Computer Programming from Donald Knuth about -minimum comparison sorting and selection algorithms the author decided to look -through all non-popular algorithms and try them out. - -The author have not found any decent library for selection algorithms and little -research is published in open source, so that they decided to merge all that -implementations and compare them with possible merging of different ideas -into a decent one algorithm for most needs. For a big story of adventures see -the author's blog post TODO. - -License -------- - -The code is made available under the [Boost License 1.0](https://boost.org/LICENSE_1_0.txt). - -Third-Party Libraries Used and Adjusted ---------------------------------------- - -| Library | License | -|---------------------|--------------------------------------------------------------------------------------------------| -| pdqsort | [MIT](https://github.com/orlp/pdqsort/blob/47a46767d76fc852284eaa083e4b7034ee6e2559/license.txt) | -| MedianOfNinthers | [Boost License 1.0](https://github.com/andralex/MedianOfNinthers/blob/master/LICENSE_1_0.txt) | - diff --git a/contrib/miniselect/benches/bench_common.h b/contrib/miniselect/benches/bench_common.h deleted file mode 100644 index b49b55dac9d..00000000000 --- a/contrib/miniselect/benches/bench_common.h +++ /dev/null @@ -1,170 +0,0 @@ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. 
- * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -#pragma once - -#include -#include -#include - -namespace miniselect { -namespace datagens { - -struct Random { - static std::vector Gen(size_t size) { - std::random_device rnd_device; - std::mt19937_64 mersenne_engine{rnd_device()}; - std::vector v; - v.reserve(size); - for (size_t i = 0; i < size; ++i) { - v.push_back(i); - } - std::shuffle(v.begin(), v.end(), mersenne_engine); - return v; - } -}; - -struct Shuffled16 { - static std::vector Gen(size_t size) { - std::random_device rnd_device; - std::mt19937_64 mersenne_engine{rnd_device()}; - std::vector v; - v.reserve(size); - for (size_t i = 0; i < size; ++i) { - v.push_back(i % 16); - } - std::shuffle(v.begin(), v.end(), mersenne_engine); - return v; - } -}; - -struct Random01 { - static std::vector Gen(size_t size) { - std::random_device rnd_device; - std::mt19937_64 mersenne_engine{rnd_device()}; - std::vector v; - v.reserve(size); - for (size_t i = 0; i < size; ++i) { - v.push_back(i % 2); - } - std::shuffle(v.begin(), v.end(), mersenne_engine); - return v; - } -}; - -struct Ascending { - static std::vector Gen(size_t size) { - std::vector v; - v.reserve(size); - for (size_t i = 0; i < size; ++i) { - v.push_back(i); - } - return v; - } -}; - -struct Descending { - static std::vector Gen(size_t size) { - std::vector v; - v.reserve(size); - for (int i = size - 1; i >= 0; --i) { - v.push_back(i); - } - return v; - } -}; - -struct PipeOrgan { - static std::vector Gen(size_t size) { - std::vector v; - v.reserve(size); - for (size_t i = 0; i < size / 2; ++i) { - v.push_back(i); - } - for (size_t i = size / 2; i < size; ++i) { - v.push_back(size - i); - } - return v; - } -}; - -struct PushFront { - static std::vector Gen(size_t size) { - std::vector v; - v.reserve(size); - for (size_t i = 1; i < size; ++i) { - v.push_back(i); - } - v.push_back(0); - return v; - } -}; - -struct PushMiddle { - static std::vector 
Gen(size_t size) { - std::vector v; - v.reserve(size); - for (size_t i = 0; i < size; ++i) { - if (i != size / 2) { - v.push_back(i); - } - } - v.push_back(size / 2); - return v; - } -}; - -struct Median3Killer { - static std::vector Gen(size_t size) { - size_t k = size / 2; - std::vector v; - v.reserve(size); - for (size_t i = 1; i < k + 1; ++i) { - if (i & 1) { - v.push_back(i); - } else { - v.push_back(k + i - 1); - } - } - for (size_t i = 1; i < k + 1; ++i) { - v.push_back(2 * i); - } - return v; - } -}; - -#define BENCH_IMPL(BENCH, GEN, IMPL) \ - BENCHMARK_TEMPLATE(BENCH, GEN, IMPL) \ - ->Unit(benchmark::kMicrosecond) \ - ->Arg(kSize - 10) \ - ->Arg(kSize / 2) \ - ->Arg(10000) \ - ->Arg(1000) \ - ->Arg(100) \ - ->Arg(10) \ - ->Arg(1) - -#define BENCH_GENS(BENCH, IMPL) \ - BENCH_IMPL(BENCH, datagens::Random, IMPL); \ - BENCH_IMPL(BENCH, datagens::Shuffled16, IMPL); \ - BENCH_IMPL(BENCH, datagens::Random01, IMPL); \ - BENCH_IMPL(BENCH, datagens::Ascending, IMPL); \ - BENCH_IMPL(BENCH, datagens::Descending, IMPL); \ - BENCH_IMPL(BENCH, datagens::PipeOrgan, IMPL); \ - BENCH_IMPL(BENCH, datagens::PushMiddle, IMPL); \ - BENCH_IMPL(BENCH, datagens::PushFront, IMPL); \ - BENCH_IMPL(BENCH, datagens::Median3Killer, IMPL) - -#define BENCH(NAME) \ - BENCH_GENS(NAME, algorithms::FloydRivest); \ - BENCH_GENS(NAME, algorithms::MedianOfNinthers); \ - BENCH_GENS(NAME, algorithms::MedianOfMedians); \ - BENCH_GENS(NAME, algorithms::MedianOf3Random); \ - BENCH_GENS(NAME, algorithms::PDQ); \ - BENCH_GENS(NAME, algorithms::PDQBranchless); \ - BENCH_GENS(NAME, algorithms::STD) - -} // namespace datagens -} // namespace miniselect diff --git a/contrib/miniselect/benches/benchmark_select.cpp b/contrib/miniselect/benches/benchmark_select.cpp deleted file mode 100644 index 2a9b238c90a..00000000000 --- a/contrib/miniselect/benches/benchmark_select.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. 
- * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -#include - -#include -#include -#include -#include -#include -#include - -#include "bench_common.h" -#include "test_common.h" - -namespace miniselect { -namespace { - -static constexpr size_t kSize = 65536; - -template -static void BM_sel(benchmark::State& state) { - auto vec = DataGen::Gen(kSize); - const size_t arg = state.range(0); - size_t cnt = 0; - size_t cmp = 0; - for (auto _ : state) { - Impl::Select(vec.begin(), vec.begin() + arg, vec.end(), - [&cmp](const auto& left, const auto& right) { - cmp++; - return left < right; - }); - ++cnt; - benchmark::DoNotOptimize(vec[arg]); - } - state.counters["Comparisons"] = 1.0 * cmp / cnt; -} - -BENCH(BM_sel); - -} // namespace -} // namespace miniselect - -BENCHMARK_MAIN(); diff --git a/contrib/miniselect/benches/benchmark_sort.cpp b/contrib/miniselect/benches/benchmark_sort.cpp deleted file mode 100644 index 8b3bbd1a77f..00000000000 --- a/contrib/miniselect/benches/benchmark_sort.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. 
- * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -#include - -#include -#include -#include -#include -#include -#include - -#include "bench_common.h" -#include "test_common.h" - -namespace miniselect { -namespace { - -static constexpr size_t kSize = 65536; - -template -static void BM_sort(benchmark::State& state) { - auto vec = DataGen::Gen(kSize); - const size_t arg = state.range(0); - size_t cnt = 0; - size_t cmp = 0; - for (auto _ : state) { - Impl::Sort(vec.begin(), vec.begin() + arg, vec.end(), - [&cmp](const auto& left, const auto& right) { - cmp++; - return left < right; - }); - ++cnt; - benchmark::DoNotOptimize(vec[arg]); - } - state.counters["Comparisons"] = 1.0 * cmp / cnt; -} - -BENCH(BM_sort); - -} // namespace -} // namespace miniselect - -BENCHMARK_MAIN(); diff --git a/contrib/miniselect/examples/example.cpp b/contrib/miniselect/examples/example.cpp deleted file mode 100644 index 183e81ae1b6..00000000000 --- a/contrib/miniselect/examples/example.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include - -#include "miniselect/median_of_ninthers.h" - -int main() { - std::vector v = {1, 8, 4, 3, 2, 9, 0, 7, 6, 5}; - miniselect::median_of_ninthers_select(v.begin(), v.begin() + 5, v.end()); - for (const int i : v) { - std::cout << i << ' '; - } - return 0; -} - -// Compile it `clang++/g++ -I$DIRECTORY/miniselect/include/ example.cpp -std=c++11 -O3 -o example - -// Possible output: 0 1 4 3 2 5 8 7 6 9 -// ^ on the right place diff --git a/contrib/miniselect/fuzz/CMakeLists.txt b/contrib/miniselect/fuzz/CMakeLists.txt deleted file mode 100644 index 38473bd78ad..00000000000 --- a/contrib/miniselect/fuzz/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -cmake_minimum_required(VERSION 3.7) - -project(fuzz) - -option(ENABLE_FUZZING "enable building the fuzzers" ON) -set(CMAKE_CXX_STANDARD 17) - -if(ENABLE_FUZZING) - set(MINISELECT_FUZZ_LDFLAGS "" CACHE STRING "LDFLAGS for the fuzz targets") - - 
add_library(miniselect-fuzzer INTERFACE) - target_link_libraries(miniselect-fuzzer INTERFACE gtest) - target_link_libraries(miniselect-fuzzer INTERFACE ${MINISELECT_FUZZ_LDFLAGS}) - - if(MINISELECT_FUZZ_LINKMAIN) - target_sources(simdjson-fuzzer INTERFACE $/main.cpp) - endif() - - # Define the fuzzers - add_custom_target(all_fuzzers) - - set(fuzzernames) - function(implement_fuzzer name) - add_executable(${name} ${name}.cpp) - target_link_libraries(${name} PRIVATE miniselect-fuzzer) - add_dependencies(all_fuzzers ${name}) - set(fuzzernames ${fuzzernames} ${name} PARENT_SCOPE) - endfunction() - - implement_fuzzer(fuzz_select) - implement_fuzzer(fuzz_string_select) - implement_fuzzer(fuzz_sort) - implement_fuzzer(fuzz_string_sort) - - # to be able to get a list of all fuzzers from within a script - add_custom_target(print_all_fuzzernames - COMMAND ${CMAKE_COMMAND} -E echo ${fuzzernames}) -endif() diff --git a/contrib/miniselect/fuzz/build_like_oss_fuzz.sh b/contrib/miniselect/fuzz/build_like_oss_fuzz.sh deleted file mode 100755 index 547348133a9..00000000000 --- a/contrib/miniselect/fuzz/build_like_oss_fuzz.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/sh -# -# This script emulates how oss fuzz invokes the build -# process, handy for trouble shooting cmake issues and possibly -# recreating testcases. 
For proper debugging of the oss fuzz -# build, follow the procedure at https://google.github.io/oss-fuzz/getting-started/new-project-guide/#testing-locally - -set -eu - -ossfuzz=$(readlink -f $(dirname $0))/ossfuzz.sh - -mkdir -p ossfuzz-out -export OUT=$(pwd)/ossfuzz-out -export CC=clang -export CXX="clang++" -export CFLAGS="-fsanitize=fuzzer-no-link" -export CXXFLAGS="-fsanitize=fuzzer-no-link,address,undefined -O1" -export LIB_FUZZING_ENGINE="-fsanitize=fuzzer" - -$ossfuzz - -echo "look at the results in $OUT" diff --git a/contrib/miniselect/fuzz/fuzz_select.cpp b/contrib/miniselect/fuzz/fuzz_select.cpp deleted file mode 100644 index f70980bd0d9..00000000000 --- a/contrib/miniselect/fuzz/fuzz_select.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include -#include -#include - -#include "test_common.h" - -template -void ChooseImplementation(uint8_t byte, std::vector& working, - Iter partition_iter, const ::testing::Types&) { - static_assert(sizeof...(T) < 256); - int i = 0; - constexpr size_t size = sizeof...(T); - ( - [&]() { - if (byte % size == i++) { - T::Select(working.begin(), partition_iter, working.end()); - } - }(), - ...); -} - -// Use the first element as a position into the data -extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t* data, - std::size_t size) { - if (size <= 3) return 0; - uint8_t impl = data[0]; - uint16_t partition_point = 0; - memcpy(&partition_point, data + 1, 2); - partition_point %= (size - 3); - std::vector working(data + 3, data + size); - auto canonical = working; - const auto partition_iter = working.begin() + partition_point; - ChooseImplementation(impl, working, partition_iter, - miniselect::algorithms::All{}); - - if (partition_iter != working.end()) { - const auto& nth = *partition_iter; - bool is_error = false; - if (!std::all_of(working.begin(), partition_iter, - [&](const auto& v) { return v <= nth; })) { - is_error = true; - } - if (!std::all_of(partition_iter, working.end(), - [&](const auto& v) { return v >= nth; 
})) { - is_error = true; - } - if (is_error) { - std::cerr << "FAILED!\nCanonical: "; - for (const auto& s : canonical) { - std::cerr << static_cast(s) << ' '; - } - std::cerr << std::endl; - std::cerr << "Got: "; - for (const auto& s : working) { - std::cerr << static_cast(s) << ' '; - } - std::cerr << std::endl; - std::cerr << "partition_iter = " << partition_iter - working.begin() - << std::endl; - std::abort(); - } - } - - return 0; -} diff --git a/contrib/miniselect/fuzz/fuzz_sort.cpp b/contrib/miniselect/fuzz/fuzz_sort.cpp deleted file mode 100644 index ba0a2b6ca3e..00000000000 --- a/contrib/miniselect/fuzz/fuzz_sort.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include -#include -#include -#include - -#include "test_common.h" - -template -void ChooseImplementation(uint8_t byte, std::vector& working, - Iter partition_iter, const ::testing::Types&) { - static_assert(sizeof...(T) < 256); - int i = 0; - constexpr size_t size = sizeof...(T); - ( - [&]() { - if (byte % size == i++) { - T::Sort(working.begin(), partition_iter, working.end()); - } - }(), - ...); -} - -// Use the first element as a position into the data -extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t* data, - std::size_t size) { - if (size <= 3) return 0; - uint8_t impl = data[0]; - uint16_t partition_point = 0; - memcpy(&partition_point, data + 1, 2); - partition_point %= (size - 3); - std::vector working(data + 3, data + size); - auto canonical = working; - const auto partition_iter = working.begin() + partition_point; - ChooseImplementation(impl, working, partition_iter, - miniselect::algorithms::All{}); - - bool is_error = false; - if (partition_iter != working.end()) { - const auto& nth = *std::min_element(partition_iter, working.end()); - if (!std::all_of(working.begin(), partition_iter, - [&](const auto& v) { return v <= nth; })) { - is_error = true; - } - if (!std::all_of(partition_iter, working.end(), - [&](const auto& v) { return v >= nth; })) { - is_error = true; - } - } - if 
(!std::is_sorted(working.begin(), partition_iter)) { - is_error = true; - } - if (is_error) { - std::cerr << "FAILED!\nCanonical: "; - for (const auto& s : canonical) { - std::cerr << static_cast(s) << ' '; - } - std::cerr << std::endl; - std::cerr << "Got: "; - for (const auto& s : working) { - std::cerr << static_cast(s) << ' '; - } - std::cerr << std::endl; - std::cerr << "partition_iter = " << partition_iter - working.begin() - << std::endl; - std::abort(); - } - - return 0; -} diff --git a/contrib/miniselect/fuzz/fuzz_string_select.cpp b/contrib/miniselect/fuzz/fuzz_string_select.cpp deleted file mode 100644 index cd24b376d86..00000000000 --- a/contrib/miniselect/fuzz/fuzz_string_select.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include - -#include "test_common.h" - -template -void ChooseImplementation(uint8_t byte, std::vector& working, - Iter partition_iter, const ::testing::Types&) { - static_assert(sizeof...(T) < 256); - int i = 0; - constexpr size_t size = sizeof...(T); - ( - [&]() { - if (byte % size == i++) { - T::Select(working.begin(), partition_iter, working.end()); - } - }(), - ...); -} - -// Use the first element as a position into the data -extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t* data, - std::size_t size) { - if (size <= 3) return 0; - uint8_t impl = data[0]; - uint16_t partition_point = 0; - memcpy(&partition_point, data + 1, 2); - partition_point %= (size - 3); - std::vector working; - for (auto i = data + 3; i < data + size; ++i) { - std::string s(1, *i); - working.push_back(s); - } - auto canonical = working; - const auto partition_iter = working.begin() + partition_point; - ChooseImplementation(impl, working, partition_iter, - miniselect::algorithms::All{}); - // nth may be the end iterator, in this case nth_element has no effect. 
- if (partition_iter != working.end()) { - const auto& nth = *partition_iter; - bool is_error = false; - if (!std::all_of(working.begin(), partition_iter, - [&](const auto& v) { return v <= nth; })) { - is_error = true; - } - if (!std::all_of(partition_iter, working.end(), - [&](const auto& v) { return v >= nth; })) { - is_error = true; - } - if (is_error) { - std::cerr << "FAILED!\nCanonical: "; - for (const auto& s : canonical) { - std::cerr << s << ' '; - } - std::cerr << std::endl; - std::cerr << "Got: "; - for (const auto& s : working) { - std::cerr << s << ' '; - } - std::cerr << std::endl; - std::cerr << "partition_iter = " << partition_iter - working.begin() - << std::endl; - std::abort(); - } - } - - return 0; -} diff --git a/contrib/miniselect/fuzz/fuzz_string_sort.cpp b/contrib/miniselect/fuzz/fuzz_string_sort.cpp deleted file mode 100644 index a797e0d7e22..00000000000 --- a/contrib/miniselect/fuzz/fuzz_string_sort.cpp +++ /dev/null @@ -1,73 +0,0 @@ -#include -#include -#include -#include - -#include "test_common.h" - -template -void ChooseImplementation(uint8_t byte, std::vector& working, - Iter partition_iter, const ::testing::Types&) { - static_assert(sizeof...(T) < 256); - int i = 0; - constexpr size_t size = sizeof...(T); - ( - [&]() { - if (byte % size == i++) { - T::Sort(working.begin(), partition_iter, working.end()); - } - }(), - ...); -} - -// Use the first element as a position into the data -extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t* data, - std::size_t size) { - if (size <= 3) return 0; - uint8_t impl = data[0]; - uint16_t partition_point = 0; - memcpy(&partition_point, data + 1, 2); - partition_point %= (size - 3); - std::vector working; - for (auto i = data + 3; i < data + size; ++i) { - std::string s(1, *i); - working.push_back(s); - } - auto canonical = working; - const auto partition_iter = working.begin() + partition_point; - ChooseImplementation(impl, working, partition_iter, - miniselect::algorithms::All{}); - // nth 
may be the end iterator, in this case nth_element has no effect. - bool is_error = false; - if (partition_iter != working.end()) { - const auto& nth = *std::min_element(partition_iter, working.end()); - if (!std::all_of(working.begin(), partition_iter, - [&](const auto& v) { return v <= nth; })) { - is_error = true; - } - if (!std::all_of(partition_iter, working.end(), - [&](const auto& v) { return v >= nth; })) { - is_error = true; - } - } - if (!std::is_sorted(working.begin(), partition_iter)) { - is_error = true; - } - if (is_error) { - std::cerr << "FAILED!\nCanonical: "; - for (const auto& s : canonical) { - std::cerr << s << ' '; - } - std::cerr << std::endl; - std::cerr << "Got: "; - for (const auto& s : working) { - std::cerr << s << ' '; - } - std::cerr << std::endl; - std::cerr << "partition_iter = " << partition_iter - working.begin() - << std::endl; - std::abort(); - } - - return 0; -} diff --git a/contrib/miniselect/fuzz/main.cpp b/contrib/miniselect/fuzz/main.cpp deleted file mode 100644 index e3377035f33..00000000000 --- a/contrib/miniselect/fuzz/main.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include -#include -#include - -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, std::size_t Size); - -int main(int argc, char* argv[]) { - for (int i = 1; i < argc; ++i) { - std::ifstream in(argv[i]); - assert(in); - in.seekg(0, std::ios_base::end); - const auto pos = in.tellg(); - assert(pos >= 0); - in.seekg(0, std::ios_base::beg); - std::vector buf(static_cast(pos)); - in.read(buf.data(), static_cast(buf.size())); - assert(in.gcount() == pos); - LLVMFuzzerTestOneInput(reinterpret_cast(buf.data()), - buf.size()); - } -} diff --git a/contrib/miniselect/fuzz/ossfuzz.sh b/contrib/miniselect/fuzz/ossfuzz.sh deleted file mode 100755 index 83f37f54eee..00000000000 --- a/contrib/miniselect/fuzz/ossfuzz.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/sh -# -# entry point for oss-fuzz, so that fuzzers -# and build invocation can be changed without having -# to 
modify the oss-fuzz repo. -# -# invoke it from the git root. - -# make sure to exit on problems -set -eux - -mkdir -p build -cd build - -cmake .. \ --GNinja \ --DCMAKE_BUILD_TYPE=Debug \ --DENABLE_FUZZING=On \ --DMINISELECT_FUZZ_LINKMAIN=off \ --DMINISELECT_FUZZ_LDFLAGS=$LIB_FUZZING_ENGINE - -cmake --build . --target all_fuzzers - diff --git a/contrib/miniselect/include/miniselect/floyd_rivest_select.h b/contrib/miniselect/include/miniselect/floyd_rivest_select.h deleted file mode 100644 index e7d5f80f572..00000000000 --- a/contrib/miniselect/include/miniselect/floyd_rivest_select.h +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace miniselect { -namespace floyd_rivest_detail { - -template -struct CompareRefType { - // Pass the comparator by lvalue reference. Or in debug mode, using a - // debugging wrapper that stores a reference. - using type = typename std::add_lvalue_reference::type; -}; - -template -inline void floyd_rivest_select_loop(Iter begin, Diff left, Diff right, Diff k, - Compare comp) { - while (right > left) { - Diff size = right - left; - if (size > 600) { - Diff n = right - left + 1; - Diff i = k - left + 1; - double z = log(n); - double s = 0.5 * exp(2 * z / 3); - double sd = 0.5 * sqrt(z * s * (n - s) / n); - if (i < n / 2) { - sd *= -1.0; - } - Diff newLeft = std::max(left, (Diff)(k - i * s / n + sd)); - Diff newRight = std::min(right, (Diff)(k + (n - i) * s / n + sd)); - floyd_rivest_select_loop(begin, newLeft, newRight, k, - comp); - } - Diff i = left; - Diff j = right; - std::swap(begin[left], begin[k]); - const bool to_swap = comp(begin[left], begin[right]); - if (to_swap) { - std::swap(begin[left], begin[right]); - } - // Make sure that non copyable types compile. 
- const auto& t = to_swap ? begin[left] : begin[right]; - while (i < j) { - std::swap(begin[i], begin[j]); - i++; - j--; - while (comp(begin[i], t)) { - i++; - } - while (comp(t, begin[j])) { - j--; - } - } - - if (to_swap) { - std::swap(begin[left], begin[j]); - } else { - j++; - std::swap(begin[right], begin[j]); - } - - if (j <= k) { - left = j + 1; - } - if (k <= j) { - right = j - 1; - } - } -} - -} // namespace floyd_rivest_detail - -template -inline void floyd_rivest_partial_sort(Iter begin, Iter mid, Iter end, - Compare comp) { - if (begin == end) return; - if (begin == mid) return; - using CompType = typename floyd_rivest_detail::CompareRefType::type; - - floyd_rivest_detail::floyd_rivest_select_loop< - Iter, CompType, typename std::iterator_traits::difference_type>( - begin, 0, end - begin - 1, mid - begin - 1, comp); - // std::sort proved to be better than other sorts because of pivoting. - std::sort(begin, mid, comp); -} - -template -inline void floyd_rivest_partial_sort(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - floyd_rivest_partial_sort(begin, mid, end, std::less()); -} - -template -inline void floyd_rivest_select(Iter begin, Iter mid, Iter end, Compare comp) { - if (mid == end) return; - using CompType = typename floyd_rivest_detail::CompareRefType::type; - - floyd_rivest_detail::floyd_rivest_select_loop< - Iter, CompType, typename std::iterator_traits::difference_type>( - begin, 0, end - begin - 1, mid - begin, comp); -} - -template -inline void floyd_rivest_select(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - floyd_rivest_select(begin, mid, end, std::less()); -} - -} // namespace miniselect diff --git a/contrib/miniselect/include/miniselect/median_of_3_random.h b/contrib/miniselect/include/miniselect/median_of_3_random.h deleted file mode 100644 index 0f7b62fd61c..00000000000 --- a/contrib/miniselect/include/miniselect/median_of_3_random.h +++ /dev/null @@ 
-1,69 +0,0 @@ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -#pragma once - -#include -#include -#include -#include -#include - -#include "private/median_common.h" - -namespace miniselect { -namespace median_of_3_random_detail { - -template -static inline Iter partition(Iter r, Iter end, Compare&& comp) { - typedef typename std::iterator_traits::difference_type T; - const T len = end - r; - assert(len >= 3); - static std::mt19937_64 gen(1); - std::uniform_int_distribution dis(0, len - 1); - T x = dis(gen); - T y = dis(gen); - T z = dis(gen); - return median_common_detail::pivotPartition( - r, median_common_detail::medianIndex(r, x, y, z, comp), len, comp); -} - -} // namespace median_of_3_random_detail - -template -inline void median_of_3_random_select(Iter begin, Iter mid, Iter end, - Compare comp) { - if (mid == end) return; - using CompType = typename floyd_rivest_detail::CompareRefType::type; - - median_common_detail::quickselect< - Iter, CompType, &median_of_3_random_detail::partition>( - begin, mid, end, comp); -} - -template -inline void median_of_3_random_select(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - median_of_3_random_select(begin, mid, end, std::less()); -} - -template -inline void median_of_3_random_sort(Iter begin, Iter mid, Iter end, - Compare comp) { - if (begin == mid) return; - using CompType = typename floyd_rivest_detail::CompareRefType::type; - median_common_detail::quickselect< - Iter, CompType, &median_of_3_random_detail::partition>( - begin, mid - 1, end, comp); - std::sort(begin, mid, comp); -} - -template -inline void median_of_3_random_sort(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - median_of_3_random_sort(begin, mid, end, std::less()); -} - -} // namespace miniselect diff --git 
a/contrib/miniselect/include/miniselect/median_of_medians.h b/contrib/miniselect/include/miniselect/median_of_medians.h deleted file mode 100644 index 922401b12d0..00000000000 --- a/contrib/miniselect/include/miniselect/median_of_medians.h +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -#pragma once - -#include -#include -#include -#include - -#include "private/median_common.h" - -namespace miniselect { -namespace median_of_medians_detail { - -template -static inline Iter partition(Iter r, Iter end, Compare&& comp) { - using CompType = typename median_common_detail::CompareRefType::type; - const size_t len = end - r; - if (len < 5) { - return median_common_detail::pivotPartition(r, len / 2, len, comp); - } - size_t j = 0; - for (size_t i = 4; i < len; i += 5, ++j) { - median_common_detail::partition5(r, i - 4, i - 3, i, i - 2, i - 1, comp); - std::swap(r[i], r[j]); - } - median_common_detail::quickselect(r, r + j / 2, - r + j, comp); - return median_common_detail::pivotPartition(r, j / 2, len, comp); -} - -} // namespace median_of_medians_detail - -template -inline void median_of_medians_select(Iter begin, Iter mid, Iter end, - Compare comp) { - if (mid == end) return; - using CompType = typename median_common_detail::CompareRefType::type; - - median_common_detail::quickselect< - Iter, CompType, &median_of_medians_detail::partition>( - begin, mid, end, comp); -} - -template -inline void median_of_medians_select(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - median_of_medians_select(begin, mid, end, std::less()); -} - -template -inline void median_of_medians_sort(Iter begin, Iter mid, Iter end, - Compare comp) { - if (begin == mid) return; - using CompType = typename median_common_detail::CompareRefType::type; - 
median_common_detail::quickselect< - Iter, CompType, &median_of_medians_detail::partition>( - begin, mid - 1, end, comp); - std::sort(begin, mid, comp); -} - -template -inline void median_of_medians_sort(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - median_of_medians_sort(begin, mid, end, std::less()); -} - -} // namespace miniselect diff --git a/contrib/miniselect/include/miniselect/median_of_ninthers.h b/contrib/miniselect/include/miniselect/median_of_ninthers.h deleted file mode 100644 index 099786cf518..00000000000 --- a/contrib/miniselect/include/miniselect/median_of_ninthers.h +++ /dev/null @@ -1,190 +0,0 @@ -/* Copyright Andrei Alexandrescu, 2016-. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -// Adjusted from Alexandrescu paper to support arbitrary comparators. 
-#pragma once - -#include -#include -#include -#include -#include - -#include "private/median_common.h" - -namespace miniselect { -namespace median_of_ninthers_detail { - -template -void adaptiveQuickselect(Iter r, size_t n, size_t length, Compare&& comp); - -/** -Median of minima -*/ -template -size_t medianOfMinima(Iter const r, const size_t n, const size_t length, - Compare&& comp) { - assert(length >= 2); - assert(n * 4 <= length); - assert(n > 0); - const size_t subset = n * 2, computeMinOver = (length - subset) / subset; - assert(computeMinOver > 0); - for (size_t i = 0, j = subset; i < subset; ++i) { - const auto limit = j + computeMinOver; - size_t minIndex = j; - while (++j < limit) - if (comp(r[j], r[minIndex])) minIndex = j; - if (comp(r[minIndex], r[i])) std::swap(r[i], r[minIndex]); - assert(j < length || i + 1 == subset); - } - adaptiveQuickselect(r, n, subset, comp); - return median_common_detail::expandPartition(r, 0, n, subset, length, comp); -} - -/** -Median of maxima -*/ -template -size_t medianOfMaxima(Iter const r, const size_t n, const size_t length, - Compare&& comp) { - assert(length >= 2); - assert(n * 4 >= length * 3 && n < length); - const size_t subset = (length - n) * 2, subsetStart = length - subset, - computeMaxOver = subsetStart / subset; - assert(computeMaxOver > 0); - for (size_t i = subsetStart, j = i - subset * computeMaxOver; i < length; - ++i) { - const auto limit = j + computeMaxOver; - size_t maxIndex = j; - while (++j < limit) - if (comp(r[maxIndex], r[j])) maxIndex = j; - if (comp(r[i], r[maxIndex])) std::swap(r[i], r[maxIndex]); - assert(j != 0 || i + 1 == length); - } - adaptiveQuickselect(r + subsetStart, length - n, subset, comp); - return median_common_detail::expandPartition(r, subsetStart, n, length, - length, comp); -} - -/** -Partitions r[0 .. length] using a pivot of its own choosing. Attempts to pick a -pivot that approximates the median. Returns the position of the pivot. 
-*/ -template -size_t medianOfNinthers(Iter const r, const size_t length, Compare&& comp) { - assert(length >= 12); - const auto frac = length <= 1024 - ? length / 12 - : length <= 128 * 1024 ? length / 64 : length / 1024; - auto pivot = frac / 2; - const auto lo = length / 2 - pivot, hi = lo + frac; - assert(lo >= frac * 4); - assert(length - hi >= frac * 4); - assert(lo / 2 >= pivot); - const auto gap = (length - 9 * frac) / 4; - auto a = lo - 4 * frac - gap, b = hi + gap; - for (size_t i = lo; i < hi; ++i, a += 3, b += 3) { - median_common_detail::ninther(r, a, i - frac, b, a + 1, i, b + 1, a + 2, - i + frac, b + 2, comp); - } - - adaptiveQuickselect(r + lo, pivot, frac, comp); - return median_common_detail::expandPartition(r, lo, lo + pivot, hi, length, - comp); -} - -/** -Quickselect driver for medianOfNinthers, medianOfMinima, and medianOfMaxima. -Dispathes to each depending on the relationship between n (the sought order -statistics) and length. -*/ -template -void adaptiveQuickselect(Iter r, size_t n, size_t length, Compare&& comp) { - assert(n < length); - for (;;) { - // Decide strategy for partitioning - if (n == 0) { - // That would be the max - auto pivot = n; - for (++n; n < length; ++n) - if (comp(r[n], r[pivot])) pivot = n; - std::swap(r[0], r[pivot]); - return; - } - if (n + 1 == length) { - // That would be the min - auto pivot = 0; - for (n = 1; n < length; ++n) - if (comp(r[pivot], r[n])) pivot = n; - std::swap(r[pivot], r[length - 1]); - return; - } - assert(n < length); - size_t pivot; - if (length <= 16) - pivot = median_common_detail::pivotPartition(r, n, length, comp) - r; - else if (n * 6 <= length) - pivot = medianOfMinima(r, n, length, comp); - else if (n * 6 >= length * 5) - pivot = medianOfMaxima(r, n, length, comp); - else - pivot = medianOfNinthers(r, length, comp); - - // See how the pivot fares - if (pivot == n) { - return; - } - if (pivot > n) { - length = pivot; - } else { - ++pivot; - r += pivot; - length -= pivot; - n -= pivot; 
- } - } -} - -} // namespace median_of_ninthers_detail - -template -inline void median_of_ninthers_select(Iter begin, Iter mid, Iter end, - Compare comp) { - if (mid == end) return; - using CompType = typename median_common_detail::CompareRefType::type; - - median_of_ninthers_detail::adaptiveQuickselect( - begin, mid - begin, end - begin, comp); -} - -template -inline void median_of_ninthers_select(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - median_of_ninthers_select(begin, mid, end, std::less()); -} - -template -inline void median_of_ninthers_sort(Iter begin, Iter mid, Iter end, - Compare comp) { - if (begin == mid) return; - using CompType = typename median_common_detail::CompareRefType::type; - - median_of_ninthers_detail::adaptiveQuickselect( - begin, mid - begin - 1, end - begin, comp); - std::sort(begin, mid, comp); -} - -template -inline void median_of_ninthers_sort(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - median_of_ninthers_sort(begin, mid, end, std::less()); -} - -} // namespace miniselect diff --git a/contrib/miniselect/include/miniselect/pdqselect.h b/contrib/miniselect/include/miniselect/pdqselect.h deleted file mode 100644 index 0a22d059103..00000000000 --- a/contrib/miniselect/include/miniselect/pdqselect.h +++ /dev/null @@ -1,935 +0,0 @@ -/* - pdqsort.h - Pattern-defeating quicksort. - - Copyright (c) 2015 Orson Peters - - This software is provided 'as-is', without any express or implied warranty. - In no event will the authors be held liable for any damages arising from the - use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. 
If you use this software in a - product, an acknowledgment in the product documentation would be appreciated - but is not required. - - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - - 3. This notice may not be removed or altered from any source distribution. -*/ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -// Adjusted by Danila Kutenin to support pdqselect and pdqpartial_sort. - -#ifndef PDQSORT_H -#define PDQSORT_H - -#include -#include -#include -#include -#include - -#if __cplusplus >= 201103L -#include -#include -#define PDQSORT_PREFER_MOVE(x) std::move(x) -#else -#define PDQSORT_PREFER_MOVE(x) (x) -#endif - -namespace miniselect { -namespace pdqsort_detail { - -template -struct CompareRefType { - // Pass the comparator by lvalue reference. Or in debug mode, using a - // debugging wrapper that stores a reference. - using type = typename std::add_lvalue_reference::type; -}; - -enum { - // Partitions below this size are sorted using insertion sort. - insertion_sort_threshold = 24, - - // Partitions above this size use Tukey's ninther to select the pivot. - ninther_threshold = 128, - - // When we detect an already sorted partition, attempt an insertion sort that - // allows this - // amount of element moves before giving up. - partial_insertion_sort_limit = 8, - - // Must be multiple of 8 due to loop unrolling, and < 256 to fit in unsigned - // char. - block_size = 64, - - // Cacheline size, assumes power of two. - cacheline_size = 64 - -}; - -#if __cplusplus >= 201103L -template -struct is_default_compare : std::false_type {}; -template -struct is_default_compare> : std::true_type {}; -template -struct is_default_compare> : std::true_type {}; -#endif - -// Returns floor(log2(n)), assumes n > 0. 
-template -inline int log2(T n) { - int log = 0; - while (n >>= 1) ++log; - return log; -} - -// Sorts [begin, end) using insertion sort with the given comparison function. -template -inline void insertion_sort(Iter begin, Iter end, Compare& comp) { - typedef typename std::iterator_traits::value_type T; - if (begin == end) return; - - for (Iter cur = begin + 1; cur != end; ++cur) { - Iter sift = cur; - Iter sift_1 = cur - 1; - - // Compare first so we can avoid 2 moves for an element already positioned - // correctly. - if (comp(*sift, *sift_1)) { - T tmp = PDQSORT_PREFER_MOVE(*sift); - - do { - *sift-- = PDQSORT_PREFER_MOVE(*sift_1); - } while (sift != begin && comp(tmp, *--sift_1)); - - *sift = PDQSORT_PREFER_MOVE(tmp); - } - } -} - -// Sorts [begin, end) using insertion sort with the given comparison function. -// Assumes -// *(begin - 1) is an element smaller than or equal to any element in [begin, -// end). -template -inline void unguarded_insertion_sort(Iter begin, Iter end, Compare& comp) { - typedef typename std::iterator_traits::value_type T; - if (begin == end) return; - - for (Iter cur = begin + 1; cur != end; ++cur) { - Iter sift = cur; - Iter sift_1 = cur - 1; - - // Compare first so we can avoid 2 moves for an element already positioned - // correctly. - if (comp(*sift, *sift_1)) { - T tmp = PDQSORT_PREFER_MOVE(*sift); - - do { - *sift-- = PDQSORT_PREFER_MOVE(*sift_1); - } while (comp(tmp, *--sift_1)); - - *sift = PDQSORT_PREFER_MOVE(tmp); - } - } -} - -// Attempts to use insertion sort on [begin, end). Will return false if more -// than partial_insertion_sort_limit elements were moved, and abort sorting. -// Otherwise it will successfully sort and return true. 
-template -inline bool partial_insertion_sort(Iter begin, Iter end, Compare& comp) { - typedef typename std::iterator_traits::value_type T; - if (begin == end) return true; - - std::size_t limit = 0; - for (Iter cur = begin + 1; cur != end; ++cur) { - Iter sift = cur; - Iter sift_1 = cur - 1; - - // Compare first so we can avoid 2 moves for an element already positioned - // correctly. - if (comp(*sift, *sift_1)) { - T tmp = PDQSORT_PREFER_MOVE(*sift); - - do { - *sift-- = PDQSORT_PREFER_MOVE(*sift_1); - } while (sift != begin && comp(tmp, *--sift_1)); - - *sift = PDQSORT_PREFER_MOVE(tmp); - limit += cur - sift; - } - - if (limit > partial_insertion_sort_limit) return false; - } - - return true; -} - -template -inline void sort2(Iter a, Iter b, Compare& comp) { - if (comp(*b, *a)) std::iter_swap(a, b); -} - -// Sorts the elements *a, *b and *c using comparison function comp. -template -inline void sort3(Iter a, Iter b, Iter c, Compare& comp) { - sort2(a, b, comp); - sort2(b, c, comp); - sort2(a, b, comp); -} - -template -inline T* align_cacheline(T* p) { -#if defined(UINTPTR_MAX) && __cplusplus >= 201103L - std::uintptr_t ip = reinterpret_cast(p); -#else - std::size_t ip = reinterpret_cast(p); -#endif - ip = (ip + cacheline_size - 1) & -cacheline_size; - return reinterpret_cast(ip); -} - -template -inline void swap_offsets(Iter first, Iter last, unsigned char* offsets_l, - unsigned char* offsets_r, int num, bool use_swaps) { - typedef typename std::iterator_traits::value_type T; - if (use_swaps) { - // This case is needed for the descending distribution, where we need - // to have proper swapping for pdqsort to remain O(n). 
- for (int i = 0; i < num; ++i) { - std::iter_swap(first + offsets_l[i], last - offsets_r[i]); - } - } else if (num > 0) { - Iter l = first + offsets_l[0]; - Iter r = last - offsets_r[0]; - T tmp(PDQSORT_PREFER_MOVE(*l)); - *l = PDQSORT_PREFER_MOVE(*r); - for (int i = 1; i < num; ++i) { - l = first + offsets_l[i]; - *r = PDQSORT_PREFER_MOVE(*l); - r = last - offsets_r[i]; - *l = PDQSORT_PREFER_MOVE(*r); - } - *r = PDQSORT_PREFER_MOVE(tmp); - } -} - -// Partitions [begin, end) around pivot *begin using comparison function comp. -// Elements equal to the pivot are put in the right-hand partition. Returns the -// position of the pivot after partitioning and whether the passed sequence -// already was correctly partitioned. Assumes the pivot is a median of at least -// 3 elements and that [begin, end) is at least insertion_sort_threshold long. -// Uses branchless partitioning. -template -inline std::pair partition_right_branchless(Iter begin, Iter end, - Compare& comp) { - typedef typename std::iterator_traits::value_type T; - - // Move pivot into local for speed. - T pivot(PDQSORT_PREFER_MOVE(*begin)); - Iter first = begin; - Iter last = end; - - // Find the first element greater than or equal than the pivot (the median of - // 3 guarantees this exists). - while (comp(*++first, pivot)) - ; - - // Find the first element strictly smaller than the pivot. We have to guard - // this search if there was no element before *first. - if (first - 1 == begin) - while (first < last && !comp(*--last, pivot)) - ; - else - while (!comp(*--last, pivot)) - ; - - // If the first pair of elements that should be swapped to partition are the - // same element, the passed in sequence already was correctly partitioned. 
- bool already_partitioned = first >= last; - if (!already_partitioned) { - std::iter_swap(first, last); - ++first; - } - - // The following branchless partitioning is derived from "BlockQuicksort: How - // Branch Mispredictions don’t affect Quicksort" by Stefan Edelkamp and Armin - // Weiss. - unsigned char offsets_l_storage[block_size + cacheline_size]; - unsigned char offsets_r_storage[block_size + cacheline_size]; - unsigned char* offsets_l = align_cacheline(offsets_l_storage); - unsigned char* offsets_r = align_cacheline(offsets_r_storage); - int num_l, num_r, start_l, start_r; - num_l = num_r = start_l = start_r = 0; - - while (last - first > 2 * block_size) { - // Fill up offset blocks with elements that are on the wrong side. - if (num_l == 0) { - start_l = 0; - Iter it = first; - for (unsigned char i = 0; i < block_size;) { - offsets_l[num_l] = i++; - num_l += !comp(*it, pivot); - ++it; - offsets_l[num_l] = i++; - num_l += !comp(*it, pivot); - ++it; - offsets_l[num_l] = i++; - num_l += !comp(*it, pivot); - ++it; - offsets_l[num_l] = i++; - num_l += !comp(*it, pivot); - ++it; - offsets_l[num_l] = i++; - num_l += !comp(*it, pivot); - ++it; - offsets_l[num_l] = i++; - num_l += !comp(*it, pivot); - ++it; - offsets_l[num_l] = i++; - num_l += !comp(*it, pivot); - ++it; - offsets_l[num_l] = i++; - num_l += !comp(*it, pivot); - ++it; - } - } - if (num_r == 0) { - start_r = 0; - Iter it = last; - for (unsigned char i = 0; i < block_size;) { - offsets_r[num_r] = ++i; - num_r += comp(*--it, pivot); - offsets_r[num_r] = ++i; - num_r += comp(*--it, pivot); - offsets_r[num_r] = ++i; - num_r += comp(*--it, pivot); - offsets_r[num_r] = ++i; - num_r += comp(*--it, pivot); - offsets_r[num_r] = ++i; - num_r += comp(*--it, pivot); - offsets_r[num_r] = ++i; - num_r += comp(*--it, pivot); - offsets_r[num_r] = ++i; - num_r += comp(*--it, pivot); - offsets_r[num_r] = ++i; - num_r += comp(*--it, pivot); - } - } - - // Swap elements and update block sizes and first/last boundaries. 
- int num = std::min(num_l, num_r); - swap_offsets(first, last, offsets_l + start_l, offsets_r + start_r, num, - num_l == num_r); - num_l -= num; - num_r -= num; - start_l += num; - start_r += num; - if (num_l == 0) first += block_size; - if (num_r == 0) last -= block_size; - } - - int l_size = 0, r_size = 0; - int unknown_left = (int)(last - first) - ((num_r || num_l) ? block_size : 0); - if (num_r) { - // Handle leftover block by assigning the unknown elements to the other - // block. - l_size = unknown_left; - r_size = block_size; - } else if (num_l) { - l_size = block_size; - r_size = unknown_left; - } else { - // No leftover block, split the unknown elements in two blocks. - l_size = unknown_left / 2; - r_size = unknown_left - l_size; - } - - // Fill offset buffers if needed. - if (unknown_left && !num_l) { - start_l = 0; - Iter it = first; - for (unsigned char i = 0; i < l_size;) { - offsets_l[num_l] = i++; - num_l += !comp(*it, pivot); - ++it; - } - } - if (unknown_left && !num_r) { - start_r = 0; - Iter it = last; - for (unsigned char i = 0; i < r_size;) { - offsets_r[num_r] = ++i; - num_r += comp(*--it, pivot); - } - } - - int num = std::min(num_l, num_r); - swap_offsets(first, last, offsets_l + start_l, offsets_r + start_r, num, - num_l == num_r); - num_l -= num; - num_r -= num; - start_l += num; - start_r += num; - if (num_l == 0) first += l_size; - if (num_r == 0) last -= r_size; - - // We have now fully identified [first, last)'s proper position. Swap the last - // elements. - if (num_l) { - offsets_l += start_l; - while (num_l--) std::iter_swap(first + offsets_l[num_l], --last); - first = last; - } - if (num_r) { - offsets_r += start_r; - while (num_r--) std::iter_swap(last - offsets_r[num_r], first), ++first; - last = first; - } - - // Put the pivot in the right place. 
- Iter pivot_pos = first - 1; - *begin = PDQSORT_PREFER_MOVE(*pivot_pos); - *pivot_pos = PDQSORT_PREFER_MOVE(pivot); - - return std::make_pair(pivot_pos, already_partitioned); -} - -// Partitions [begin, end) around pivot *begin using comparison function comp. -// Elements equal to the pivot are put in the right-hand partition. Returns the -// position of the pivot after partitioning and whether the passed sequence -// already was correctly partitioned. Assumes the pivot is a median of at least -// 3 elements and that [begin, end) is at least insertion_sort_threshold long. -template -inline std::pair partition_right(Iter begin, Iter end, - Compare& comp) { - typedef typename std::iterator_traits::value_type T; - - // Move pivot into local for speed. - T pivot(PDQSORT_PREFER_MOVE(*begin)); - - Iter first = begin; - Iter last = end; - - // Find the first element greater than or equal than the pivot (the median of - // 3 guarantees this exists). - while (comp(*++first, pivot)) - ; - - // Find the first element strictly smaller than the pivot. We have to guard - // this search if there was no element before *first. - if (first - 1 == begin) - while (first < last && !comp(*--last, pivot)) - ; - else - while (!comp(*--last, pivot)) - ; - - // If the first pair of elements that should be swapped to partition are the - // same element, the passed in sequence already was correctly partitioned. - bool already_partitioned = first >= last; - - // Keep swapping pairs of elements that are on the wrong side of the pivot. - // Previously swapped pairs guard the searches, which is why the first - // iteration is special-cased above. - while (first < last) { - std::iter_swap(first, last); - while (comp(*++first, pivot)) - ; - while (!comp(*--last, pivot)) - ; - } - - // Put the pivot in the right place. 
- Iter pivot_pos = first - 1; - *begin = PDQSORT_PREFER_MOVE(*pivot_pos); - *pivot_pos = PDQSORT_PREFER_MOVE(pivot); - - return std::make_pair(pivot_pos, already_partitioned); -} - -// Similar function to the one above, except elements equal to the pivot are put -// to the left of the pivot and it doesn't check or return if the passed -// sequence already was partitioned. Since this is rarely used (the many equal -// case), and in that case pdqsort already has O(n) performance, no block -// quicksort is applied here for simplicity. -template -inline Iter partition_left(Iter begin, Iter end, Compare& comp) { - typedef typename std::iterator_traits::value_type T; - - T pivot(PDQSORT_PREFER_MOVE(*begin)); - Iter first = begin; - Iter last = end; - - while (comp(pivot, *--last)) - ; - - if (last + 1 == end) - while (first < last && !comp(pivot, *++first)) - ; - else - while (!comp(pivot, *++first)) - ; - - while (first < last) { - std::iter_swap(first, last); - while (comp(pivot, *--last)) - ; - while (!comp(pivot, *++first)) - ; - } - - Iter pivot_pos = last; - *begin = PDQSORT_PREFER_MOVE(*pivot_pos); - *pivot_pos = PDQSORT_PREFER_MOVE(pivot); - - return pivot_pos; -} - -template -inline void pdqsort_loop(Iter begin, Iter end, Compare& comp, int bad_allowed, - bool leftmost = true) { - typedef typename std::iterator_traits::difference_type diff_t; - - // Use a while loop for tail recursion elimination. - while (true) { - diff_t size = end - begin; - - // Insertion sort is faster for small arrays. - if (size < insertion_sort_threshold) { - if (leftmost) - insertion_sort(begin, end, comp); - else - unguarded_insertion_sort(begin, end, comp); - return; - } - - // Choose pivot as median of 3 or pseudomedian of 9. 
- diff_t s2 = size / 2; - if (size > ninther_threshold) { - sort3(begin, begin + s2, end - 1, comp); - sort3(begin + 1, begin + (s2 - 1), end - 2, comp); - sort3(begin + 2, begin + (s2 + 1), end - 3, comp); - sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp); - std::iter_swap(begin, begin + s2); - } else - sort3(begin + s2, begin, end - 1, comp); - - // If *(begin - 1) is the end of the right partition of a previous partition - // operation there is no element in [begin, end) that is smaller than - // *(begin - 1). Then if our pivot compares equal to *(begin - 1) we change - // strategy, putting equal elements in the left partition, greater elements - // in the right partition. We do not have to recurse on the left partition, - // since it's sorted (all equal). - if (!leftmost && !comp(*(begin - 1), *begin)) { - begin = partition_left(begin, end, comp) + 1; - continue; - } - - // Partition and get results. - std::pair part_result = - Branchless ? partition_right_branchless(begin, end, comp) - : partition_right(begin, end, comp); - Iter pivot_pos = part_result.first; - bool already_partitioned = part_result.second; - - // Check for a highly unbalanced partition. - diff_t l_size = pivot_pos - begin; - diff_t r_size = end - (pivot_pos + 1); - bool highly_unbalanced = l_size < size / 8 || r_size < size / 8; - - // If we got a highly unbalanced partition we shuffle elements to break many - // patterns. - if (highly_unbalanced) { - // If we had too many bad partitions, switch to heapsort to guarantee O(n - // log n). 
- if (--bad_allowed == 0) { - std::make_heap(begin, end, comp); - std::sort_heap(begin, end, comp); - return; - } - - if (l_size >= insertion_sort_threshold) { - std::iter_swap(begin, begin + l_size / 4); - std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4); - - if (l_size > ninther_threshold) { - std::iter_swap(begin + 1, begin + (l_size / 4 + 1)); - std::iter_swap(begin + 2, begin + (l_size / 4 + 2)); - std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1)); - std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2)); - } - } - - if (r_size >= insertion_sort_threshold) { - std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4)); - std::iter_swap(end - 1, end - r_size / 4); - - if (r_size > ninther_threshold) { - std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4)); - std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4)); - std::iter_swap(end - 2, end - (1 + r_size / 4)); - std::iter_swap(end - 3, end - (2 + r_size / 4)); - } - } - } else { - // If we were decently balanced and we tried to sort an already - // partitioned sequence try to use insertion sort. - if (already_partitioned && - partial_insertion_sort(begin, pivot_pos, comp) && - partial_insertion_sort(pivot_pos + 1, end, comp)) - return; - } - - // Sort the left partition first using recursion and do tail recursion - // elimination for the right-hand partition. - pdqsort_loop(begin, pivot_pos, comp, bad_allowed, - leftmost); - begin = pivot_pos + 1; - leftmost = false; - } -} - -template -inline void pdqpartial_sort_loop(Iter begin, Iter mid, Iter end, Compare& comp, - int bad_allowed, bool leftmost = true) { - typedef typename std::iterator_traits::difference_type diff_t; - - // Use a while loop for tail recursion elimination. - while (true) { - diff_t size = end - begin; - - // Insertion sort is faster for small arrays. 
- if (size < insertion_sort_threshold) { - if (leftmost) - insertion_sort(begin, end, comp); - else - unguarded_insertion_sort(begin, end, comp); - return; - } - - // Choose pivot as median of 3 or pseudomedian of 9. - diff_t s2 = size / 2; - if (size > ninther_threshold) { - sort3(begin, begin + s2, end - 1, comp); - sort3(begin + 1, begin + (s2 - 1), end - 2, comp); - sort3(begin + 2, begin + (s2 + 1), end - 3, comp); - sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp); - std::iter_swap(begin, begin + s2); - } else - sort3(begin + s2, begin, end - 1, comp); - - // If *(begin - 1) is the end of the right partition of a previous partition - // operation there is no element in [begin, end) that is smaller than - // *(begin - 1). Then if our pivot compares equal to *(begin - 1) we change - // strategy, putting equal elements in the left partition, greater elements - // in the right partition. We do not have to recurse on the left partition, - // since it's sorted (all equal). - if (!leftmost && !comp(*(begin - 1), *begin)) { - begin = partition_left(begin, end, comp) + 1; - continue; - } - - // Partition and get results. - std::pair part_result = - Branchless ? partition_right_branchless(begin, end, comp) - : partition_right(begin, end, comp); - Iter pivot_pos = part_result.first; - bool already_partitioned = part_result.second; - - // Check for a highly unbalanced partition. - diff_t l_size = pivot_pos - begin; - diff_t r_size = end - (pivot_pos + 1); - bool highly_unbalanced = l_size < size / 8 || r_size < size / 8; - - // If we got a highly unbalanced partition we shuffle elements to break many - // patterns. - if (highly_unbalanced) { - // If we had too many bad partitions, switch to heapsort to guarantee O(n - // log n). 
- if (--bad_allowed == 0) { - std::make_heap(begin, end, comp); - std::sort_heap(begin, end, comp); - return; - } - - if (l_size >= insertion_sort_threshold) { - std::iter_swap(begin, begin + l_size / 4); - std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4); - - if (l_size > ninther_threshold) { - std::iter_swap(begin + 1, begin + (l_size / 4 + 1)); - std::iter_swap(begin + 2, begin + (l_size / 4 + 2)); - std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1)); - std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2)); - } - } - - if (r_size >= insertion_sort_threshold) { - std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4)); - std::iter_swap(end - 1, end - r_size / 4); - - if (r_size > ninther_threshold) { - std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4)); - std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4)); - std::iter_swap(end - 2, end - (1 + r_size / 4)); - std::iter_swap(end - 3, end - (2 + r_size / 4)); - } - } - } else { - // If we were decently balanced and we tried to sort an already - // partitioned sequence try to use insertion sort. - if (already_partitioned && - partial_insertion_sort(begin, pivot_pos, comp) && - partial_insertion_sort(pivot_pos + 1, end, comp)) - return; - } - - // Sort the left partition first using recursion and do tail recursion - // elimination for the right-hand partition. - if (pivot_pos < mid) { - pdqsort_loop(begin, pivot_pos, comp, - bad_allowed, leftmost); - begin = pivot_pos + 1; - leftmost = false; - } else { - end = pivot_pos; - } - } -} - -template -inline void pdqselect_loop(Iter begin, Iter mid, Iter end, Compare& comp, - int bad_allowed, bool leftmost = true) { - typedef typename std::iterator_traits::difference_type diff_t; - - // Use a while loop for tail recursion elimination. - while (true) { - diff_t size = end - begin; - - // Insertion sort is faster for small arrays. 
- if (size < insertion_sort_threshold) { - if (leftmost) - insertion_sort(begin, end, comp); - else - unguarded_insertion_sort(begin, end, comp); - return; - } - - // Choose pivot as median of 3 or pseudomedian of 9. - diff_t s2 = size / 2; - if (size > ninther_threshold) { - sort3(begin, begin + s2, end - 1, comp); - sort3(begin + 1, begin + (s2 - 1), end - 2, comp); - sort3(begin + 2, begin + (s2 + 1), end - 3, comp); - sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp); - std::iter_swap(begin, begin + s2); - } else - sort3(begin + s2, begin, end - 1, comp); - - // If *(begin - 1) is the end of the right partition of a previous partition - // operation there is no element in [begin, end) that is smaller than - // *(begin - 1). Then if our pivot compares equal to *(begin - 1) we change - // strategy, putting equal elements in the left partition, greater elements - // in the right partition. We do not have to recurse on the left partition, - // since it's sorted (all equal). - if (!leftmost && !comp(*(begin - 1), *begin)) { - begin = partition_left(begin, end, comp) + 1; - continue; - } - - // Partition and get results. - std::pair part_result = - Branchless ? partition_right_branchless(begin, end, comp) - : partition_right(begin, end, comp); - Iter pivot_pos = part_result.first; - bool already_partitioned = part_result.second; - - // Check for a highly unbalanced partition. - diff_t l_size = pivot_pos - begin; - diff_t r_size = end - (pivot_pos + 1); - bool highly_unbalanced = l_size < size / 8 || r_size < size / 8; - - // If we got a highly unbalanced partition we shuffle elements to break many - // patterns. - if (highly_unbalanced) { - // If we had too many bad partitions, switch to heapsort to guarantee O(n - // log n). 
- if (--bad_allowed == 0) { - std::nth_element(begin, mid, end, comp); - return; - } - - if (l_size >= insertion_sort_threshold) { - std::iter_swap(begin, begin + l_size / 4); - std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4); - - if (l_size > ninther_threshold) { - std::iter_swap(begin + 1, begin + (l_size / 4 + 1)); - std::iter_swap(begin + 2, begin + (l_size / 4 + 2)); - std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1)); - std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2)); - } - } - - if (r_size >= insertion_sort_threshold) { - std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4)); - std::iter_swap(end - 1, end - r_size / 4); - - if (r_size > ninther_threshold) { - std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4)); - std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4)); - std::iter_swap(end - 2, end - (1 + r_size / 4)); - std::iter_swap(end - 3, end - (2 + r_size / 4)); - } - } - } else { - // If we were decently balanced and we tried to sort an already - // partitioned sequence try to use insertion sort. - if (already_partitioned && - partial_insertion_sort(begin, pivot_pos, comp) && - partial_insertion_sort(pivot_pos + 1, end, comp)) - return; - } - // Sort the left partition first using recursion and do tail recursion - // elimination for the right-hand partition. 
- if (pivot_pos < mid) { - begin = pivot_pos + 1; - leftmost = false; - } else { - end = pivot_pos; - } - } -} -} // namespace pdqsort_detail - -template -inline void pdqsort(Iter begin, Iter end, Compare comp) { - if (begin == end) return; - -#if __cplusplus >= 201103L - pdqsort_detail::pdqsort_loop< - Iter, Compare, - pdqsort_detail::is_default_compare< - typename std::decay::type>::value && - std::is_arithmetic< - typename std::iterator_traits::value_type>::value>( - begin, end, comp, pdqsort_detail::log2(end - begin)); -#else - pdqsort_detail::pdqsort_loop( - begin, end, comp, pdqsort_detail::log2(end - begin)); -#endif -} - -template -inline void pdqsort(Iter begin, Iter end) { - typedef typename std::iterator_traits::value_type T; - pdqsort(begin, end, std::less()); -} - -template -inline void pdqsort_branchless(Iter begin, Iter end, Compare comp) { - if (begin == end) return; - pdqsort_detail::pdqsort_loop( - begin, end, comp, pdqsort_detail::log2(end - begin)); -} - -template -inline void pdqsort_branchless(Iter begin, Iter end) { - typedef typename std::iterator_traits::value_type T; - pdqsort_branchless(begin, end, std::less()); -} - -template -inline void pdqpartial_sort(Iter begin, Iter mid, Iter end, Compare comp) { - if (begin == end) return; - -#if __cplusplus >= 201103L - pdqsort_detail::pdqpartial_sort_loop< - Iter, Compare, - pdqsort_detail::is_default_compare< - typename std::decay::type>::value && - std::is_arithmetic< - typename std::iterator_traits::value_type>::value>( - begin, mid, end, comp, pdqsort_detail::log2(end - begin)); -#else - pdqsort_detail::pdqpartial_sort_loop( - begin, end, comp, pdqsort_detail::log2(end - begin)); -#endif -} - -template -inline void pdqpartial_sort(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - pdqpartial_sort(begin, mid, end, std::less()); -} - -template -inline void pdqpartial_sort_branchless(Iter begin, Iter mid, Iter end, - Compare comp) { - if (begin == end) 
return; - pdqsort_detail::pdqpartial_sort_loop( - begin, mid, end, comp, pdqsort_detail::log2(end - begin)); -} - -template -inline void pdqpartial_sort_branchless(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - pdqpartial_sort_branchless(begin, mid, end, std::less()); -} - -template -inline void pdqselect(Iter begin, Iter mid, Iter end, Compare comp) { - if (mid == end) return; - using CompType = typename median_common_detail::CompareRefType::type; - -#if __cplusplus >= 201103L - pdqsort_detail::pdqselect_loop< - Iter, CompType, - pdqsort_detail::is_default_compare< - typename std::decay::type>::value && - std::is_arithmetic< - typename std::iterator_traits::value_type>::value>( - begin, mid, end, comp, pdqsort_detail::log2(end - begin)); -#else - pdqsort_detail::pdqselect_loop( - begin, end, comp, pdqsort_detail::log2(end - begin)); -#endif -} - -template -inline void pdqselect(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - pdqselect(begin, mid, end, std::less()); -} - -template -inline void pdqselect_branchless(Iter begin, Iter mid, Iter end, Compare comp) { - if (mid == end) return; - using CompType = typename median_common_detail::CompareRefType::type; - pdqsort_detail::pdqselect_loop( - begin, mid, end, comp, pdqsort_detail::log2(end - begin)); -} - -template -inline void pdqselect_branchless(Iter begin, Iter mid, Iter end) { - typedef typename std::iterator_traits::value_type T; - pdqselect_branchless(begin, mid, end, std::less()); -} - -#undef PDQSORT_PREFER_MOVE - -#endif - -} // namespace miniselect diff --git a/contrib/miniselect/include/miniselect/private/median_common.h b/contrib/miniselect/include/miniselect/private/median_common.h deleted file mode 100644 index 30cb1323bbf..00000000000 --- a/contrib/miniselect/include/miniselect/private/median_common.h +++ /dev/null @@ -1,437 +0,0 @@ -/* Copyright Andrei Alexandrescu, 2016-, - * Distributed under the Boost Software 
License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -#pragma once - -#include -#include -#include - -namespace miniselect { -namespace median_common_detail { - -template -struct CompareRefType { - // Pass the comparator by lvalue reference. Or in debug mode, using a - // debugging wrapper that stores a reference. - using type = typename std::add_lvalue_reference::type; -}; -/** -Swaps the median of r[a], r[b], and r[c] into r[b]. -*/ -template -void median3(Iter r, size_t a, size_t b, size_t c, Compare&& comp) { - if (comp(r[b], r[a])) // b < a - { - if (comp(r[b], r[c])) // b < a, b < c - { - if (comp(r[c], r[a])) // b < c < a - std::swap(r[b], r[c]); - else // b < a <= c - std::swap(r[b], r[a]); - } - } else if (comp(r[c], r[b])) // a <= b, c < b - { - if (comp(r[c], r[a])) // c < a <= b - std::swap(r[b], r[a]); - else // a <= c < b - std::swap(r[b], r[c]); - } -} - -/** -Sorts in place r[a], r[b], and r[c]. 
-*/ -template -void sort3(Iter r, size_t a, size_t b, size_t c, Compare&& comp) { - typedef typename std::iterator_traits::value_type T; - if (comp(r[b], r[a])) // b < a - { - if (comp(r[c], r[b])) // c < b < a - { - std::swap(r[a], r[c]); // a < b < c - } else // b < a, b <= c - { - T t = std::move(r[a]); - r[a] = std::move(r[b]); - if (comp(r[c], t)) // b <= c < a - { - r[b] = std::move(r[c]); - r[c] = std::move(t); - } else // b < a <= c - { - r[b] = std::move(t); - } - } - } else if (comp(r[c], r[b])) // a <= b, c < b - { - T t = std::move(r[c]); - r[c] = std::move(r[b]); - if (comp(t, r[a])) // c < a < b - { - r[b] = std::move(r[a]); - r[a] = std::move(t); - } else // a <= c < b - { - r[b] = std::move(t); - } - } - - assert(!comp(r[b], r[a]) && !comp(r[c], r[b])); -} - -/** -If leanRight == false, swaps the lower median of r[a]...r[d] into r[b] and -the minimum into r[a]. If leanRight == true, swaps the upper median of -r[a]...r[d] into r[c] and the minimum into r[d]. -*/ -template -void partition4(Iter r, size_t a, size_t b, size_t c, size_t d, - Compare&& comp) { - assert(a != b && a != c && a != d && b != c && b != d && c != d); - /* static */ if (leanRight) { - // In the median of 5 algorithm, consider r[e] infinite - if (comp(r[c], r[a])) { - std::swap(r[a], r[c]); - } // a <= c - if (comp(r[d], r[b])) { - std::swap(r[b], r[d]); - } // a <= c, b <= d - if (comp(r[d], r[c])) { - std::swap(r[c], r[d]); // a <= d, b <= c < d - std::swap(r[a], r[b]); // b <= d, a <= c < d - } // a <= c <= d, b <= d - if (comp(r[c], r[b])) { // a <= c <= d, c < b <= d - std::swap(r[b], r[c]); // a <= b <= c <= d - } // a <= b <= c <= d - } else { - // In the median of 5 algorithm consider r[a] infinitely small, then - // change b->a. 
c->b, d->c, e->d - if (comp(r[c], r[a])) { - std::swap(r[a], r[c]); - } - if (comp(r[c], r[b])) { - std::swap(r[b], r[c]); - } - if (comp(r[d], r[a])) { - std::swap(r[a], r[d]); - } - if (comp(r[d], r[b])) { - std::swap(r[b], r[d]); - } else { - if (comp(r[b], r[a])) { - std::swap(r[a], r[b]); - } - } - } -} - -/** -Places the median of r[a]...r[e] in r[c] and partitions the other elements -around it. -*/ -template -void partition5(Iter r, size_t a, size_t b, size_t c, size_t d, size_t e, - Compare&& comp) { - assert(a != b && a != c && a != d && a != e && b != c && b != d && b != e && - c != d && c != e && d != e); - if (comp(r[c], r[a])) { - std::swap(r[a], r[c]); - } - if (comp(r[d], r[b])) { - std::swap(r[b], r[d]); - } - if (comp(r[d], r[c])) { - std::swap(r[c], r[d]); - std::swap(r[a], r[b]); - } - if (comp(r[e], r[b])) { - std::swap(r[b], r[e]); - } - if (comp(r[e], r[c])) { - std::swap(r[c], r[e]); - if (comp(r[c], r[a])) { - std::swap(r[a], r[c]); - } - } else { - if (comp(r[c], r[b])) { - std::swap(r[b], r[c]); - } - } -} - -/** -Implements Hoare partition. -*/ -template -Iter pivotPartition(Iter r, size_t k, size_t length, Compare&& comp) { - assert(k < length); - std::swap(*r, r[k]); - size_t lo = 1, hi = length - 1; - for (;; ++lo, --hi) { - for (;; ++lo) { - if (lo > hi) goto loop_done; - if (!comp(r[lo], *r)) break; - } - // found the left bound: r[lo] >= r[0] - assert(lo <= hi); - for (; comp(*r, r[hi]); --hi) { - } - if (lo >= hi) break; - // found the right bound: r[hi] <= r[0], swap & make progress - std::swap(r[lo], r[hi]); - } -loop_done: - --lo; - std::swap(r[lo], *r); - return r + lo; -} - -/** -Implements the quickselect algorithm, parameterized with a partition function. 
-*/ -template -void quickselect(Iter r, Iter mid, Iter end, Compare&& comp) { - if (r == end || mid >= end) return; - assert(r <= mid && mid < end); - for (;;) switch (end - r) { - case 1: - return; - case 2: - if (comp(r[1], *r)) std::swap(*r, r[1]); - return; - case 3: - sort3(r, 0, 1, 2, comp); - return; - case 4: - switch (mid - r) { - case 0: - goto select_min; - case 1: - partition4(r, 0, 1, 2, 3, comp); - break; - case 2: - partition4(r, 0, 1, 2, 3, comp); - break; - case 3: - goto select_max; - default: - assert(false); - } - return; - default: - assert(end - r > 4); - if (r == mid) { - select_min: - auto pivot = r; - for (++mid; mid < end; ++mid) - if (comp(*mid, *pivot)) pivot = mid; - std::swap(*r, *pivot); - return; - } - if (mid + 1 == end) { - select_max: - auto pivot = r; - for (mid = r + 1; mid < end; ++mid) - if (comp(*pivot, *mid)) pivot = mid; - std::swap(*pivot, end[-1]); - return; - } - auto pivot = partition(r, end, comp); - if (pivot == mid) return; - if (mid < pivot) { - end = pivot; - } else { - r = pivot + 1; - } - } -} - -/** -Returns the index of the median of r[a], r[b], and r[c] without writing -anything. -*/ -template -size_t medianIndex(const Iter r, size_t a, size_t b, size_t c, Compare&& comp) { - if (r[a] > r[c]) std::swap(a, c); - if (r[b] > r[c]) return c; - if (comp(r[b], r[a])) return a; - return b; -} - -/** -Returns the index of the median of r[a], r[b], r[c], and r[d] without writing -anything. If leanRight is true, computes the upper median. Otherwise, conputes -the lower median. 
-*/ -template -static size_t medianIndex(Iter r, size_t a, size_t b, size_t c, size_t d, - Compare&& comp) { - if (comp(r[d], r[c])) std::swap(c, d); - assert(r[c] <= r[d]); - /* static */ if (leanRight) { - if (comp(r[c], r[a])) { - assert(comp(r[c], r[a]) && !comp(r[d], r[c])); // so r[c]) is out - return medianIndex(r, a, b, d, comp); - } - } else { - if (!comp(r[d], r[a])) { - return medianIndex(r, a, b, c, comp); - } - } - // Could return medianIndex(r, b, c, d) but we already know r[c] <= r[d] - if (!comp(r[c], r[b])) return c; - if (comp(r[d], r[b])) return d; - return b; -} - -/** -Tukey's Ninther: compute the median of r[_1], r[_2], r[_3], then the median of -r[_4], r[_5], r[_6], then the median of r[_7], r[_8], r[_9], and then swap the -median of those three medians into r[_5]. -*/ -template -void ninther(Iter r, size_t _1, size_t _2, size_t _3, size_t _4, size_t _5, - size_t _6, size_t _7, size_t _8, size_t _9, Compare&& comp) { - _2 = medianIndex(r, _1, _2, _3, comp); - _8 = medianIndex(r, _7, _8, _9, comp); - if (comp(r[_8], r[_2])) std::swap(_2, _8); - if (comp(r[_6], r[_4])) std::swap(_4, _6); - // Here we know that r[_2] and r[_8] are the other two medians and that - // r[_2] <= r[_8]. We also know that r[_4] <= r[_6] - if (comp(r[_5], r[_4])) { - // r[_4] is the median of r[_4], r[_5], r[_6] - } else if (comp(r[_6], r[_5])) { - // r[_6] is the median of r[_4], r[_5], r[_6] - _4 = _6; - } else { - // Here we know r[_5] is the median of r[_4], r[_5], r[_6] - if (comp(r[_5], r[_2])) return std::swap(r[_5], r[_2]); - if (comp(r[_8], r[_5])) return std::swap(r[_5], r[_8]); - // This is the only path that returns with no swap - return; - } - // Here we know r[_4] is the median of r[_4], r[_5], r[_6] - if (comp(r[_4], r[_2])) - _4 = _2; - else if (comp(r[_8], r[_4])) - _4 = _8; - std::swap(r[_5], r[_4]); -} - -/** -Input assumptions: -(a) hi <= rite -(c) the range r[0 .. 
hi] contains elements no smaller than r[0] -Output guarantee: same as Hoare partition using r[0] as pivot. Returns the new -position of the pivot. -*/ -template -size_t expandPartitionRight(Iter r, size_t hi, size_t rite, Compare&& comp) { - size_t pivot = 0; - assert(pivot <= hi); - assert(hi <= rite); - // First loop: spend r[pivot .. hi] - for (; pivot < hi; --rite) { - if (rite == hi) goto done; - if (!comp(r[rite], r[0])) continue; - ++pivot; - std::swap(r[rite], r[pivot]); - } - // Second loop: make left and pivot meet - for (; rite > pivot; --rite) { - if (!comp(r[rite], r[0])) continue; - while (rite > pivot) { - ++pivot; - if (comp(r[0], r[pivot])) { - std::swap(r[rite], r[pivot]); - break; - } - } - } - -done: - std::swap(r[0], r[pivot]); - return pivot; -} - -/** -Input assumptions: -(a) lo > 0, lo <= pivot -(b) the range r[lo .. pivot] already contains elements no greater than r[pivot] -Output guarantee: Same as Hoare partition around r[pivot]. Returns the new -position of the pivot. -*/ -template -size_t expandPartitionLeft(Iter r, size_t lo, size_t pivot, Compare&& comp) { - assert(lo > 0 && lo <= pivot); - size_t left = 0; - const auto oldPivot = pivot; - for (; lo < pivot; ++left) { - if (left == lo) goto done; - if (!comp(r[oldPivot], r[left])) continue; - --pivot; - std::swap(r[left], r[pivot]); - } - // Second loop: make left and pivot meet - for (;; ++left) { - if (left == pivot) break; - if (!comp(r[oldPivot], r[left])) continue; - for (;;) { - if (left == pivot) goto done; - --pivot; - if (comp(r[pivot], r[oldPivot])) { - std::swap(r[left], r[pivot]); - break; - } - } - } - -done: - std::swap(r[oldPivot], r[pivot]); - return pivot; -} - -/** -Input assumptions: -(a) lo <= pivot, pivot < hi, hi <= length -(b) the range r[lo .. pivot] already contains elements no greater than -r[pivot] -(c) the range r[pivot .. 
hi] already contains elements no smaller than -r[pivot] -Output guarantee: Same as Hoare partition around r[pivot], returning the new -position of the pivot. -*/ -template -size_t expandPartition(Iter r, size_t lo, size_t pivot, size_t hi, - size_t length, Compare&& comp) { - assert(lo <= pivot && pivot < hi && hi <= length); - --hi; - --length; - size_t left = 0; - for (;; ++left, --length) { - for (;; ++left) { - if (left == lo) - return pivot + expandPartitionRight(r + pivot, hi - pivot, - length - pivot, comp); - if (comp(r[pivot], r[left])) break; - } - for (;; --length) { - if (length == hi) - return left + - expandPartitionLeft(r + left, lo - left, pivot - left, comp); - if (!comp(r[pivot], r[length])) break; - } - std::swap(r[left], r[length]); - } -} - -} // namespace median_common_detail -} // namespace miniselect diff --git a/contrib/miniselect/testing/test_common.h b/contrib/miniselect/testing/test_common.h deleted file mode 100644 index df0c179c840..00000000000 --- a/contrib/miniselect/testing/test_common.h +++ /dev/null @@ -1,180 +0,0 @@ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. 
- * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -#pragma once - -#include - -#include - -#include "miniselect/floyd_rivest_select.h" -#include "miniselect/median_of_3_random.h" -#include "miniselect/median_of_medians.h" -#include "miniselect/median_of_ninthers.h" -#include "miniselect/pdqselect.h" - -namespace miniselect { -namespace algorithms { - -struct STD { - template - static void Sort(Iter begin, Iter mid, Iter end, Compare&& comp) { - std::partial_sort(begin, mid, end, std::move(comp)); - } - - template - static void Sort(Iter begin, Iter mid, Iter end) { - std::partial_sort(begin, mid, end); - } - - template - static void Select(Iter begin, Iter mid, Iter end, Compare&& comp) { - std::nth_element(begin, mid, end, std::move(comp)); - } - - template - static void Select(Iter begin, Iter mid, Iter end) { - std::nth_element(begin, mid, end); - } -}; - -struct PDQ { - template - static void Sort(Iter begin, Iter mid, Iter end, Compare&& comp) { - pdqpartial_sort(begin, mid, end, std::move(comp)); - } - - template - static void Sort(Iter begin, Iter mid, Iter end) { - pdqpartial_sort(begin, mid, end); - } - - template - static void Select(Iter begin, Iter mid, Iter end, Compare&& comp) { - pdqselect(begin, mid, end, std::move(comp)); - } - - template - static void Select(Iter begin, Iter mid, Iter end) { - pdqselect(begin, mid, end); - } -}; - -struct PDQBranchless { - template - static void Sort(Iter begin, Iter mid, Iter end, Compare&& comp) { - pdqpartial_sort_branchless(begin, mid, end, std::move(comp)); - } - - template - static void Sort(Iter begin, Iter mid, Iter end) { - pdqpartial_sort_branchless(begin, mid, end); - } - - template - static void Select(Iter begin, Iter mid, Iter end, Compare&& comp) { - pdqselect_branchless(begin, mid, end, std::move(comp)); - } - - template - static void Select(Iter begin, Iter mid, Iter end) { - pdqselect_branchless(begin, mid, end); - } -}; - -struct FloydRivest { - 
template - static void Sort(Iter begin, Iter mid, Iter end, Compare&& comp) { - floyd_rivest_partial_sort(begin, mid, end, std::move(comp)); - } - - template - static void Sort(Iter begin, Iter mid, Iter end) { - floyd_rivest_partial_sort(begin, mid, end); - } - - template - static void Select(Iter begin, Iter mid, Iter end, Compare&& comp) { - floyd_rivest_select(begin, mid, end, std::move(comp)); - } - - template - static void Select(Iter begin, Iter mid, Iter end) { - floyd_rivest_select(begin, mid, end); - } -}; - -struct MedianOfNinthers { - template - static void Sort(Iter begin, Iter mid, Iter end, Compare&& comp) { - median_of_ninthers_sort(begin, mid, end, std::move(comp)); - } - - template - static void Sort(Iter begin, Iter mid, Iter end) { - median_of_ninthers_sort(begin, mid, end); - } - - template - static void Select(Iter begin, Iter mid, Iter end, Compare&& comp) { - median_of_ninthers_select(begin, mid, end, std::move(comp)); - } - - template - static void Select(Iter begin, Iter mid, Iter end) { - median_of_ninthers_select(begin, mid, end); - } -}; - -struct MedianOfMedians { - template - static void Sort(Iter begin, Iter mid, Iter end, Compare&& comp) { - median_of_medians_sort(begin, mid, end, std::move(comp)); - } - - template - static void Sort(Iter begin, Iter mid, Iter end) { - median_of_medians_sort(begin, mid, end); - } - - template - static void Select(Iter begin, Iter mid, Iter end, Compare&& comp) { - median_of_medians_select(begin, mid, end, std::move(comp)); - } - - template - static void Select(Iter begin, Iter mid, Iter end) { - median_of_medians_select(begin, mid, end); - } -}; - -struct MedianOf3Random { - template - static void Sort(Iter begin, Iter mid, Iter end, Compare&& comp) { - median_of_3_random_sort(begin, mid, end, std::move(comp)); - } - - template - static void Sort(Iter begin, Iter mid, Iter end) { - median_of_3_random_sort(begin, mid, end); - } - - template - static void Select(Iter begin, Iter mid, Iter end, 
Compare&& comp) { - median_of_3_random_select(begin, mid, end, std::move(comp)); - } - - template - static void Select(Iter begin, Iter mid, Iter end) { - median_of_3_random_select(begin, mid, end); - } -}; - -using All = - ::testing::Types; - -} // namespace algorithms -} // namespace miniselect diff --git a/contrib/miniselect/testing/test_select.cpp b/contrib/miniselect/testing/test_select.cpp deleted file mode 100644 index 9b8e9dce970..00000000000 --- a/contrib/miniselect/testing/test_select.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -#include -#include - -#include -#include -#include -#include -#include - -#include "test_common.h" - -using ::testing::Eq; - -namespace miniselect { -namespace { - -struct IndirectLess { - // Non const comparator with deleted copy. - template - bool operator()(const P &x, const P &y) const { - return *x < *y; - } - IndirectLess(const IndirectLess &) = default; - IndirectLess &operator=(const IndirectLess &) = default; - IndirectLess(IndirectLess &&) = default; - IndirectLess &operator=(IndirectLess &&) = default; -}; - -template -class SelectTest : public ::testing::Test { - public: - using Base = Selector; - - static void TestSelects(size_t N, size_t M) { - ASSERT_NE(N, 0); - ASSERT_GT(N, M); - SCOPED_TRACE(N); - SCOPED_TRACE(M); - std::vector array(N); - for (size_t i = 0; i < N; ++i) { - array[i] = i; - } - auto array_smaller = array; - std::mt19937_64 mersenne_engine; - std::shuffle(array.begin(), array.end(), mersenne_engine); - Selector::Select(array.begin(), array.begin() + M, array.end(), - std::greater()); - EXPECT_EQ(array[M], N - M - 1); - for (size_t i = 0; i < M; ++i) { - EXPECT_GE(array[i], array[M]); - } - for (size_t i = M; i < N; ++i) { - EXPECT_LE(array[i], array[M]); - } - std::shuffle(array_smaller.begin(), 
array_smaller.end(), mersenne_engine); - Selector::Select(array_smaller.begin(), array_smaller.begin() + M, - array_smaller.end()); - EXPECT_EQ(array_smaller[M], M); - for (size_t i = 0; i < M; ++i) { - EXPECT_LE(array_smaller[i], array_smaller[M]); - } - for (size_t i = M; i < N; ++i) { - EXPECT_GE(array_smaller[i], array_smaller[M]); - } - } - - static void TestSelects(size_t N) { - TestSelects(N, 0); - TestSelects(N, 1); - TestSelects(N, 2); - TestSelects(N, 3); - TestSelects(N, N / 2 - 1); - TestSelects(N, N / 2); - TestSelects(N, N / 2 + 1); - TestSelects(N, N - 2); - TestSelects(N, N - 1); - } - - static void TestManySelects() { - TestSelects(10); - TestSelects(256); - TestSelects(257); - TestSelects(499); - TestSelects(500); - TestSelects(997); - TestSelects(1000); - TestSelects(1000 * 100); - TestSelects(1009); - TestSelects(1009 * 109); - } - - static void TestCustomComparators() { - std::vector> v(1000); - for (int i = 0; static_cast(i) < v.size(); ++i) { - v[i] = std::make_unique(i); - } - Selector::Select(v.begin(), v.begin() + v.size() / 2, v.end(), - IndirectLess{}); - EXPECT_EQ(*v[v.size() / 2], v.size() / 2); - for (size_t i = 0; i < v.size() / 2; ++i) { - ASSERT_NE(v[i], nullptr); - EXPECT_LE(*v[i], v.size() / 2); - } - for (size_t i = v.size() / 2; i < v.size(); ++i) { - ASSERT_NE(v[i], nullptr); - EXPECT_GE(*v[i], v.size() / 2); - } - } - - static void TestRepeat(size_t N, size_t M) { - ASSERT_NE(N, 0); - ASSERT_GT(N, M); - SCOPED_TRACE(N); - SCOPED_TRACE(M); - std::mt19937_64 mersenne_engine(10); - std::vector array(N); - for (size_t i = 0; i < M; ++i) { - array[i] = false; - } - for (size_t i = M; i < N; ++i) { - array[i] = true; - } - std::shuffle(array.begin(), array.end(), mersenne_engine); - Selector::Select(array.begin(), array.begin() + M, array.end()); - EXPECT_EQ(array[M], true); - for (size_t i = 0; i < M; ++i) { - EXPECT_EQ(array[i], false); - } - for (size_t i = M; i < N; ++i) { - EXPECT_EQ(array[i], true); - } - 
std::shuffle(array.begin(), array.end(), mersenne_engine); - Selector::Select(array.begin(), array.begin() + M / 2, array.end()); - EXPECT_EQ(array[M / 2], false); - for (size_t i = 0; i < M / 2; ++i) { - EXPECT_EQ(array[i], false); - } - std::shuffle(array.begin(), array.end(), mersenne_engine); - Selector::Select(array.begin(), array.begin() + M - 1, array.end()); - EXPECT_EQ(array[M - 1], false); - for (size_t i = 0; i < M - 1; ++i) { - EXPECT_EQ(array[i], false); - } - } - - static void TestRepeats(size_t N) { - TestRepeat(N, 1); - TestRepeat(N, 2); - TestRepeat(N, 3); - TestRepeat(N, N / 2 - 1); - TestRepeat(N, N / 2); - TestRepeat(N, N / 2 + 1); - TestRepeat(N, N - 2); - TestRepeat(N, N - 1); - } - - static void TestManyRepeats() { - TestRepeats(10); - TestRepeats(100); - TestRepeats(257); - TestRepeats(1000); - TestRepeats(100000); - } -}; - -TYPED_TEST_SUITE(SelectTest, algorithms::All); - -TYPED_TEST(SelectTest, TestSmall) { - std::vector v = {"ab", "aaa", "ab"}; - TypeParam::Select(v.begin(), v.begin() + 1, v.end()); - EXPECT_THAT(v, Eq(std::vector{"aaa", "ab", "ab"})); - v = {"aba"}; - TypeParam::Select(v.begin(), v.begin(), v.end()); - EXPECT_THAT(v, Eq(std::vector{"aba"})); - v.clear(); - TypeParam::Select(v.begin(), v.end(), v.end()); - EXPECT_TRUE(v.empty()); -} - -TYPED_TEST(SelectTest, TestAnotherSmall) { - std::vector v = {"ab", "ab", "aaa"}; - TypeParam::Select(v.begin(), v.begin() + 1, v.end()); - EXPECT_THAT(v, Eq(std::vector{"aaa", "ab", "ab"})); -} - -TYPED_TEST(SelectTest, TestEmptySmall) { - std::vector v = {"", ""}; - TypeParam::Select(v.begin(), v.begin() + 1, v.end()); - EXPECT_THAT(v, Eq(std::vector{"", ""})); -} - -TYPED_TEST(SelectTest, TestBasic) { TestFixture::TestManySelects(); } - -TYPED_TEST(SelectTest, TestComparators) { - TestFixture::TestCustomComparators(); -} - -TYPED_TEST(SelectTest, TestRepeats) { TestFixture::TestManyRepeats(); } - -TYPED_TEST(SelectTest, TestLast) { - std::vector array(100); - for (size_t i = 0; i < 100; 
++i) { - array[i] = i; - } - auto array_smaller = array; - std::mt19937_64 mersenne_engine; - std::shuffle(array.begin(), array.end(), mersenne_engine); - auto copy_array = array; - // Should be no effect. - size_t cmp = 0; - TypeParam::Select(array.begin(), array.end(), array.end(), - [&cmp](const auto &lhs, const auto &rhs) { - ++cmp; - return lhs < rhs; - }); - EXPECT_EQ(cmp, 0); - EXPECT_EQ(copy_array, array); -} - -} // namespace -} // namespace miniselect - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/contrib/miniselect/testing/test_sort.cpp b/contrib/miniselect/testing/test_sort.cpp deleted file mode 100644 index 19c6ff036fe..00000000000 --- a/contrib/miniselect/testing/test_sort.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* Copyright Danila Kutenin, 2020-. - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying file LICENSE_1_0.txt or copy at - * https://boost.org/LICENSE_1_0.txt) - */ -#include -#include - -#include -#include -#include -#include -#include - -#include "test_common.h" - -using ::testing::Eq; - -namespace miniselect { -namespace { - -struct IndirectLess { - // Non const comparator with deleted copy. 
- template - bool operator()(const P &x, const P &y) const { - return *x < *y; - } - IndirectLess(const IndirectLess &) = default; - IndirectLess &operator=(const IndirectLess &) = default; - IndirectLess(IndirectLess &&) = default; - IndirectLess &operator=(IndirectLess &&) = default; -}; - -template -class PartialSortTest : public ::testing::Test { - public: - static void TestSorts(size_t N, size_t M) { - ASSERT_NE(N, 0); - ASSERT_GE(N, M); - SCOPED_TRACE(N); - SCOPED_TRACE(M); - std::vector array(N); - for (size_t i = 0; i < N; ++i) { - array[i] = i; - } - auto array_smaller = array; - std::mt19937_64 mersenne_engine; - std::shuffle(array.begin(), array.end(), mersenne_engine); - Sorter::Sort(array.begin(), array.begin() + M, array.end(), - std::greater()); - for (size_t i = 0; i < M; ++i) { - EXPECT_EQ(array[i], N - i - 1); - } - std::shuffle(array_smaller.begin(), array_smaller.end(), mersenne_engine); - Sorter::Sort(array_smaller.begin(), array_smaller.begin() + M, - array_smaller.end()); - for (size_t i = 0; i < M; ++i) { - EXPECT_EQ(array_smaller[i], i); - } - } - - static void TestSorts(size_t N) { - TestSorts(N, 0); - TestSorts(N, 1); - TestSorts(N, 2); - TestSorts(N, 3); - TestSorts(N, N / 2 - 1); - TestSorts(N, N / 2); - TestSorts(N, N / 2 + 1); - TestSorts(N, N - 2); - TestSorts(N, N - 1); - TestSorts(N, N); - } - - static void TestManySorts() { - TestSorts(10); - TestSorts(256); - TestSorts(257); - TestSorts(499); - TestSorts(500); - TestSorts(997); - TestSorts(1000); - TestSorts(1000 * 100); - TestSorts(1009); - TestSorts(1009 * 109); - } - - static void TestCustomComparators() { - std::vector> v(1000); - for (int i = 0; static_cast(i) < v.size(); ++i) { - v[i] = std::make_unique(i); - } - Sorter::Sort(v.begin(), v.begin() + v.size() / 2, v.end(), IndirectLess{}); - for (int i = 0; static_cast(i) < v.size() / 2; ++i) { - ASSERT_NE(v[i], nullptr); - EXPECT_EQ(*v[i], i); - } - } -}; - -TYPED_TEST_SUITE(PartialSortTest, algorithms::All); - 
-TYPED_TEST(PartialSortTest, TestSmall) { - std::vector v = {"ab", "aaa", "ab"}; - TypeParam::Sort(v.begin(), v.begin() + 1, v.end()); - EXPECT_THAT(v, Eq(std::vector{"aaa", "ab", "ab"})); - v = {"aba"}; - TypeParam::Sort(v.begin(), v.begin(), v.end()); - EXPECT_THAT(v, Eq(std::vector{"aba"})); - v.clear(); - TypeParam::Sort(v.begin(), v.end(), v.end()); - EXPECT_TRUE(v.empty()); -} - -TYPED_TEST(PartialSortTest, TestAnotherSmall) { - std::vector v = {"ab", "ab", "aaa"}; - TypeParam::Sort(v.begin(), v.begin() + 1, v.end()); - EXPECT_THAT(v, Eq(std::vector{"aaa", "ab", "ab"})); -} - -TYPED_TEST(PartialSortTest, TestEmptySmall) { - std::vector v = {"", ""}; - TypeParam::Sort(v.begin(), v.begin() + 1, v.end()); - EXPECT_THAT(v, Eq(std::vector{"", ""})); -} - -TYPED_TEST(PartialSortTest, TestBasic) { TestFixture::TestManySorts(); } - -TYPED_TEST(PartialSortTest, TestComparators) { - TestFixture::TestCustomComparators(); -} - -// The standard says that the order of other elements is unspecified even if -// nothing should be sorted so it fails for libcxx and PDQ which is Ok. Saving -// this test for a reference. -TYPED_TEST(PartialSortTest, DISABLED_TestEmpty) { - std::vector array(100); - for (size_t i = 0; i < 100; ++i) { - array[i] = i; - } - std::mt19937_64 mersenne_engine; - std::shuffle(array.begin(), array.end(), mersenne_engine); - size_t cmp = 0; - auto copy_array = array; - // Should be no effect. 
- TypeParam::Sort(array.begin(), array.begin(), array.end(), - [&cmp](const auto &lhs, const auto &rhs) { - ++cmp; - return lhs < rhs; - }); - EXPECT_EQ(cmp, 0); - EXPECT_EQ(copy_array, array); -} - -} // namespace -} // namespace miniselect - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/src/AggregateFunctions/QuantileExact.h b/src/AggregateFunctions/QuantileExact.h index 3f5a0907126..e3d1ae5d8b8 100644 --- a/src/AggregateFunctions/QuantileExact.h +++ b/src/AggregateFunctions/QuantileExact.h @@ -8,6 +8,7 @@ #include #include +#include namespace DB { @@ -87,7 +88,7 @@ struct QuantileExact : QuantileExactBase> { size_t n = level < 1 ? level * array.size() : (array.size() - 1); - std::nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE You can think of the radix-select algorithm. + miniselect::floyd_rivest_select(array.begin(), array.begin() + n, array.end()); /// NOTE You can think of the radix-select algorithm. return array[n]; } @@ -107,7 +108,7 @@ struct QuantileExact : QuantileExactBase> size_t n = level < 1 ? 
level * array.size() : (array.size() - 1); - std::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); + miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n, array.end()); result[indices[i]] = array[n]; prev_n = n; @@ -144,7 +145,7 @@ struct QuantileExactExclusive : public QuantileExact else if (n < 1) return static_cast(array[0]); - std::nth_element(array.begin(), array.begin() + n - 1, array.end()); + miniselect::floyd_rivest_select(array.begin(), array.begin() + n - 1, array.end()); auto nth_element = std::min_element(array.begin() + n, array.end()); return static_cast(array[n - 1]) + (h - n) * static_cast(*nth_element - array[n - 1]); @@ -173,7 +174,7 @@ struct QuantileExactExclusive : public QuantileExact result[indices[i]] = static_cast(array[0]); else { - std::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); + miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n - 1, array.end()); auto nth_element = std::min_element(array.begin() + n, array.end()); result[indices[i]] = static_cast(array[n - 1]) + (h - n) * static_cast(*nth_element - array[n - 1]); @@ -209,7 +210,7 @@ struct QuantileExactInclusive : public QuantileExact else if (n < 1) return static_cast(array[0]); - std::nth_element(array.begin(), array.begin() + n - 1, array.end()); + miniselect::floyd_rivest_select(array.begin(), array.begin() + n - 1, array.end()); auto nth_element = std::min_element(array.begin() + n, array.end()); return static_cast(array[n - 1]) + (h - n) * static_cast(*nth_element - array[n - 1]); @@ -236,7 +237,7 @@ struct QuantileExactInclusive : public QuantileExact result[indices[i]] = static_cast(array[0]); else { - std::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); + miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n - 1, array.end()); auto nth_element = std::min_element(array.begin() + n, array.end()); result[indices[i]] = static_cast(array[n - 
1]) + (h - n) * static_cast(*nth_element - array[n - 1]); diff --git a/src/AggregateFunctions/QuantileTiming.h b/src/AggregateFunctions/QuantileTiming.h index 2ab8c866615..28bcde5c140 100644 --- a/src/AggregateFunctions/QuantileTiming.h +++ b/src/AggregateFunctions/QuantileTiming.h @@ -7,6 +7,7 @@ #include #include +#include namespace DB { @@ -179,7 +180,7 @@ namespace detail /// Sorting an array will not be considered a violation of constancy. auto & array = elems; - std::nth_element(array.begin(), array.begin() + n, array.end()); + miniselect::floyd_rivest_select(array.begin(), array.begin() + n, array.end()); quantile = array[n]; } @@ -200,7 +201,7 @@ namespace detail ? level * elems.size() : (elems.size() - 1); - std::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); + miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n, array.end()); result[level_index] = array[n]; prev_n = n; diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index b27506c1cfb..4285259a4f4 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -163,10 +163,10 @@ void ColumnDecimal::updatePermutation(bool reverse, size_t limit, int, IColum { const auto& [first, last] = equal_ranges[i]; if (reverse) - miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + last, res.begin() + last, + std::sort(res.begin() + first, res.begin() + last, [this](size_t a, size_t b) { return data[a] > data[b]; }); else - miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + last, res.begin() + last, + std::sort(res.begin() + first, res.begin() + last, [this](size_t a, size_t b) { return data[a] < data[b]; }); auto new_first = first;