From a0d023cf54686ae701b133c7f1091f4571a91dd3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 24 Oct 2022 00:51:57 +0000 Subject: [PATCH 0001/1687] add tcp protocol fuzzer --- contrib/CMakeLists.txt | 1 + contrib/libfuzzer-cmake/CMakeLists.txt | 35 ++++++ programs/server/CMakeLists.txt | 2 + programs/server/fuzzers/CMakeLists.txt | 19 +++ .../server/fuzzers/tcp_protocol_fuzzer.cpp | 117 ++++++++++++++++++ 5 files changed, 174 insertions(+) create mode 100644 contrib/libfuzzer-cmake/CMakeLists.txt create mode 100644 programs/server/fuzzers/CMakeLists.txt create mode 100644 programs/server/fuzzers/tcp_protocol_fuzzer.cpp diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index f914c0d2d3f..c2b16ae6dd6 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -114,6 +114,7 @@ if (ENABLE_TESTS) endif() add_contrib (llvm-project-cmake llvm-project) +add_contrib (libfuzzer-cmake llvm-project) add_contrib (libxml2-cmake libxml2) add_contrib (aws-s3-cmake aws diff --git a/contrib/libfuzzer-cmake/CMakeLists.txt b/contrib/libfuzzer-cmake/CMakeLists.txt new file mode 100644 index 00000000000..ff3a91d828e --- /dev/null +++ b/contrib/libfuzzer-cmake/CMakeLists.txt @@ -0,0 +1,35 @@ +set(COMPILER_RT_FUZZER_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/compiler-rt/lib/fuzzer") + +set(FUZZER_SRCS + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerCrossOver.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerDataFlowTrace.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerDriver.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsDlsym.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsWeak.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCounters.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCountersDarwin.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCountersWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerFork.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIO.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIOPosix.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIOWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerLoop.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMerge.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMutate.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerSHA1.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerTracePC.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtil.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilDarwin.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilFuchsia.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilLinux.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilPosix.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilWindows.cpp" +) + +add_library(_fuzzer_no_main STATIC ${FUZZER_SRCS}) +add_library(ch_contrib::fuzzer_no_main ALIAS _fuzzer_no_main) + +add_library(_fuzzer STATIC ${FUZZER_SRCS} "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMain.cpp") +add_library(ch_contrib::fuzzer ALIAS _fuzzer) + diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 2cfa748d585..421918eb922 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -37,3 +37,5 @@ clickhouse_embed_binaries( RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js ) add_dependencies(clickhouse-server-lib clickhouse_server_configs) + +add_subdirectory(fuzzers) diff --git a/programs/server/fuzzers/CMakeLists.txt b/programs/server/fuzzers/CMakeLists.txt new file mode 100644 index 00000000000..b8f57c37e31 --- /dev/null +++ b/programs/server/fuzzers/CMakeLists.txt @@ -0,0 +1,19 @@ 
+clickhouse_add_executable(tcp_protocol_fuzzer tcp_protocol_fuzzer.cpp ../Server.cpp ../MetricsTransmitter.cpp) + +set (TCP_PROTOCOL_FUZZER_LINK + PRIVATE + daemon + clickhouse_aggregate_functions + clickhouse_functions + clickhouse_table_functions + ch_contrib::fuzzer + "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}" +) + +if (TARGET ch_contrib::jemalloc) + list(APPEND TCP_PROTOCOL_FUZZER_LINK PRIVATE ch_contrib::jemalloc) +endif() + +target_link_libraries(tcp_protocol_fuzzer ${TCP_PROTOCOL_FUZZER_LINK}) + +add_dependencies(tcp_protocol_fuzzer clickhouse_server_configs) diff --git a/programs/server/fuzzers/tcp_protocol_fuzzer.cpp b/programs/server/fuzzers/tcp_protocol_fuzzer.cpp new file mode 100644 index 00000000000..2d1023ba804 --- /dev/null +++ b/programs/server/fuzzers/tcp_protocol_fuzzer.cpp @@ -0,0 +1,117 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + + +int mainEntryClickHouseServer(int argc, char ** argv); + +static std::string clickhouse("clickhouse-server"); +static std::vector args{clickhouse.data()}; +static std::future main_app; + +static char * host = nullptr; +static int port = 0; + +using namespace std::chrono_literals; + +extern "C" +int LLVMFuzzerInitialize(int * argc, char ***argv) +{ + for (int i = 1; i < *argc; ++i) + { + if ((*argv)[i][0] == '-') + { + if ((*argv)[i][1] == '-') + args.push_back((*argv)[i]); + else + { + if (strncmp((*argv)[i], "-host=", 6) == 0) + { + host = (*argv)[i] + 6; + } + else if (strncmp((*argv)[i], "-port=", 6) == 0) + { + char * p_end = nullptr; + port = strtol((*argv)[i] + 6, &p_end, 10); + } + } + } + } + + args.push_back(nullptr); + + main_app = std::async(std::launch::async, mainEntryClickHouseServer, args.size() - 1, args.data()); + + while (!DB::Context::getGlobalContextInstance() || !DB::Context::getGlobalContextInstance()->isServerCompletelyStarted()) + { + std::this_thread::sleep_for(100ms); + if (main_app.wait_for(0s) == std::future_status::ready) + exit(-1); + } + + return 0; +} + +extern "C" +int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +{ + if (main_app.wait_for(0s) == std::future_status::ready) + exit(-1); + + if (size == 0) + return -1; + + try + { + Poco::Net::SocketAddress address(host, port); + Poco::Net::StreamSocket socket; + + socket.connectNB(address); + + Poco::Net::PollSet ps; + ps.add(socket, Poco::Net::PollSet::POLL_READ | Poco::Net::PollSet::POLL_WRITE); + + std::vector buf(1048576); + size_t sent = 0; + while (true) + { + auto m = ps.poll(Poco::Timespan(1000000)); + if (m.empty()) + continue; + if (m.begin()->second & Poco::Net::PollSet::POLL_READ) + { + if (int n = socket.receiveBytes(buf.data(), buf.size()); n == 0) + { + socket.close(); + break; + } + + continue; + } + + if (sent < size && m.begin()->second & Poco::Net::PollSet::POLL_WRITE) + { + sent += socket.sendBytes(data + sent, size - sent); + if (sent == size) + { + socket.shutdownSend(); + continue; + } + } + } + } + catch (const Poco::Exception &) + { + } + + return 0; +} From 7a1dc0fd4d371b04655ecb6d4d5a42b2abf85c05 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 24 Oct 2022 03:43:14 +0000 Subject: [PATCH 0002/1687] ENABLE_FUZZING and some minor fixes --- programs/server/CMakeLists.txt | 4 +++- programs/server/fuzzers/tcp_protocol_fuzzer.cpp | 12 +++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 421918eb922..d43ae0c48df 100644 --- a/programs/server/CMakeLists.txt 
+++ b/programs/server/CMakeLists.txt @@ -38,4 +38,6 @@ clickhouse_embed_binaries( ) add_dependencies(clickhouse-server-lib clickhouse_server_configs) -add_subdirectory(fuzzers) +if (ENABLE_FUZZING) + add_subdirectory(fuzzers) +endif() diff --git a/programs/server/fuzzers/tcp_protocol_fuzzer.cpp b/programs/server/fuzzers/tcp_protocol_fuzzer.cpp index 2d1023ba804..aa3730b3e2f 100644 --- a/programs/server/fuzzers/tcp_protocol_fuzzer.cpp +++ b/programs/server/fuzzers/tcp_protocol_fuzzer.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -18,12 +19,13 @@ static std::string clickhouse("clickhouse-server"); static std::vector args{clickhouse.data()}; static std::future main_app; -static char * host = nullptr; -static int port = 0; +static std::string s_host("0.0.0.0"); +static char * host = s_host.data(); +static int64_t port = 9000; using namespace std::chrono_literals; -extern "C" +extern "C" int LLVMFuzzerInitialize(int * argc, char ***argv) { for (int i = 1; i < *argc; ++i) @@ -89,7 +91,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) continue; if (m.begin()->second & Poco::Net::PollSet::POLL_READ) { - if (int n = socket.receiveBytes(buf.data(), buf.size()); n == 0) + if (int n = socket.receiveBytes(buf.data(), static_cast(buf.size())); n == 0) { socket.close(); break; @@ -100,7 +102,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) if (sent < size && m.begin()->second & Poco::Net::PollSet::POLL_WRITE) { - sent += socket.sendBytes(data + sent, size - sent); + sent += socket.sendBytes(data + sent, static_cast(size - sent)); if (sent == size) { socket.shutdownSend(); From 21fec05242df9ee8342a1620b0b045ae2fc4780b Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Sun, 30 Oct 2022 17:36:46 +0100 Subject: [PATCH 0003/1687] Add a step to build fuzzers --- .github/workflows/pull_request.yml | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 2795dc62d6d..c9c649cc797 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3491,6 +3491,37 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" ############################################################################################# +#################################### OSS FUZZ ############################################### +############################################################################################# + Fuzzing: + if: contains(github.event.pull_request.labels.*.name, 'build-fuzzers') + runs-on: [self-hosted, fuzzer-unit-tester] + strategy: + fail-fast: false + matrix: + sanitizer: [address, undefined, memory] + steps: + - name: Build Fuzzers (${{ matrix.sanitizer }}) + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: 'clickhouse' + language: c++ + sanitizer: ${{ matrix.sanitizer }} + - name: Run Fuzzers (${{ matrix.sanitizer }}) + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + oss-fuzz-project-name: 'clickhouse' + language: c++ + fuzz-seconds: 600 + sanitizer: ${{ matrix.sanitizer }} + - name: Upload Crash + uses: actions/upload-artifact@v1 + if: failure() && steps.build.outcome == 'success' + with: + name: ${{ matrix.sanitizer }}-artifacts + path: ./out/artifacts +############################################################################################# ###################################### JEPSEN TESTS 
######################################### ############################################################################################# Jepsen: From 2fbff660d7f606b4dc842e9a3f5f7a73d9686630 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Mon, 31 Oct 2022 09:45:38 -0400 Subject: [PATCH 0004/1687] fix indentation --- .github/workflows/pull_request.yml | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c9c649cc797..15d1f508e1b 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3501,26 +3501,26 @@ jobs: matrix: sanitizer: [address, undefined, memory] steps: - - name: Build Fuzzers (${{ matrix.sanitizer }}) - id: build - uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master - with: - oss-fuzz-project-name: 'clickhouse' - language: c++ - sanitizer: ${{ matrix.sanitizer }} - - name: Run Fuzzers (${{ matrix.sanitizer }}) - uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master - with: - oss-fuzz-project-name: 'clickhouse' - language: c++ - fuzz-seconds: 600 - sanitizer: ${{ matrix.sanitizer }} - - name: Upload Crash - uses: actions/upload-artifact@v1 - if: failure() && steps.build.outcome == 'success' - with: - name: ${{ matrix.sanitizer }}-artifacts - path: ./out/artifacts + - name: Build Fuzzers (${{ matrix.sanitizer }}) + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: 'clickhouse' + language: c++ + sanitizer: ${{ matrix.sanitizer }} + - name: Run Fuzzers (${{ matrix.sanitizer }}) + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + oss-fuzz-project-name: 'clickhouse' + language: c++ + fuzz-seconds: 600 + sanitizer: ${{ matrix.sanitizer }} + - name: Upload Crash + uses: actions/upload-artifact@v1 + if: failure() && steps.build.outcome == 'success' + with: + name: ${{ matrix.sanitizer }}-artifacts + path: ./out/artifacts ############################################################################################# ###################################### JEPSEN TESTS ######################################### ############################################################################################# From 0821358c04d70b9171e4232a9352db22ac126806 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Tue, 1 Nov 2022 23:30:40 +0100 Subject: [PATCH 0005/1687] Update pull_request.yml --- .github/workflows/pull_request.yml | 78 +++++++++++++++--------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 15d1f508e1b..1129ee20d1f 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -242,9 +242,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -######################################################################################### -#################################### ORDINARY BUILDS #################################### -######################################################################################### + ######################################################################################### + #################################### ORDINARY BUILDS #################################### + 
######################################################################################### BuilderDebRelease: needs: [DockerHubPush, FastTest, StyleCheck] runs-on: [self-hosted, builder] @@ -593,9 +593,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" -########################################################################################## -##################################### SPECIAL BUILDS ##################################### -########################################################################################## + ########################################################################################## + ##################################### SPECIAL BUILDS ##################################### + ########################################################################################## BuilderDebShared: needs: [DockerHubPush, FastTest, StyleCheck] runs-on: [self-hosted, builder] @@ -983,9 +983,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" -############################################################################################ -##################################### Docker images ####################################### -############################################################################################ + ############################################################################################ + ##################################### Docker images ####################################### + ############################################################################################ DockerServerImages: needs: - BuilderDebRelease @@ -1011,9 +1011,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################ -##################################### BUILD REPORTER ####################################### -############################################################################################ + ############################################################################################ + ##################################### BUILD REPORTER ####################################### + ############################################################################################ BuilderReport: needs: - BuilderBinRelease @@ -1106,9 +1106,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################## -########################### FUNCTIONAl STATELESS TESTS ####################################### -############################################################################################## + ############################################################################################## + ########################### FUNCTIONAl STATELESS TESTS ####################################### + ############################################################################################## FunctionalStatelessTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -2067,9 +2067,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty 
docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################## -############################ FUNCTIONAl STATEFUL TESTS ####################################### -############################################################################################## + ############################################################################################## + ############################ FUNCTIONAl STATEFUL TESTS ####################################### + ############################################################################################## FunctionalStatefulTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -2315,9 +2315,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################## -######################################### STRESS TESTS ####################################### -############################################################################################## + ############################################################################################## + ######################################### STRESS TESTS ####################################### + ############################################################################################## StressTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -2492,9 +2492,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################## -##################################### AST FUZZERS ############################################ -############################################################################################## + ############################################################################################## + ##################################### AST FUZZERS ############################################ + ############################################################################################## ASTFuzzerTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2665,9 +2665,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################# -############################# INTEGRATION TESTS ############################################# -############################################################################################# + ############################################################################################# + ############################# INTEGRATION TESTS ############################################# + ############################################################################################# IntegrationTestsAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -3026,9 +3026,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" 
-############################################################################################# -#################################### UNIT TESTS ############################################# -############################################################################################# + ############################################################################################# + #################################### UNIT TESTS ############################################# + ############################################################################################# UnitTestsAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -3199,9 +3199,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################# -#################################### PERFORMANCE TESTS ###################################### -############################################################################################# + ############################################################################################# + #################################### PERFORMANCE TESTS ###################################### + ############################################################################################# PerformanceComparisonX86-0: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] @@ -3490,9 +3490,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################# -#################################### OSS FUZZ ############################################### -############################################################################################# + ############################################################################################# + #################################### OSS FUZZ ############################################### + ############################################################################################# Fuzzing: if: contains(github.event.pull_request.labels.*.name, 'build-fuzzers') runs-on: [self-hosted, fuzzer-unit-tester] @@ -3521,9 +3521,9 @@ jobs: with: name: ${{ matrix.sanitizer }}-artifacts path: ./out/artifacts -############################################################################################# -###################################### JEPSEN TESTS ######################################### -############################################################################################# + ############################################################################################# + ###################################### JEPSEN TESTS ######################################### + ############################################################################################# Jepsen: # This is special test NOT INCLUDED in FinishCheck # When it's skipped, all dependent tasks will be skipped too. 
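
A note for readers unfamiliar with the libFuzzer contract the CI steps above build against: every target, including the tcp_protocol_fuzzer added in the first patch, is a set of extern "C" entry points that libFuzzer drives with mutated inputs. The sketch below is a minimal, self-contained harness of the same shape; parsePacket is a hypothetical stand-in, not ClickHouse code, and the planted bug exists only so the fuzzer has something to find.

    #include <cstdint>
    #include <cstdlib>
    #include <string>

    /// Hypothetical function under test -- a stand-in for a real packet handler.
    static void parsePacket(const std::string & payload)
    {
        /// Planted bug: short packets starting with this tag byte abort the process.
        if (!payload.empty() && payload[0] == 0x01 && payload.size() < 4)
            abort();
    }

    /// Optional one-time setup hook, called once before any input is run.
    /// tcp_protocol_fuzzer.cpp uses this hook to parse -host=/-port= and to
    /// start the server before fuzzing begins.
    extern "C" int LLVMFuzzerInitialize(int * /*argc*/, char *** /*argv*/)
    {
        return 0;
    }

    /// Called by libFuzzer once per mutated input. A crash, abort() or
    /// sanitizer report inside this call is a finding, and the offending
    /// input is saved as a reproducer.
    extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
    {
        parsePacket(std::string(reinterpret_cast<const char *>(data), size));
        return 0; /// 0 = input processed; -1 = do not add it to the corpus
    }

Compiled with clang++ -g -fsanitize=fuzzer,address harness.cpp, the binary generates its own inputs; linking against ch_contrib::fuzzer, as the CMake files above do, achieves the same with the vendored libFuzzer runtime.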
From 2b90141c42574cedf48aa285b24e7d53b142ede8 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 28 Feb 2023 13:17:31 +0000 Subject: [PATCH 0006/1687] Integrate DiskObjectStorage with IO Scheduler --- src/Common/CurrentThread.h | 8 ++++ src/Disks/DiskLocal.cpp | 4 +- .../registerDiskAzureBlobStorage.cpp | 4 +- .../ObjectStorages/DiskObjectStorage.cpp | 47 +++++++++++++++++-- src/Disks/ObjectStorages/DiskObjectStorage.h | 11 ++++- .../ObjectStorages/HDFS/registerDiskHDFS.cpp | 5 +- .../ObjectStorages/S3/registerDiskS3.cpp | 4 +- .../Web/registerDiskWebServer.cpp | 4 +- src/IO/IResourceManager.h | 2 +- src/Interpreters/Context.cpp | 4 +- src/Interpreters/Context.h | 4 ++ 11 files changed, 85 insertions(+), 12 deletions(-) diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index f36b92e319d..1571f52558c 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -93,6 +93,14 @@ public: return current_thread->getQueryId(); } + /// Returns attached query context + static ContextPtr getQueryContext() + { + if (isInitialized()) + return get().getQueryContext(); + return {}; + } + /// Non-master threads call this method in destructor automatically static void detachQuery(); static void detachQueryIfNotDetached(); diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index a3b7e413014..088e44357b3 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -613,7 +613,9 @@ DiskObjectStoragePtr DiskLocal::createDiskObjectStorage() metadata_storage, object_storage, false, - /* threadpool_size */16 + /* threadpool_size */ 16, + /* read_resource_name */ "", + /* write_resource_name */ "" ); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 562b2b2fec0..e0c0a525d49 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -43,7 +43,9 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access std::move(metadata_storage), std::move(azure_object_storage), send_metadata, - copy_thread_pool_size + copy_thread_pool_size, + config.getString(config_prefix + ".read_resource", ""), + config.getString(config_prefix + ".write_resource", "") ); bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index d55b1c91c07..825ed03b1ba 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -109,7 +110,9 @@ DiskObjectStorage::DiskObjectStorage( MetadataStoragePtr metadata_storage_, ObjectStoragePtr object_storage_, bool send_metadata_, - uint64_t thread_pool_size_) + uint64_t thread_pool_size_, + const String & read_resource_name_, + const String & write_resource_name_) : IDisk(name_, getAsyncExecutor(log_name, thread_pool_size_)) , object_storage_root_path(object_storage_root_path_) , log (&Poco::Logger::get("DiskObjectStorage(" + log_name + ")")) @@ -117,6 +120,8 @@ DiskObjectStorage::DiskObjectStorage( , object_storage(std::move(object_storage_)) , send_metadata(send_metadata_) , threadpool_size(thread_pool_size_) + , read_resource_name(read_resource_name_) + , write_resource_name(write_resource_name_) , 
metadata_helper(std::make_unique(this, ReadSettings{})) {} @@ -517,7 +522,9 @@ DiskObjectStoragePtr DiskObjectStorage::createDiskObjectStorage() metadata_storage, object_storage, send_metadata, - threadpool_size); + threadpool_size, + getReadResourceName(), + getWriteResourceName()); } void DiskObjectStorage::wrapWithCache(FileCachePtr cache, const FileCacheSettings & cache_settings, const String & layer_name) @@ -546,6 +553,32 @@ NameSet DiskObjectStorage::getCacheLayersNames() const return cache_layers; } +template +static inline Settings updateResourceLink(const Settings & settings, const String & resource_name) +{ + if (resource_name.empty()) + return settings; + if (auto query_context = CurrentThread::getQueryContext()) + { + Settings result(settings); + result.resource_link = query_context->getClassifier()->get(resource_name); + return result; + } + return settings; +} + +String DiskObjectStorage::getReadResourceName() const +{ + std::unique_lock lock(resource_mutex); + return read_resource_name; +} + +String DiskObjectStorage::getWriteResourceName() const +{ + std::unique_lock lock(resource_mutex); + return write_resource_name; +} + std::unique_ptr DiskObjectStorage::readFile( const String & path, const ReadSettings & settings, @@ -554,7 +587,7 @@ std::unique_ptr DiskObjectStorage::readFile( { return object_storage->readObjects( metadata_storage->getStorageObjects(path), - object_storage->getAdjustedSettingsFromMetadataFile(settings, path), + object_storage->getAdjustedSettingsFromMetadataFile(updateResourceLink(settings, getReadResourceName()), path), read_hint, file_size); } @@ -572,7 +605,7 @@ std::unique_ptr DiskObjectStorage::writeFile( path, buf_size, mode, - object_storage->getAdjustedSettingsFromMetadataFile(settings, path)); + object_storage->getAdjustedSettingsFromMetadataFile(updateResourceLink(settings, getWriteResourceName()), path)); return result; } @@ -585,6 +618,12 @@ void DiskObjectStorage::applyNewSettings( if (AsyncThreadPoolExecutor * exec = dynamic_cast(&getExecutor())) exec->setMaxThreads(config.getInt(config_prefix + ".thread_pool_size", 16)); + + std::unique_lock lock(resource_mutex); + if (String new_read_resource_name = config.getString(config_prefix + ".read_resource", ""); new_read_resource_name != read_resource_name) + read_resource_name = new_read_resource_name; + if (String new_write_resource_name = config.getString(config_prefix + ".write_resource", ""); new_write_resource_name != write_resource_name) + write_resource_name = new_write_resource_name; } void DiskObjectStorage::restoreMetadataIfNeeded( diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index a24acc270c0..f5b9fdf54d3 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -34,7 +34,9 @@ public: MetadataStoragePtr metadata_storage_, ObjectStoragePtr object_storage_, bool send_metadata_, - uint64_t thread_pool_size_); + uint64_t thread_pool_size_, + const String & read_resource_name_, + const String & write_resource_name_); /// Create fake transaction DiskTransactionPtr createTransaction() override; @@ -211,6 +213,9 @@ private: /// execution. 
DiskTransactionPtr createObjectStorageTransaction(); + String getReadResourceName() const; + String getWriteResourceName() const; + const String object_storage_root_path; Poco::Logger * log; @@ -226,6 +231,10 @@ private: const bool send_metadata; size_t threadpool_size; + mutable std::mutex resource_mutex; + String read_resource_name; + String write_resource_name; + std::unique_ptr metadata_helper; }; diff --git a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index 693b966caf2..b7f703b278d 100644 --- a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -54,7 +54,10 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check) std::move(metadata_storage), std::move(hdfs_storage), /* send_metadata = */ false, - copy_thread_pool_size); + copy_thread_pool_size, + config.getString(config_prefix + ".read_resource", ""), + config.getString(config_prefix + ".write_resource", "") + ); disk->startup(context, skip_access_check); return disk; diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 1c192a0d89c..bce1e640a7c 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -163,7 +163,9 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) std::move(metadata_storage), std::move(s3_storage), send_metadata, - copy_thread_pool_size); + copy_thread_pool_size, + config.getString(config_prefix + ".read_resource", ""), + config.getString(config_prefix + ".write_resource", "")); s3disk->startup(context, skip_access_check); diff --git a/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp b/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp index 8a54de81815..19005727fa7 100644 --- a/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp +++ b/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp @@ -53,7 +53,9 @@ void registerDiskWebServer(DiskFactory & factory, bool global_skip_access_check) metadata_storage, object_storage, /* send_metadata */false, - /* threadpool_size */16); + /* threadpool_size */16, + config.getString(config_prefix + ".read_resource", ""), + config.getString(config_prefix + ".write_resource", "")); disk->startup(context, skip_access_check); return disk; }; diff --git a/src/IO/IResourceManager.h b/src/IO/IResourceManager.h index f084a903cb1..ff372698ed5 100644 --- a/src/IO/IResourceManager.h +++ b/src/IO/IResourceManager.h @@ -23,7 +23,7 @@ class IClassifier : private boost::noncopyable public: virtual ~IClassifier() {} - /// Returns ResouceLink that should be used to access resource. + /// Returns ResourceLink that should be used to access resource. /// Returned link is valid until classifier destruction. 
virtual ResourceLink get(const String & resource_name) = 0; }; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 33505e41789..72a472fcb4a 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1165,7 +1165,9 @@ ResourceManagerPtr Context::getResourceManager() const ClassifierPtr Context::getClassifier() const { auto lock = getLock(); - return getResourceManager()->acquire(getSettingsRef().workload); + if (!classifier) + classifier = getResourceManager()->acquire(getSettingsRef().workload); + return classifier; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 19bb6868331..9ea8d253439 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -402,6 +402,10 @@ private: /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; + /// Resource classifier for a query, holds smart pointers required for ResourceLink + /// NOTE: all resource links became invalid after `classifier` destruction + mutable ClassifierPtr classifier; + public: /// Some counters for current query execution. /// Most of them are workarounds and should be removed in the future. From 21b6ccc677ba040acc62fc0c63cadbe53634ce3f Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 2 Mar 2023 15:35:28 +0000 Subject: [PATCH 0007/1687] fix comment --- src/IO/ResourceRequest.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/IO/ResourceRequest.h b/src/IO/ResourceRequest.h index 989349148cf..3d2230746f9 100644 --- a/src/IO/ResourceRequest.h +++ b/src/IO/ResourceRequest.h @@ -45,8 +45,7 @@ class ResourceRequest { public: /// Cost of request execution; should be filled before request enqueueing. - /// NOTE: If cost is not known in advance, credit model can be used: - /// NOTE: for the first request use 1 and + /// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it) ResourceCost cost; /// Request outcome From 51ea0888e58edfdf0bfe5d233ff2222da21baf44 Mon Sep 17 00:00:00 2001 From: cangyin Date: Fri, 17 Mar 2023 10:00:50 +0800 Subject: [PATCH 0008/1687] fix invalid segment id bug after mutation skip hardlinking inverted index files in mutation --- src/Storages/MergeTree/MutateTask.cpp | 10 ++++++++ .../02346_inverted_index_mutation.reference | 3 +++ .../02346_inverted_index_mutation.sql | 23 +++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 tests/queries/0_stateless/02346_inverted_index_mutation.reference create mode 100644 tests/queries/0_stateless/02346_inverted_index_mutation.sql diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 9f7a12745c6..30fa50e325f 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -551,6 +552,15 @@ static NameSet collectFilesToSkip( /// Since MinMax index has .idx2 extension, we need to add correct extension. 
files_to_skip.insert(index->getFileName() + index->getSerializedFileExtension()); files_to_skip.insert(index->getFileName() + mrk_extension); + + // skip all inverted index files, for they will be re-built + if (dynamic_cast(&*index) != nullptr) + { + files_to_skip.insert(index->getFileName() + ".gin_dict"); + files_to_skip.insert(index->getFileName() + ".gin_post"); + files_to_skip.insert(index->getFileName() + ".gin_sed"); + files_to_skip.insert(index->getFileName() + ".gin_sid"); + } } for (const auto & projection : projections_to_recalc) diff --git a/tests/queries/0_stateless/02346_inverted_index_mutation.reference b/tests/queries/0_stateless/02346_inverted_index_mutation.reference new file mode 100644 index 00000000000..b80f66e4c05 --- /dev/null +++ b/tests/queries/0_stateless/02346_inverted_index_mutation.reference @@ -0,0 +1,3 @@ +1 +2 +I am not inverted diff --git a/tests/queries/0_stateless/02346_inverted_index_mutation.sql b/tests/queries/0_stateless/02346_inverted_index_mutation.sql new file mode 100644 index 00000000000..0987b4aa43c --- /dev/null +++ b/tests/queries/0_stateless/02346_inverted_index_mutation.sql @@ -0,0 +1,23 @@ +SET allow_experimental_inverted_index=1; + +DROP TABLE IF EXISTS t; +CREATE TABLE t +( + `timestamp` UInt64, + `s` String, + INDEX idx s TYPE inverted(3) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1; + +INSERT INTO t (s) VALUES ('I am inverted'); + +SELECT data_version FROM system.parts WHERE database=currentDatabase() AND table='t' AND active=1; + +-- do update column synchronously +ALTER TABLE t UPDATE s='I am not inverted' WHERE 1 SETTINGS mutations_sync=1; + +SELECT data_version FROM system.parts WHERE database=currentDatabase() AND table='t' AND active=1; + +SELECT s FROM t WHERE s LIKE '%inverted%' SETTINGS force_data_skipping_indices='idx'; From af66d44106a40bd48d5320098de416fa40fe6455 Mon Sep 17 00:00:00 2001 From: alexX512 Date: Mon, 10 Apr 2023 13:51:39 +0000 Subject: [PATCH 0009/1687] Add settings for real-time updates during query execution --- src/Client/ClientBase.cpp | 32 +++++++++++++- src/Client/ClientBase.h | 6 +++ src/Core/BlockInfo.h | 8 +++- src/Core/Settings.h | 3 ++ src/Processors/Chunk.cpp | 17 ++++---- src/Processors/Chunk.h | 18 ++++++-- .../PullingAsyncPipelineExecutor.cpp | 1 + src/Processors/Formats/IOutputFormat.cpp | 20 ++++++++- src/Processors/Formats/IOutputFormat.h | 2 + .../Formats/Impl/PrettyBlockOutputFormat.cpp | 6 ++- .../Impl/PrettyCompactBlockOutputFormat.cpp | 3 +- src/Processors/LimitTransform.cpp | 5 +++ src/Processors/QueryPlan/SortingStep.cpp | 6 ++- src/Processors/QueryPlan/SortingStep.h | 2 + .../Transforms/LimitsCheckingTransform.cpp | 2 +- .../Transforms/MergeSortingTransform.cpp | 42 ++++++++++++++++++- .../Transforms/MergeSortingTransform.h | 11 ++++- 17 files changed, 162 insertions(+), 22 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 6a2961d69da..aa413c4d749 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -440,7 +440,9 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query) if (!block) return; - processed_rows += block.rows(); + if (!block.info.has_partial_result) + processed_rows += block.rows(); + /// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset. 
initOutputFormat(block, parsed_query); @@ -450,14 +452,35 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query) if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100)) return; + if (received_first_full_result && block.info.has_partial_result) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Server shouldn't send partial results after the first block with a non-empty full result"); + + if (!is_interactive && block.info.has_partial_result) + return; + + bool first_full_result = false; + if (!received_first_full_result && !block.info.has_partial_result) + { + received_first_full_result = true; + first_full_result = true; + } + /// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker. if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout)) progress_indication.clearProgressOutput(*tty_buf); + if (is_interactive && first_full_result && has_partial_result_setting) + std::cout << "Full result:" << std::endl; + try { + /// Clear previous partial results to write new partial results if needed + if (!received_first_full_result && written_first_block) + output_format->clearLastLines(prev_block_rows + 2); + output_format->write(materializeBlock(block)); written_first_block = true; + prev_block_rows = block.rows(); } catch (const Exception &) { @@ -865,6 +888,10 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa const auto & settings = global_context->getSettingsRef(); const Int32 signals_before_stop = settings.stop_reading_on_first_cancel ? 2 : 1; + has_partial_result_setting = settings.partial_result_update_duration_ms.totalMilliseconds() > 0; + + if (is_interactive && has_partial_result_setting) + std::cout << "Partial result:" << std::endl; int retries_left = 10; while (retries_left) @@ -1614,6 +1641,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin } processed_rows = 0; + prev_block_rows = 0; + has_partial_result_setting = false; + received_first_full_result = false; written_first_block = false; progress_indication.resetProgress(); profile_events.watch.restart(); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index faf3fa8653a..1a785cc390f 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -259,6 +259,12 @@ protected: size_t processed_rows = 0; /// How many rows have been read or written. bool print_num_processed_rows = false; /// Whether to print the number of processed rows at + /// If `received_first_full_result` set to true then + /// client has already received all updates with a partial result + bool received_first_full_result = false; + bool has_partial_result_setting = false; + size_t prev_block_rows = 0; /// How many rows were in the previously received block of data + bool print_stack_trace = false; /// The last exception that was received from the server. Is used for the /// return code in batch mode. diff --git a/src/Core/BlockInfo.h b/src/Core/BlockInfo.h index d431303ca39..a0357b292cd 100644 --- a/src/Core/BlockInfo.h +++ b/src/Core/BlockInfo.h @@ -28,9 +28,15 @@ struct BlockInfo * Otherwise -1. */ + /** has_partial_result: + * If user wants to receive updates containing partial results during query execution, + * then data will be sent with has_partial_result flag set to true. 
+ */ + #define APPLY_FOR_BLOCK_INFO_FIELDS(M) \ M(bool, is_overflows, false, 1) \ - M(Int32, bucket_num, -1, 2) + M(Int32, bucket_num, -1, 2) \ + M(bool, has_partial_result, false, 3) #define DECLARE_FIELD(TYPE, NAME, DEFAULT, FIELD_NUM) \ TYPE NAME = DEFAULT; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index eaed800ae88..5099803c60c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -281,6 +281,9 @@ class IColumn; M(Bool, final, false, "Query with the FINAL modifier by default. If the engine does not support final, it does not have any effect. On queries with multiple tables final is applied only on those that support it. It also works on distributed tables", 0) \ \ M(Bool, stop_reading_on_first_cancel, false, "Allows query to return a partial result after cancel.", 0) \ + \ + M(Milliseconds, partial_result_update_duration_ms, 0, "Duration of time in milliseconds between real-time updates of result table sent to the client during query execution.", 0) \ + M(UInt64, max_rows_in_partial_result, 10, "Max rows displayed to user after each real-time update of output table during query execution.", 0) \ /** Settings for testing hedged requests */ \ M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \ M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \ diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index bbfa1683cf6..040beceeb7d 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -12,13 +12,14 @@ namespace ErrorCodes extern const int POSITION_OUT_OF_BOUND; } -Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns_)), num_rows(num_rows_) +Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_, bool has_partial_result_) + : columns(std::move(columns_)), num_rows(num_rows_), has_partial_result(has_partial_result_) { checkNumRowsIsConsistent(); } -Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(std::move(columns_)), num_rows(num_rows_), chunk_info(std::move(chunk_info_)) +Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_, bool has_partial_result_) + : columns(std::move(columns_)), num_rows(num_rows_), chunk_info(std::move(chunk_info_)), has_partial_result(has_partial_result_) { checkNumRowsIsConsistent(); } @@ -33,21 +34,21 @@ static Columns unmuteColumns(MutableColumns && mut_columns) return columns; } -Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) - : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_) +Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, bool has_partial_result_) + : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_), has_partial_result(has_partial_result_) { checkNumRowsIsConsistent(); } -Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_), chunk_info(std::move(chunk_info_)) +Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_, bool has_partial_result_) + : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_), chunk_info(std::move(chunk_info_)), has_partial_result(has_partial_result_) { checkNumRowsIsConsistent(); } Chunk Chunk::clone() const { - return Chunk(getColumns(), getNumRows(), chunk_info); + return Chunk(getColumns(), getNumRows(), chunk_info, has_partial_result); } void Chunk::setColumns(Columns columns_, UInt64 num_rows_) diff --git 
a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 15d91431b68..f9cc4a4e087 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -38,14 +38,16 @@ public: : columns(std::move(other.columns)) , num_rows(other.num_rows) , chunk_info(std::move(other.chunk_info)) + , has_partial_result(other.has_partial_result) { other.num_rows = 0; + other.has_partial_result = false; } - Chunk(Columns columns_, UInt64 num_rows_); - Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); - Chunk(MutableColumns columns_, UInt64 num_rows_); - Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); + Chunk(Columns columns_, UInt64 num_rows_, bool has_partial_result_ = false); + Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_, bool has_partial_result_ = false); + Chunk(MutableColumns columns_, UInt64 num_rows_, bool has_partial_result_ = false); + Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_, bool has_partial_result_ = false); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept @@ -54,6 +56,8 @@ public: chunk_info = std::move(other.chunk_info); num_rows = other.num_rows; other.num_rows = 0; + has_partial_result = other.has_partial_result; + other.has_partial_result = false; return *this; } @@ -64,6 +68,7 @@ public: columns.swap(other.columns); chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); + std::swap(has_partial_result, other.has_partial_result); } void clear() @@ -71,6 +76,7 @@ public: num_rows = 0; columns.clear(); chunk_info.reset(); + has_partial_result = false; } const Columns & getColumns() const { return columns; } @@ -103,10 +109,14 @@ public: void append(const Chunk & chunk); + void changePartialResultStatus(bool has_partial_result_) { has_partial_result = has_partial_result_; } + bool hasPartialResult() const { return has_partial_result; } + private: Columns columns; UInt64 num_rows = 0; ChunkInfoPtr chunk_info; + bool has_partial_result = false; void checkNumRowsIsConsistent(); }; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index abe0cd6320b..ad068a01636 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -160,6 +160,7 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) } block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); + block.info.has_partial_result = chunk.hasPartialResult(); if (auto chunk_info = chunk.getChunkInfo()) { diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index 88a6fb1e92f..cd292d7795e 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -113,7 +113,7 @@ void IOutputFormat::flush() void IOutputFormat::write(const Block & block) { writePrefixIfNeeded(); - consume(Chunk(block.getColumns(), block.rows())); + consume(Chunk(block.getColumns(), block.rows(), block.info.has_partial_result)); if (auto_flush) flush(); @@ -130,4 +130,22 @@ void IOutputFormat::finalize() finalized = true; } +void IOutputFormat::clearLastLines(size_t lines_number) +{ + /// http://en.wikipedia.org/wiki/ANSI_escape_code + #define MOVE_TO_PREV_LINE "\033[A" + #define CLEAR_TO_END_OF_LINE "\033[K" + + static const char * clear_prev_line = MOVE_TO_PREV_LINE \ + CLEAR_TO_END_OF_LINE; + + /// Move 
cursor to the beginning of line + writeCString("\r", out); + + for (size_t line = 0; line < lines_number; ++line) + { + writeCString(clear_prev_line, out); + } +} + } diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 02e91d5b28b..80403c49912 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -102,6 +102,8 @@ public: } } + void clearLastLines(size_t lines_number); + protected: friend class ParallelFormattingOutputFormat; diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index cefe3ee4a98..208b851b7df 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -140,7 +140,8 @@ void PrettyBlockOutputFormat::write(Chunk chunk, PortKind port_kind) { if (total_rows >= format_settings.pretty.max_rows) { - total_rows += chunk.getNumRows(); + if (!chunk.hasPartialResult()) + total_rows += chunk.getNumRows(); return; } if (mono_block) @@ -321,7 +322,8 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind } writeString(bottom_separator_s, out); - total_rows += num_rows; + if (!chunk.hasPartialResult()) + total_rows += num_rows; } diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index 2ba9ec725e2..bf6e4cb21f1 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -194,7 +194,8 @@ void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind po writeBottom(max_widths); - total_rows += num_rows; + if (!chunk.hasPartialResult()) + total_rows += num_rows; } diff --git a/src/Processors/LimitTransform.cpp b/src/Processors/LimitTransform.cpp index 2feee7e65b1..a6a0e8ab18a 100644 --- a/src/Processors/LimitTransform.cpp +++ b/src/Processors/LimitTransform.cpp @@ -180,6 +180,11 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data) return Status::NeedData; data.current_chunk = input.pull(true); + if (data.current_chunk.hasPartialResult()) + { + output.push(std::move(data.current_chunk)); + return Status::PortFull; + } auto rows = data.current_chunk.getNumRows(); diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 0ab8e091e05..2643c315785 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -33,6 +33,8 @@ SortingStep::Settings::Settings(const Context & context) max_bytes_before_external_sort = settings.max_bytes_before_external_sort; tmp_data = context.getTempDataOnDisk(); min_free_disk_space = settings.min_free_disk_space_for_temporary_data; + partial_result_limit = settings.max_rows_in_partial_result; + partial_result_duration_ms = settings.partial_result_update_duration_ms.totalMilliseconds(); } SortingStep::Settings::Settings(size_t max_block_size_) @@ -214,7 +216,9 @@ void SortingStep::mergeSorting( sort_settings.remerge_lowered_memory_bytes_ratio, sort_settings.max_bytes_before_external_sort, std::move(tmp_data_on_disk), - sort_settings.min_free_disk_space); + sort_settings.min_free_disk_space, + sort_settings.partial_result_limit, + sort_settings.partial_result_duration_ms); }); } diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index eed1f26e110..0c616c7ce23 100644 --- 
a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -27,6 +27,8 @@ public: size_t max_bytes_before_external_sort = 0; TemporaryDataOnDiskScopePtr tmp_data = nullptr; size_t min_free_disk_space = 0; + UInt64 partial_result_limit = 0; + UInt64 partial_result_duration_ms = 0; explicit Settings(const Context & context); explicit Settings(size_t max_block_size_); diff --git a/src/Processors/Transforms/LimitsCheckingTransform.cpp b/src/Processors/Transforms/LimitsCheckingTransform.cpp index 02d2fef808c..7edab0d417c 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.cpp +++ b/src/Processors/Transforms/LimitsCheckingTransform.cpp @@ -38,7 +38,7 @@ void LimitsCheckingTransform::transform(Chunk & chunk) return; } - if (chunk) + if (chunk && !chunk.hasPartialResult()) { info.update(chunk); diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index efd9249066c..36de7165344 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -85,13 +85,18 @@ MergeSortingTransform::MergeSortingTransform( double remerge_lowered_memory_bytes_ratio_, size_t max_bytes_before_external_sort_, TemporaryDataOnDiskPtr tmp_data_, - size_t min_free_disk_space_) + size_t min_free_disk_space_, + UInt64 partial_result_limit_, + UInt64 partial_result_duration_ms_) : SortingTransform(header, description_, max_merged_block_size_, limit_, increase_sort_description_compile_attempts) , max_bytes_before_remerge(max_bytes_before_remerge_) , remerge_lowered_memory_bytes_ratio(remerge_lowered_memory_bytes_ratio_) , max_bytes_before_external_sort(max_bytes_before_external_sort_) , tmp_data(std::move(tmp_data_)) , min_free_disk_space(min_free_disk_space_) + , partial_result_limit(partial_result_limit_) + , partial_result_duration_ms(partial_result_duration_ms_) + , watch(CLOCK_MONOTONIC) { } @@ -124,6 +129,35 @@ Processors MergeSortingTransform::expandPipeline() return std::move(processors); } +void MergeSortingTransform::updatePartialResult() +{ + /// Sort all input data + remerge(); + /// Add a copy of the first `partial_result_limit` rows to a generated_chunk + /// to send it as a partial result in the next prepare stage + auto generated_columns = chunks[0].cloneEmptyColumns(); + size_t total_rows = 0; + for (const auto & merged_chunk : chunks) + { + size_t rows = std::min(merged_chunk.getNumRows(), partial_result_limit - total_rows); + if (rows == 0) + break; + + for (size_t position = 0; position < generated_columns.size(); ++position) + { + auto column = merged_chunk.getColumns()[position]; + generated_columns[position]->insertRangeFrom(*column, 0, rows); + } + + total_rows += rows; + } + + generated_chunk.setColumns(std::move(generated_columns), total_rows); + generated_chunk.setChunkInfo(chunks[0].getChunkInfo()); + generated_chunk.changePartialResultStatus(true /*is_chunk_partial*/); + enrichChunkWithConstants(generated_chunk); +} + void MergeSortingTransform::consume(Chunk chunk) { /** Algorithm: @@ -159,6 +193,12 @@ void MergeSortingTransform::consume(Chunk chunk) remerge(); } + if (partial_result_duration_ms && partial_result_duration_ms < watch.elapsedMilliseconds() && !chunks.empty()) + { + updatePartialResult(); + watch.restart(); + } + /** If too many of them and if external sorting is enabled, * will merge blocks that we have in memory at this moment and write merged stream to temporary (compressed) file. * NOTE. 
It's possible to check free space in filesystem. diff --git a/src/Processors/Transforms/MergeSortingTransform.h b/src/Processors/Transforms/MergeSortingTransform.h index c64c93393ce..2b2db6fae16 100644 --- a/src/Processors/Transforms/MergeSortingTransform.h +++ b/src/Processors/Transforms/MergeSortingTransform.h @@ -30,7 +30,9 @@ public: double remerge_lowered_memory_bytes_ratio_, size_t max_bytes_before_external_sort_, TemporaryDataOnDiskPtr tmp_data_, - size_t min_free_disk_space_); + size_t min_free_disk_space_, + UInt64 partial_result_limit_ = 0, + UInt64 partial_result_duration_ms_ = 0); String getName() const override { return "MergeSortingTransform"; } @@ -51,6 +53,11 @@ private: size_t sum_rows_in_blocks = 0; size_t sum_bytes_in_blocks = 0; + UInt64 partial_result_limit; + UInt64 partial_result_duration_ms; + + Stopwatch watch; + Poco::Logger * log = &Poco::Logger::get("MergeSortingTransform"); /// If remerge doesn't save memory at least several times, mark it as useless and don't do it anymore. @@ -59,6 +66,8 @@ private: /// Merge all accumulated blocks to keep no more than limit rows. void remerge(); + void updatePartialResult(); + ProcessorPtr external_merging_sorted; }; From df1601bfa9c09f9b4b42cad71ed48aa9396f5611 Mon Sep 17 00:00:00 2001 From: alexX512 Date: Mon, 10 Apr 2023 14:06:19 +0000 Subject: [PATCH 0010/1687] Change to correct name of setting partial_result_on_first_cancel --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f1732000a5e..d02ca58de82 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -283,7 +283,7 @@ class IColumn; \ M(Bool, final, false, "Query with the FINAL modifier by default. If the engine does not support final, it does not have any effect. On queries with multiple tables final is applied only on those that support it. It also works on distributed tables", 0) \ \ - M(Bool, stop_reading_on_first_cancel, false, "Allows query to return a partial result after cancel.", 0) \ + M(Bool, partial_result_on_first_cancel, false, "Allows query to return a partial result after cancel.", 0) \ \ M(Milliseconds, partial_result_update_duration_ms, 0, "Duration of time in milliseconds between real-time updates of result table sent to the client during query execution.", 0) \ M(UInt64, max_rows_in_partial_result, 10, "Max rows displayed to user after each real-time update of output table during query execution.", 0) \ From 9754e4e9b34948a359ca99f7350c1ad24318572f Mon Sep 17 00:00:00 2001 From: alexX512 Date: Tue, 11 Apr 2023 09:20:12 +0000 Subject: [PATCH 0011/1687] Add supporting of partial result in LimitTransform --- src/Client/ClientBase.cpp | 2 +- src/Processors/LimitTransform.cpp | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 18912d22497..bf8ab8be626 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -885,7 +885,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa } const auto & settings = global_context->getSettingsRef(); - const Int32 signals_before_stop = settings.stop_reading_on_first_cancel ? 2 : 1; + const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 
2 : 1; has_partial_result_setting = settings.partial_result_update_duration_ms.totalMilliseconds() > 0; if (is_interactive && has_partial_result_setting) diff --git a/src/Processors/LimitTransform.cpp b/src/Processors/LimitTransform.cpp index 4e517ef6064..07de5795938 100644 --- a/src/Processors/LimitTransform.cpp +++ b/src/Processors/LimitTransform.cpp @@ -187,8 +187,9 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data) } auto rows = data.current_chunk.getNumRows(); + bool is_current_data_partial = data.current_chunk.hasPartialResult(); - if (rows_before_limit_at_least && !data.input_port_has_counter) + if (rows_before_limit_at_least && !data.input_port_has_counter && !is_current_data_partial) rows_before_limit_at_least->add(rows); /// Skip block (for 'always_read_till_end' case). @@ -212,6 +213,9 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data) if (rows_read <= offset) { + if (is_current_data_partial) + rows_read = 0; + data.current_chunk.clear(); if (input.isFinished()) @@ -240,9 +244,15 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data) bool may_need_more_data_for_ties = previous_row_chunk || rows_read - rows <= offset + limit; /// No more data is needed. - if (!always_read_till_end && !limit_is_unreachable && rows_read >= offset + limit && !may_need_more_data_for_ties) + if (!always_read_till_end && !limit_is_unreachable && rows_read >= offset + limit && !may_need_more_data_for_ties && !is_current_data_partial) input.close(); + if (is_current_data_partial) + { + rows_read = 0; + previous_row_chunk = {}; + } + output.push(std::move(data.current_chunk)); return Status::PortFull; From 006af1dfa166231a379cb24b829fc76e755c5c05 Mon Sep 17 00:00:00 2001 From: Rory Crispin Date: Mon, 17 Apr 2023 17:41:02 +0200 Subject: [PATCH 0012/1687] validate direct dictionary lifetime is unset during creation --- .../getDictionaryConfigurationFromAST.cpp | 12 ++++++++++++ ..._create_direct_dict_with_lifetime_crash.reference | 0 .../02716_create_direct_dict_with_lifetime_crash.sql | 3 +++ 3 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/02716_create_direct_dict_with_lifetime_crash.reference create mode 100644 tests/queries/0_stateless/02716_create_direct_dict_with_lifetime_crash.sql diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 0de8b843604..9db17428dbd 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -571,11 +571,23 @@ void checkPrimaryKey(const AttributeNameToConfiguration & all_attrs, const Names } +void checkLifetime(const ASTCreateQuery & query) +{ + if (query.dictionary -> layout && query.dictionary->layout->layout_type == "direct") { + if (query.dictionary -> lifetime) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "'lifetime' parameter is redundant for the dictionary' of layout '{}'", + query.dictionary->layout->layout_type); + } +} + DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuery & query, ContextPtr context, const std::string & database_) { checkAST(query); + checkLifetime(query); AutoPtr xml_document(new Poco::XML::Document()); AutoPtr document_root(xml_document->createElement("dictionaries")); diff --git a/tests/queries/0_stateless/02716_create_direct_dict_with_lifetime_crash.reference b/tests/queries/0_stateless/02716_create_direct_dict_with_lifetime_crash.reference new file mode 100644 index 
00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02716_create_direct_dict_with_lifetime_crash.sql b/tests/queries/0_stateless/02716_create_direct_dict_with_lifetime_crash.sql new file mode 100644 index 00000000000..d96f6249e43 --- /dev/null +++ b/tests/queries/0_stateless/02716_create_direct_dict_with_lifetime_crash.sql @@ -0,0 +1,3 @@ +CREATE TABLE IF NOT EXISTS dict_source (key UInt64, value String) ENGINE=MergeTree ORDER BY key; + +CREATE DICTIONARY dict(`key` UInt64,`value` String) PRIMARY KEY key SOURCE(CLICKHOUSE(table 'dict_source')) LAYOUT(DIRECT()) LIFETIME(0); -- { serverError 36 } From 5e80e9263e73ec3fa66ea87f62662e6fb2736551 Mon Sep 17 00:00:00 2001 From: Rory Crispin Date: Sat, 22 Apr 2023 15:29:20 +0100 Subject: [PATCH 0013/1687] Remove spaces around -> --- src/Dictionaries/getDictionaryConfigurationFromAST.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 9db17428dbd..d0180c2a3d7 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -573,8 +573,8 @@ void checkPrimaryKey(const AttributeNameToConfiguration & all_attrs, const Names void checkLifetime(const ASTCreateQuery & query) { - if (query.dictionary -> layout && query.dictionary->layout->layout_type == "direct") { - if (query.dictionary -> lifetime) + if (query.dictionary->layout && query.dictionary->layout->layout_type == "direct") { + if (query.dictionary->lifetime) throw Exception( ErrorCodes::BAD_ARGUMENTS, "'lifetime' parameter is redundant for the dictionary' of layout '{}'", From 66300402eed123afa40bfb932b9f1c4ca35dca01 Mon Sep 17 00:00:00 2001 From: Rory Crispin Date: Sat, 22 Apr 2023 15:42:57 +0100 Subject: [PATCH 0014/1687] Bracket on newline --- src/Dictionaries/getDictionaryConfigurationFromAST.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index d0180c2a3d7..32cb313ee97 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -573,7 +573,8 @@ void checkPrimaryKey(const AttributeNameToConfiguration & all_attrs, const Names void checkLifetime(const ASTCreateQuery & query) { - if (query.dictionary->layout && query.dictionary->layout->layout_type == "direct") { + if (query.dictionary->layout && query.dictionary->layout->layout_type == "direct") + { if (query.dictionary->lifetime) throw Exception( ErrorCodes::BAD_ARGUMENTS, From 99175aefae1eb445ed7db63b43c13a31e25e6f3c Mon Sep 17 00:00:00 2001 From: Rory Crispin Date: Sat, 22 Apr 2023 17:02:05 +0100 Subject: [PATCH 0015/1687] trailing whitespace --- src/Dictionaries/getDictionaryConfigurationFromAST.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 32cb313ee97..e87e78619d4 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -573,7 +573,7 @@ void checkPrimaryKey(const AttributeNameToConfiguration & all_attrs, const Names void checkLifetime(const ASTCreateQuery & query) { - if (query.dictionary->layout && query.dictionary->layout->layout_type == "direct") + if (query.dictionary->layout && 
query.dictionary->layout->layout_type == "direct")
     {
         if (query.dictionary->lifetime)
             throw Exception(

From e94285d4683065fc21949ebb3adad72d3ce07eb3 Mon Sep 17 00:00:00 2001
From: alexX512
Date: Mon, 1 May 2023 09:25:15 +0000
Subject: [PATCH 0016/1687] Send partial result with the new architecture in
 the special pipeline

---
 src/Client/ClientBase.cpp                     |  54 ++++----
 src/Client/ClientBase.h                       |  12 +-
 src/Core/BlockInfo.h                          |   8 +-
 src/Processors/Chunk.cpp                      |  18 +--
 src/Processors/Chunk.h                        |  18 +--
 src/Processors/Executors/ExecutorTasks.cpp    |   4 +-
 .../PullingAsyncPipelineExecutor.cpp          |   1 -
 src/Processors/Formats/IOutputFormat.cpp      | 105 +++++++++---
 src/Processors/Formats/IOutputFormat.h        |   9 +-
 .../Formats/Impl/PrettyBlockOutputFormat.cpp  |  13 +-
 .../Formats/Impl/PrettyBlockOutputFormat.h    |   3 +
 .../Impl/PrettyCompactBlockOutputFormat.cpp   |   2 +-
 src/Processors/Formats/LazyOutputFormat.h     |   1 +
 src/Processors/IProcessor.h                   |   8 ++
 src/Processors/LimitTransform.cpp             |  28 ++--
 src/Processors/LimitTransform.h               |   3 +
 .../QueryPlan/BuildQueryPipelineSettings.cpp  |   7 +-
 .../QueryPlan/BuildQueryPipelineSettings.h    |   3 +
 src/Processors/QueryPlan/QueryPlan.cpp        |   4 +
 src/Processors/QueryPlan/SortingStep.cpp      |   6 +-
 .../Transforms/ExpressionTransform.cpp        |   6 +
 .../Transforms/ExpressionTransform.h          |   3 +
 .../Transforms/LimitPartialResultTransform.h  |  53 +++++++
 .../Transforms/LimitsCheckingTransform.cpp    |   9 +-
 .../Transforms/LimitsCheckingTransform.h      |   3 +
 .../Transforms/MergeSortingTransform.cpp      | 122 ++++++++++------
 .../Transforms/MergeSortingTransform.h        |  15 +-
 .../Transforms/PartialResultTransform.cpp     |  67 +++++++++
 .../Transforms/PartialResultTransform.h       |  51 +++++++
 src/QueryPipeline/Pipe.cpp                    | 131 +++++++++++++----
 src/QueryPipeline/Pipe.h                      |  22 ++-
 src/QueryPipeline/QueryPipeline.cpp           |  49 ++++++-
 src/QueryPipeline/QueryPipeline.h             |   4 +-
 src/QueryPipeline/QueryPipelineBuilder.h      |   6 +
 34 files changed, 639 insertions(+), 209 deletions(-)
 create mode 100644 src/Processors/Transforms/LimitPartialResultTransform.h
 create mode 100644 src/Processors/Transforms/PartialResultTransform.cpp
 create mode 100644 src/Processors/Transforms/PartialResultTransform.h

diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index bf8ab8be626..3fe2670fc98 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -438,7 +438,18 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
     if (!block)
         return;
 
-    if (!block.info.has_partial_result)
+    if (block.rows() == 0 && partial_result_mode == PartialResultMode::Active)
+    {
+        partial_result_mode = PartialResultMode::Inactive;
+        if (is_interactive)
+        {
+            progress_indication.clearProgressOutput(*tty_buf);
+            std::cout << "Full result:" << std::endl;
+            progress_indication.writeProgress(*tty_buf);
+        }
+    }
+
+    if (partial_result_mode == PartialResultMode::Inactive)
         processed_rows += block.rows();
 
     /// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset.
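The hunk above implements a small client-side state machine: initOutputFormat() moves the client out of NotInit when the first block arrives, and an empty block received while partial results are active marks the switch to the full result. A minimal standalone sketch of those transitions follows; the onBlock() driver is hypothetical and not part of the patch.

#include <cstddef>
#include <cstdio>

// Client-side mode, mirroring the enum this commit adds to ClientBase.h.
enum class PartialResultMode : unsigned char { NotInit, Active, Inactive };

// Collapses the transitions from initOutputFormat()/onData(): the first block
// activates partial results, and an empty block while Active deactivates them.
PartialResultMode onBlock(PartialResultMode mode, size_t rows)
{
    if (mode == PartialResultMode::NotInit)
        return PartialResultMode::Active;
    if (mode == PartialResultMode::Active && rows == 0)
        return PartialResultMode::Inactive;
    return mode;
}

int main()
{
    PartialResultMode mode = PartialResultMode::NotInit;
    for (size_t rows : {5, 10, 0, 100})  // two partial updates, empty separator, full result
    {
        mode = onBlock(mode, rows);
        std::printf("rows=%zu mode=%d\n", rows, static_cast<int>(mode));
    }
}

The empty block thus doubles as an in-band separator between the stream of partial results and the final result, which is why the hunks below stop counting partial blocks toward processed_rows.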
@@ -450,35 +461,21 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query) if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100)) return; - if (received_first_full_result && block.info.has_partial_result) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Server shouldn't send partial results after the first block with a non-empty full result"); - - if (!is_interactive && block.info.has_partial_result) + if (!is_interactive && partial_result_mode == PartialResultMode::Active) return; - bool first_full_result = false; - if (!received_first_full_result && !block.info.has_partial_result) - { - received_first_full_result = true; - first_full_result = true; - } - /// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker. if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout)) progress_indication.clearProgressOutput(*tty_buf); - if (is_interactive && first_full_result && has_partial_result_setting) - std::cout << "Full result:" << std::endl; - try { - /// Clear previous partial results to write new partial results if needed - if (!received_first_full_result && written_first_block) - output_format->clearLastLines(prev_block_rows + 2); + if (partial_result_mode == PartialResultMode::Active) + output_format->writePartialResult(materializeBlock(block)); + else + output_format->write(materializeBlock(block)); - output_format->write(materializeBlock(block)); written_first_block = true; - prev_block_rows = block.rows(); } catch (const Exception &) { @@ -541,6 +538,9 @@ void ClientBase::onProfileInfo(const ProfileInfo & profile_info) void ClientBase::initOutputFormat(const Block & block, ASTPtr parsed_query) try { + if (partial_result_mode == PartialResultMode::NotInit) + partial_result_mode = PartialResultMode::Active; + if (!output_format) { /// Ignore all results when fuzzing as they can be huge. @@ -886,10 +886,14 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa const auto & settings = global_context->getSettingsRef(); const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1; - has_partial_result_setting = settings.partial_result_update_duration_ms.totalMilliseconds() > 0; + bool has_partial_result_setting = settings.partial_result_update_duration_ms.totalMilliseconds() > 0; - if (is_interactive && has_partial_result_setting) - std::cout << "Partial result:" << std::endl; + if (has_partial_result_setting) + { + partial_result_mode = PartialResultMode::NotInit; + if (is_interactive) + std::cout << "Partial result:" << std::endl; + } int retries_left = 10; while (retries_left) @@ -1645,9 +1649,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin } processed_rows = 0; - prev_block_rows = 0; - has_partial_result_setting = false; - received_first_full_result = false; + partial_result_mode = PartialResultMode::Inactive; written_first_block = false; progress_indication.resetProgress(); profile_events.watch.restart(); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 1c128c0700d..dee1761fa43 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -259,11 +259,13 @@ protected: size_t processed_rows = 0; /// How many rows have been read or written. 
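Everywhere this commit consults partial_result_update_duration_ms, the pattern is the same: a snapshot is produced only once that much time has elapsed since the previous one, then the timer restarts. A minimal sketch of that throttling idea; UpdateThrottle is a hypothetical stand-in for the Stopwatch-based check that PartialResultTransform::work() performs later in this patch.

#include <chrono>
#include <cstdio>
#include <thread>

using Clock = std::chrono::steady_clock;

// Allows an update only when at least `period` has elapsed since the last one.
class UpdateThrottle
{
public:
    explicit UpdateThrottle(std::chrono::milliseconds period_) : period(period_), last(Clock::now()) {}

    bool shouldUpdate()
    {
        auto now = Clock::now();
        if (now - last < period)
            return false;
        last = now;  // mirrors watch.restart() in the patch
        return true;
    }

private:
    std::chrono::milliseconds period;
    Clock::time_point last;
};

int main()
{
    UpdateThrottle throttle(std::chrono::milliseconds(100));
    for (int i = 0; i < 5; ++i)
    {
        std::this_thread::sleep_for(std::chrono::milliseconds(60));
        std::printf("tick %d: update=%d\n", i, throttle.shouldUpdate() ? 1 : 0);
    }
}

With a 100 ms period and 60 ms ticks, only every other tick reports an update, which is how a fast query ends up producing far fewer partial-result updates than blocks.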
bool print_num_processed_rows = false; /// Whether to print the number of processed rows at - /// If `received_first_full_result` set to true then - /// client has already received all updates with a partial result - bool received_first_full_result = false; - bool has_partial_result_setting = false; - size_t prev_block_rows = 0; /// How many rows were in the previously received block of data + enum class PartialResultMode: UInt8 + { + NotInit, /// Query doesn't show partial result before the first block with 0 rows. + Active, /// Query shows partial result after the first and before the second block with 0 rows. + Inactive /// Query doesn't show partial result at all. + }; + PartialResultMode partial_result_mode = PartialResultMode::Inactive; bool print_stack_trace = false; /// The last exception that was received from the server. Is used for the diff --git a/src/Core/BlockInfo.h b/src/Core/BlockInfo.h index a0357b292cd..d431303ca39 100644 --- a/src/Core/BlockInfo.h +++ b/src/Core/BlockInfo.h @@ -28,15 +28,9 @@ struct BlockInfo * Otherwise -1. */ - /** has_partial_result: - * If user wants to receive updates containing partial results during query execution, - * then data will be sent with has_partial_result flag set to true. - */ - #define APPLY_FOR_BLOCK_INFO_FIELDS(M) \ M(bool, is_overflows, false, 1) \ - M(Int32, bucket_num, -1, 2) \ - M(bool, has_partial_result, false, 3) + M(Int32, bucket_num, -1, 2) #define DECLARE_FIELD(TYPE, NAME, DEFAULT, FIELD_NUM) \ TYPE NAME = DEFAULT; diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index d63e4c14db0..ccb72cb977f 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -12,14 +12,14 @@ namespace ErrorCodes extern const int POSITION_OUT_OF_BOUND; } -Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_, bool has_partial_result_) - : columns(std::move(columns_)), num_rows(num_rows_), has_partial_result(has_partial_result_) +Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) + : columns(std::move(columns_)), num_rows(num_rows_) { checkNumRowsIsConsistent(); } -Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_, bool has_partial_result_) - : columns(std::move(columns_)), num_rows(num_rows_), chunk_info(std::move(chunk_info_)), has_partial_result(has_partial_result_) +Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(std::move(columns_)), num_rows(num_rows_), chunk_info(std::move(chunk_info_)) { checkNumRowsIsConsistent(); } @@ -34,21 +34,21 @@ static Columns unmuteColumns(MutableColumns && mutable_columns) return columns; } -Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, bool has_partial_result_) - : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_), has_partial_result(has_partial_result_) +Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) + : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_) { checkNumRowsIsConsistent(); } -Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_, bool has_partial_result_) - : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_), chunk_info(std::move(chunk_info_)), has_partial_result(has_partial_result_) +Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(unmuteColumns(std::move(columns_))), num_rows(num_rows_), chunk_info(std::move(chunk_info_)) { checkNumRowsIsConsistent(); } Chunk Chunk::clone() const { - return Chunk(getColumns(), getNumRows(), chunk_info, has_partial_result); + return 
Chunk(getColumns(), getNumRows(), chunk_info); } void Chunk::setColumns(Columns columns_, UInt64 num_rows_) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index e3f9b17dfe9..6f2097b71f1 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -38,16 +38,14 @@ public: : columns(std::move(other.columns)) , num_rows(other.num_rows) , chunk_info(std::move(other.chunk_info)) - , has_partial_result(other.has_partial_result) { other.num_rows = 0; - other.has_partial_result = false; } - Chunk(Columns columns_, UInt64 num_rows_, bool has_partial_result_ = false); - Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_, bool has_partial_result_ = false); - Chunk(MutableColumns columns_, UInt64 num_rows_, bool has_partial_result_ = false); - Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_, bool has_partial_result_ = false); + Chunk(Columns columns_, UInt64 num_rows_); + Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); + Chunk(MutableColumns columns_, UInt64 num_rows_); + Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept @@ -56,8 +54,6 @@ public: chunk_info = std::move(other.chunk_info); num_rows = other.num_rows; other.num_rows = 0; - has_partial_result = other.has_partial_result; - other.has_partial_result = false; return *this; } @@ -68,7 +64,6 @@ public: columns.swap(other.columns); chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); - std::swap(has_partial_result, other.has_partial_result); } void clear() @@ -76,7 +71,6 @@ public: num_rows = 0; columns.clear(); chunk_info.reset(); - has_partial_result = false; } const Columns & getColumns() const { return columns; } @@ -110,14 +104,10 @@ public: void append(const Chunk & chunk); void append(const Chunk & chunk, size_t from, size_t length); // append rows [from, from+length) of chunk - void changePartialResultStatus(bool has_partial_result_) { has_partial_result = has_partial_result_; } - bool hasPartialResult() const { return has_partial_result; } - private: Columns columns; UInt64 num_rows = 0; ChunkInfoPtr chunk_info; - bool has_partial_result = false; void checkNumRowsIsConsistent(); }; diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp index e61d225a968..496e859746a 100644 --- a/src/Processors/Executors/ExecutorTasks.cpp +++ b/src/Processors/Executors/ExecutorTasks.cpp @@ -108,8 +108,8 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea { context.setTask(nullptr); - /// Take local task from queue if has one. - if (!queue.empty() && !context.hasAsyncTasks()) + /// Take local task from queue if has one and it's not a processor which generates partial result. 
+ if (!queue.empty() && !queue.front()->processor->isPartialResultProcessor() && !context.hasAsyncTasks()) { context.setTask(queue.front()); queue.pop(); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index e568ec21dac..73a3142d459 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -160,7 +160,6 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) } block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - block.info.has_partial_result = chunk.hasPartialResult(); if (auto chunk_info = chunk.getChunkInfo()) { diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index cd292d7795e..d3e0ed2fcc6 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -6,7 +6,7 @@ namespace DB { IOutputFormat::IOutputFormat(const Block & header_, WriteBuffer & out_) - : IProcessor({header_, header_, header_}, {}), out(out_) + : IProcessor({header_, header_, header_, header_}, {}), out(out_) { } @@ -15,26 +15,13 @@ IOutputFormat::Status IOutputFormat::prepare() if (has_input) return Status::Ready; - for (auto kind : {Main, Totals, Extremes}) - { - auto & input = getPort(kind); + auto status = prepareMainAndPartialResult(); + if (status != Status::Finished) + return status; - if (kind != Main && !input.isConnected()) - continue; - - if (input.isFinished()) - continue; - - input.setNeeded(); - - if (!input.hasData()) - return Status::NeedData; - - current_chunk = input.pull(true); - current_block_kind = kind; - has_input = true; - return Status::Ready; - } + status = prepareTotalsAndExtremes(); + if (status != Status::Finished) + return status; finished = true; @@ -44,6 +31,65 @@ IOutputFormat::Status IOutputFormat::prepare() return Status::Finished; } +IOutputFormat::Status IOutputFormat::prepareMainAndPartialResult() +{ + bool need_data = false; + for (auto kind : {Main, PartialResult}) + { + auto & input = getPort(kind); + + if (input.isFinished()) + continue; + + if (kind == PartialResult && was_main_input) + { + input.close(); + continue; + } + + input.setNeeded(); + need_data = true; + + if (!input.hasData()) + continue; + + setCurrentChunk(input, kind); + return Status::Ready; + } + + if (need_data) + return Status::NeedData; + + return Status::Finished; +} + +IOutputFormat::Status IOutputFormat::prepareTotalsAndExtremes() +{ + for (auto kind : {Totals, Extremes}) + { + auto & input = getPort(kind); + + if (!input.isConnected() || input.isFinished()) + continue; + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + setCurrentChunk(input, kind); + return Status::Ready; + } + + return Status::Finished; +} + +void IOutputFormat::setCurrentChunk(InputPort & input, PortKind kind) +{ + current_chunk = input.pull(true); + current_block_kind = kind; + has_input = true; +} + static Chunk prepareTotals(Chunk chunk) { if (!chunk.hasRows()) @@ -83,8 +129,16 @@ void IOutputFormat::work() case Main: result_rows += current_chunk.getNumRows(); result_bytes += current_chunk.allocatedBytes(); + if (!was_main_input && current_chunk.hasRows()) + { + consume(Chunk(current_chunk.cloneEmptyColumns(), 0)); + was_main_input = true; + } consume(std::move(current_chunk)); break; + case PartialResult: + consumePartialResult(std::move(current_chunk)); + break; case 
Totals: writeSuffixIfNeeded(); if (auto totals = prepareTotals(std::move(current_chunk))) @@ -113,7 +167,16 @@ void IOutputFormat::flush() void IOutputFormat::write(const Block & block) { writePrefixIfNeeded(); - consume(Chunk(block.getColumns(), block.rows(), block.info.has_partial_result)); + consume(Chunk(block.getColumns(), block.rows())); + + if (auto_flush) + flush(); +} + +void IOutputFormat::writePartialResult(const Block & block) +{ + writePrefixIfNeeded(); + consumePartialResult(Chunk(block.getColumns(), block.rows())); if (auto_flush) flush(); diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 04cf38b3038..2c598e2620b 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -23,7 +23,7 @@ class WriteBuffer; class IOutputFormat : public IProcessor { public: - enum PortKind { Main = 0, Totals = 1, Extremes = 2 }; + enum PortKind { Main = 0, Totals = 1, Extremes = 2, PartialResult = 3 }; IOutputFormat(const Block & header_, WriteBuffer & out_); @@ -54,6 +54,7 @@ public: /// TODO: separate formats and processors. void write(const Block & block); + void writePartialResult(const Block & block); void finalize(); @@ -120,6 +121,7 @@ protected: virtual void consume(Chunk) = 0; virtual void consumeTotals(Chunk) {} virtual void consumeExtremes(Chunk) {} + virtual void consumePartialResult(Chunk) {} virtual void finalizeImpl() {} virtual void finalizeBuffers() {} virtual void writePrefix() {} @@ -168,6 +170,7 @@ protected: Chunk current_chunk; PortKind current_block_kind = PortKind::Main; + bool was_main_input = false; bool has_input = false; bool finished = false; bool finalized = false; @@ -182,6 +185,10 @@ protected: Statistics statistics; private: + IOutputFormat::Status prepareMainAndPartialResult(); + IOutputFormat::Status prepareTotalsAndExtremes(); + void setCurrentChunk(InputPort & input, PortKind kind); + size_t rows_read_before = 0; bool are_totals_written = false; diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 208b851b7df..3e468ceaf75 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -140,7 +140,7 @@ void PrettyBlockOutputFormat::write(Chunk chunk, PortKind port_kind) { if (total_rows >= format_settings.pretty.max_rows) { - if (!chunk.hasPartialResult()) + if (port_kind != PortKind::PartialResult) total_rows += chunk.getNumRows(); return; } @@ -322,7 +322,7 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind } writeString(bottom_separator_s, out); - if (!chunk.hasPartialResult()) + if (port_kind != PortKind::PartialResult) total_rows += num_rows; } @@ -396,6 +396,15 @@ void PrettyBlockOutputFormat::consumeExtremes(Chunk chunk) write(std::move(chunk), PortKind::Extremes); } +void PrettyBlockOutputFormat::consumePartialResult(Chunk chunk) +{ + if (prev_partial_block_rows > 0) + clearLastLines(prev_partial_block_rows + 2); + + prev_partial_block_rows = chunk.getNumRows(); + write(std::move(chunk), PortKind::PartialResult); +} + void PrettyBlockOutputFormat::writeMonoChunkIfNeeded() { diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h index 95c72d15fa9..f9e69ebad51 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h @@ -27,9 +27,11 @@ protected: void 
consume(Chunk) override; void consumeTotals(Chunk) override; void consumeExtremes(Chunk) override; + void consumePartialResult(Chunk) override; size_t total_rows = 0; size_t terminal_width = 0; + size_t prev_partial_block_rows = 0; size_t row_number_width = 7; // "10000. " @@ -57,6 +59,7 @@ protected: void resetFormatterImpl() override { total_rows = 0; + prev_partial_block_rows = 0; } private: diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index bf6e4cb21f1..3a04d86b1ad 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -194,7 +194,7 @@ void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind po writeBottom(max_widths); - if (!chunk.hasPartialResult()) + if (port_kind != PortKind::PartialResult) total_rows += num_rows; } diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index b539a8494c7..81b9d3e464a 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -49,6 +49,7 @@ protected: void consumeTotals(Chunk chunk) override { totals = std::move(chunk); } void consumeExtremes(Chunk chunk) override { extremes = std::move(chunk); } + void consumePartialResult(Chunk chunk) override { consume(std::move(chunk)); } private: diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index c9dd7d8d77d..cb819c00ff2 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -234,6 +234,14 @@ public: { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'expandPipeline' is not implemented for {} processor", getName()); } + + virtual bool isPartialResultProcessor() const { return false; } + virtual bool supportPartialResultProcessor() const { return false; } + + virtual ProcessorPtr getPartialResultProcessor(ProcessorPtr /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getPartialResultProcessor' is not implemented for {} processor", getName()); + } /// In case if query was cancelled executor will wait till all processors finish their jobs. /// Generally, there is no reason to check this flag. However, it may be reasonable for long operations (e.g. i/o). diff --git a/src/Processors/LimitTransform.cpp b/src/Processors/LimitTransform.cpp index 07de5795938..0d43b1e4fac 100644 --- a/src/Processors/LimitTransform.cpp +++ b/src/Processors/LimitTransform.cpp @@ -1,5 +1,5 @@ #include - +#include namespace DB { @@ -180,16 +180,9 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data) return Status::NeedData; data.current_chunk = input.pull(true); - if (data.current_chunk.hasPartialResult()) - { - output.push(std::move(data.current_chunk)); - return Status::PortFull; - } - auto rows = data.current_chunk.getNumRows(); - bool is_current_data_partial = data.current_chunk.hasPartialResult(); - if (rows_before_limit_at_least && !data.input_port_has_counter && !is_current_data_partial) + if (rows_before_limit_at_least && !data.input_port_has_counter) rows_before_limit_at_least->add(rows); /// Skip block (for 'always_read_till_end' case). 
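This hunk and the next revert the partial-result special cases that earlier commits added to LimitTransform; the commit instead routes partial chunks through the dedicated LimitPartialResultTransform that appears later in this patch. The row window that transform must keep is the ordinary LIMIT/OFFSET cut; a small sketch of the arithmetic follows. cutForLimit() is a hypothetical helper, and it treats a chunk that ends exactly at the offset as empty, a slightly stricter check than the transform's own.

#include <algorithm>
#include <cassert>
#include <cstdint>

struct Cut { uint64_t start = 0; uint64_t length = 0; };

// Keep at most `limit` rows starting at `offset`, or nothing if the chunk is too short.
Cut cutForLimit(uint64_t num_rows, uint64_t offset, uint64_t limit)
{
    if (num_rows <= offset || limit == 0)
        return {};  // drop the chunk entirely
    return {offset, std::min(limit, num_rows - offset)};
}

int main()
{
    assert(cutForLimit(100, 10, 5).length == 5);  // plenty of rows: keep [10, 15)
    assert(cutForLimit(12, 10, 5).length == 2);   // short chunk: keep [10, 12)
    assert(cutForLimit(8, 10, 5).length == 0);    // chunk ends before the offset
    return 0;
}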
@@ -213,9 +206,6 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data) if (rows_read <= offset) { - if (is_current_data_partial) - rows_read = 0; - data.current_chunk.clear(); if (input.isFinished()) @@ -244,15 +234,9 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data) bool may_need_more_data_for_ties = previous_row_chunk || rows_read - rows <= offset + limit; /// No more data is needed. - if (!always_read_till_end && !limit_is_unreachable && rows_read >= offset + limit && !may_need_more_data_for_ties && !is_current_data_partial) + if (!always_read_till_end && !limit_is_unreachable && rows_read >= offset + limit && !may_need_more_data_for_ties) input.close(); - if (is_current_data_partial) - { - rows_read = 0; - previous_row_chunk = {}; - } - output.push(std::move(data.current_chunk)); return Status::PortFull; @@ -382,5 +366,11 @@ bool LimitTransform::sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort return true; } +ProcessorPtr LimitTransform::getPartialResultProcessor(ProcessorPtr /*current_processor*/, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) +{ + const auto & header = inputs.front().getHeader(); + return std::make_shared(header, partial_result_limit, partial_result_duration_ms, limit, offset); +} + } diff --git a/src/Processors/LimitTransform.h b/src/Processors/LimitTransform.h index 33ff968985f..7ad358c5d41 100644 --- a/src/Processors/LimitTransform.h +++ b/src/Processors/LimitTransform.h @@ -55,6 +55,9 @@ private: ColumnRawPtrs extractSortColumns(const Columns & columns) const; bool sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, UInt64 current_chunk_row_num) const; + bool supportPartialResultProcessor() const override { return true; } + ProcessorPtr getPartialResultProcessor(ProcessorPtr current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; + public: LimitTransform( const Block & header_, UInt64 limit_, UInt64 offset_, size_t num_streams = 1, diff --git a/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp b/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp index fb3ed7f80fc..60ac30389a1 100644 --- a/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp +++ b/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp @@ -9,7 +9,12 @@ namespace DB BuildQueryPipelineSettings BuildQueryPipelineSettings::fromContext(ContextPtr from) { BuildQueryPipelineSettings settings; - settings.actions_settings = ExpressionActionsSettings::fromSettings(from->getSettingsRef(), CompileExpressions::yes); + + const auto & context_settings = from->getSettingsRef(); + settings.partial_result_limit = context_settings.max_rows_in_partial_result; + settings.partial_result_duration_ms = context_settings.partial_result_update_duration_ms.totalMilliseconds(); + + settings.actions_settings = ExpressionActionsSettings::fromSettings(context_settings, CompileExpressions::yes); settings.process_list_element = from->getProcessListElement(); settings.progress_callback = from->getProgressCallback(); return settings; diff --git a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h index 3b5e4e06953..0410bf925d1 100644 --- a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h +++ b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h @@ -19,6 +19,9 @@ struct BuildQueryPipelineSettings QueryStatusPtr process_list_element; ProgressCallback progress_callback = nullptr; + UInt64 partial_result_limit = 0; + UInt64 
partial_result_duration_ms = 0; + const ExpressionActionsSettings & getActionsSettings() const { return actions_settings; } static BuildQueryPipelineSettings fromContext(ContextPtr from); }; diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 3fbe3d89845..1e31ce0711d 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -168,6 +168,7 @@ QueryPipelineBuilderPtr QueryPlan::buildQueryPipeline( QueryPipelineBuilderPtr last_pipeline; + bool has_partial_result_setting = build_pipeline_settings.partial_result_duration_ms > 0; std::stack stack; stack.push(Frame{.node = root}); @@ -195,6 +196,9 @@ QueryPipelineBuilderPtr QueryPlan::buildQueryPipeline( } else stack.push(Frame{.node = frame.node->children[next_child]}); + + if (last_pipeline && has_partial_result_setting) + last_pipeline->activatePartialResult(build_pipeline_settings.partial_result_limit, build_pipeline_settings.partial_result_duration_ms); } last_pipeline->setProgressCallback(build_pipeline_settings.progress_callback); diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index ad09afa6ea2..db44da5a0fc 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -33,8 +33,6 @@ SortingStep::Settings::Settings(const Context & context) max_bytes_before_external_sort = settings.max_bytes_before_external_sort; tmp_data = context.getTempDataOnDisk(); min_free_disk_space = settings.min_free_disk_space_for_temporary_data; - partial_result_limit = settings.max_rows_in_partial_result; - partial_result_duration_ms = settings.partial_result_update_duration_ms.totalMilliseconds(); } SortingStep::Settings::Settings(size_t max_block_size_) @@ -219,9 +217,7 @@ void SortingStep::mergeSorting( sort_settings.remerge_lowered_memory_bytes_ratio, sort_settings.max_bytes_before_external_sort, std::move(tmp_data_on_disk), - sort_settings.min_free_disk_space, - sort_settings.partial_result_limit, - sort_settings.partial_result_duration_ms); + sort_settings.min_free_disk_space); }); } diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 49988932947..5ec76dc95b8 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -25,6 +25,12 @@ void ExpressionTransform::transform(Chunk & chunk) chunk.setColumns(block.getColumns(), num_rows); } +ProcessorPtr ExpressionTransform::getPartialResultProcessor(ProcessorPtr /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/) +{ + const auto & header = getInputPort().getHeader(); + return std::make_shared(header, expression); +} + ConvertingTransform::ConvertingTransform(const Block & header_, ExpressionActionsPtr expression_) : ExceptionKeepingTransform(header_, ExpressionTransform::transformHeader(header_, expression_->getActionsDAG())) , expression(std::move(expression_)) diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index 791c7d7ba73..6f9b585ae1f 100644 --- a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -29,6 +29,9 @@ public: protected: void transform(Chunk & chunk) override; + bool supportPartialResultProcessor() const override { return true; } + ProcessorPtr getPartialResultProcessor(ProcessorPtr current_processor, UInt64 partial_result_limit, UInt64 
partial_result_duration_ms) override;
+
 private:
     ExpressionActionsPtr expression;
 };
diff --git a/src/Processors/Transforms/LimitPartialResultTransform.h b/src/Processors/Transforms/LimitPartialResultTransform.h
new file mode 100644
index 00000000000..8a919615088
--- /dev/null
+++ b/src/Processors/Transforms/LimitPartialResultTransform.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include 
+
+namespace DB
+{
+
+class LimitPartialResultTransform : public PartialResultTransform
+{
+public:
+    LimitPartialResultTransform(
+        const Block & header,
+        UInt64 partial_result_limit_,
+        UInt64 partial_result_duration_ms_,
+        UInt64 limit_,
+        UInt64 offset_)
+        : PartialResultTransform(header, partial_result_limit_, partial_result_duration_ms_)
+        , limit(limit_)
+        , offset(offset_)
+    {}
+
+    String getName() const override { return "LimitPartialResultTransform"; }
+
+    void transformPartialResult(Chunk & chunk) override
+    {
+        UInt64 num_rows = chunk.getNumRows();
+        if (num_rows < offset || limit == 0)
+        {
+            chunk = {};
+            return;
+        }
+
+        UInt64 length = std::min(limit, num_rows - offset);
+
+        /// Check if some rows should be removed
+        if (length < num_rows)
+        {
+            auto columns = chunk.detachColumns();
+            UInt64 num_columns = columns.size();
+
+            for (UInt64 i = 0; i < num_columns; ++i)
+                columns[i] = columns[i]->cut(offset, length);
+
+            chunk.setColumns(std::move(columns), length);
+        }
+    }
+
+private:
+    UInt64 limit;
+    UInt64 offset;
+};
+
+}
diff --git a/src/Processors/Transforms/LimitsCheckingTransform.cpp b/src/Processors/Transforms/LimitsCheckingTransform.cpp
index 02d2fef808c..b426d050543 100644
--- a/src/Processors/Transforms/LimitsCheckingTransform.cpp
+++ b/src/Processors/Transforms/LimitsCheckingTransform.cpp
@@ -1,4 +1,5 @@
 #include 
+#include 
 #include 
 
 namespace DB
@@ -38,7 +39,7 @@ void LimitsCheckingTransform::transform(Chunk & chunk)
         return;
     }
 
-    if (chunk && !chunk.hasPartialResult())
+    if (chunk)
     {
         info.update(chunk);
 
@@ -74,4 +75,10 @@ void LimitsCheckingTransform::checkQuota(Chunk & chunk)
     }
 }
 
+ProcessorPtr LimitsCheckingTransform::getPartialResultProcessor(ProcessorPtr /*current_processor*/, UInt64 partial_result_limit, UInt64 partial_result_duration_ms)
+{
+    const auto & header = inputs.front().getHeader();
+    return std::make_shared(header, partial_result_limit, partial_result_duration_ms);
+}
+
 }
diff --git a/src/Processors/Transforms/LimitsCheckingTransform.h b/src/Processors/Transforms/LimitsCheckingTransform.h
index 2f96a17c17b..663eb774edf 100644
--- a/src/Processors/Transforms/LimitsCheckingTransform.h
+++ b/src/Processors/Transforms/LimitsCheckingTransform.h
@@ -36,6 +36,9 @@ public:
 protected:
     void transform(Chunk & chunk) override;
 
+    bool supportPartialResultProcessor() const override { return true; }
+    ProcessorPtr getPartialResultProcessor(ProcessorPtr current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override;
+
 private:
     StreamLocalLimits limits;
 
diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp
index c44633fb80c..c4c6743c775 100644
--- a/src/Processors/Transforms/MergeSortingTransform.cpp
+++ b/src/Processors/Transforms/MergeSortingTransform.cpp
@@ -1,4 +1,5 @@
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -26,6 +27,11 @@ namespace ProfileEvents
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
 class BufferingToFileTransform : public IAccumulatingTransform
 {
 public:
@@ -75,6 +81,58 @@ private:
     Poco::Logger * log;
 };
 
+class 
MergeSortingPartialResultTransform : public PartialResultTransform +{ +public: + using MergeSortingTransformPtr = std::shared_ptr; + + MergeSortingPartialResultTransform(const Block & header, MergeSortingTransformPtr merge_sorting_transform_, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) + : PartialResultTransform(header, partial_result_limit_, partial_result_duration_ms_) + , merge_sorting_transform(std::move(merge_sorting_transform_)) + { + } + + String getName() const override { return "MergeSortingPartialResultTransform"; } + + ShaphotResult getRealProcessorSnapshot() override + { + std::lock_guard lock(merge_sorting_transform->snapshot_mutex); + if (merge_sorting_transform->generated_prefix) + return {{}, SnaphotStatus::Stopped}; + + if (merge_sorting_transform->chunks.empty()) + return {{}, SnaphotStatus::NotReady}; + + /// Sort all input data + merge_sorting_transform->remerge(); + /// Add a copy of the first `partial_result_limit` rows to a generated_chunk + /// to send it as a partial result in the next prepare stage + auto generated_columns = merge_sorting_transform->chunks[0].cloneEmptyColumns(); + size_t total_rows = 0; + for (const auto & merged_chunk : merge_sorting_transform->chunks) + { + size_t rows = std::min(merged_chunk.getNumRows(), partial_result_limit - total_rows); + if (rows == 0) + break; + + for (size_t position = 0; position < generated_columns.size(); ++position) + { + auto column = merged_chunk.getColumns()[position]; + generated_columns[position]->insertRangeFrom(*column, 0, rows); + } + + total_rows += rows; + } + + auto partial_result = Chunk(std::move(generated_columns), total_rows, merge_sorting_transform->chunks[0].getChunkInfo()); + merge_sorting_transform->enrichChunkWithConstants(partial_result); + return {std::move(partial_result), SnaphotStatus::Ready}; + } + +private: + MergeSortingTransformPtr merge_sorting_transform; +}; + MergeSortingTransform::MergeSortingTransform( const Block & header, const SortDescription & description_, @@ -85,18 +143,13 @@ MergeSortingTransform::MergeSortingTransform( double remerge_lowered_memory_bytes_ratio_, size_t max_bytes_before_external_sort_, TemporaryDataOnDiskPtr tmp_data_, - size_t min_free_disk_space_, - UInt64 partial_result_limit_, - UInt64 partial_result_duration_ms_) + size_t min_free_disk_space_) : SortingTransform(header, description_, max_merged_block_size_, limit_, increase_sort_description_compile_attempts) , max_bytes_before_remerge(max_bytes_before_remerge_) , remerge_lowered_memory_bytes_ratio(remerge_lowered_memory_bytes_ratio_) , max_bytes_before_external_sort(max_bytes_before_external_sort_) , tmp_data(std::move(tmp_data_)) , min_free_disk_space(min_free_disk_space_) - , partial_result_limit(partial_result_limit_) - , partial_result_duration_ms(partial_result_duration_ms_) - , watch(CLOCK_MONOTONIC) { } @@ -129,35 +182,6 @@ Processors MergeSortingTransform::expandPipeline() return std::move(processors); } -void MergeSortingTransform::updatePartialResult() -{ - /// Sort all input data - remerge(); - /// Add a copy of the first `partial_result_limit` rows to a generated_chunk - /// to send it as a partial result in the next prepare stage - auto generated_columns = chunks[0].cloneEmptyColumns(); - size_t total_rows = 0; - for (const auto & merged_chunk : chunks) - { - size_t rows = std::min(merged_chunk.getNumRows(), partial_result_limit - total_rows); - if (rows == 0) - break; - - for (size_t position = 0; position < generated_columns.size(); ++position) - { - auto column = 
merged_chunk.getColumns()[position]; - generated_columns[position]->insertRangeFrom(*column, 0, rows); - } - - total_rows += rows; - } - - generated_chunk.setColumns(std::move(generated_columns), total_rows); - generated_chunk.setChunkInfo(chunks[0].getChunkInfo()); - generated_chunk.changePartialResultStatus(true /*is_chunk_partial*/); - enrichChunkWithConstants(generated_chunk); -} - void MergeSortingTransform::consume(Chunk chunk) { /** Algorithm: @@ -169,6 +193,8 @@ void MergeSortingTransform::consume(Chunk chunk) /// If there were only const columns in sort description, then there is no need to sort. /// Return the chunk as is. + std::lock_guard lock(snapshot_mutex); + if (description.empty()) { generated_chunk = std::move(chunk); @@ -193,12 +219,6 @@ void MergeSortingTransform::consume(Chunk chunk) remerge(); } - if (partial_result_duration_ms && partial_result_duration_ms < watch.elapsedMilliseconds() && !chunks.empty()) - { - updatePartialResult(); - watch.restart(); - } - /** If too many of them and if external sorting is enabled, * will merge blocks that we have in memory at this moment and write merged stream to temporary (compressed) file. * NOTE. It's possible to check free space in filesystem. @@ -251,6 +271,8 @@ void MergeSortingTransform::serialize() void MergeSortingTransform::generate() { + std::lock_guard lock(snapshot_mutex); + if (!generated_prefix) { size_t num_tmp_files = tmp_data ? tmp_data->getStreams().size() : 0; @@ -311,4 +333,22 @@ void MergeSortingTransform::remerge() sum_bytes_in_blocks = new_sum_bytes_in_blocks; } +ProcessorPtr MergeSortingTransform::getPartialResultProcessor(ProcessorPtr current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) +{ + if (getName() != current_processor->getName() || current_processor.get() != this) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "To create partial result processor variable current_processor should use " \ + "the same class and pointer as in the original processor with class {} and pointer {}. 
" \ + "But current_processor has another class {} or pointer {} then original.", + getName(), + static_cast(this), + current_processor->getName(), + static_cast(current_processor.get())); + + const auto & header = inputs.front().getHeader(); + const auto & merge_sorting_processor = std::static_pointer_cast(current_processor); + return std::make_shared(header, std::move(merge_sorting_processor), partial_result_limit, partial_result_duration_ms); +} + } diff --git a/src/Processors/Transforms/MergeSortingTransform.h b/src/Processors/Transforms/MergeSortingTransform.h index 2b2db6fae16..0b9766674a5 100644 --- a/src/Processors/Transforms/MergeSortingTransform.h +++ b/src/Processors/Transforms/MergeSortingTransform.h @@ -30,9 +30,7 @@ public: double remerge_lowered_memory_bytes_ratio_, size_t max_bytes_before_external_sort_, TemporaryDataOnDiskPtr tmp_data_, - size_t min_free_disk_space_, - UInt64 partial_result_limit_ = 0, - UInt64 partial_result_duration_ms_ = 0); + size_t min_free_disk_space_); String getName() const override { return "MergeSortingTransform"; } @@ -43,6 +41,9 @@ protected: Processors expandPipeline() override; + bool supportPartialResultProcessor() const override { return true; } + ProcessorPtr getPartialResultProcessor(ProcessorPtr current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; + private: size_t max_bytes_before_remerge; double remerge_lowered_memory_bytes_ratio; @@ -53,11 +54,6 @@ private: size_t sum_rows_in_blocks = 0; size_t sum_bytes_in_blocks = 0; - UInt64 partial_result_limit; - UInt64 partial_result_duration_ms; - - Stopwatch watch; - Poco::Logger * log = &Poco::Logger::get("MergeSortingTransform"); /// If remerge doesn't save memory at least several times, mark it as useless and don't do it anymore. @@ -66,7 +62,8 @@ private: /// Merge all accumulated blocks to keep no more than limit rows. 
@@ -66,7 +62,8 @@ private:
     /// Merge all accumulated blocks to keep no more than limit rows.
     void remerge();
 
-    void updatePartialResult();
+    friend class MergeSortingPartialResultTransform;
+    std::mutex snapshot_mutex;
 
     ProcessorPtr external_merging_sorted;
 };
diff --git a/src/Processors/Transforms/PartialResultTransform.cpp b/src/Processors/Transforms/PartialResultTransform.cpp
new file mode 100644
index 00000000000..4fda35085c3
--- /dev/null
+++ b/src/Processors/Transforms/PartialResultTransform.cpp
@@ -0,0 +1,67 @@
+#include 
+
+namespace DB
+{
+
+PartialResultTransform::PartialResultTransform(const Block & header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_)
+    : IProcessor({header}, {header})
+    , input(inputs.front())
+    , output(outputs.front())
+    , partial_result_limit(partial_result_limit_)
+    , partial_result_duration_ms(partial_result_duration_ms_)
+    , watch(CLOCK_MONOTONIC)
+    {}
+
+IProcessor::Status PartialResultTransform::prepare()
+{
+    if (output.isFinished())
+    {
+        input.close();
+        return Status::Finished;
+    }
+
+    if (finished_getting_snapshots)
+    {
+        output.finish();
+        return Status::Finished;
+    }
+
+    if (input.hasData())
+        partial_result = {input.pull(), SnaphotStatus::Ready};
+
+    /// Send partial result from real processor snapshot or from previous partial result processor if possible
+    if (partial_result.snapshot_status == SnaphotStatus::Ready && output.canPush())
+    {
+        transformPartialResult(partial_result.chunk);
+        partial_result.snapshot_status = SnaphotStatus::NotReady;
+        if (partial_result.chunk.getNumRows() > 0)
+        {
+            output.push(std::move(partial_result.chunk));
+            return Status::PortFull;
+        }
+    }
+
+    /// If input data from previous partial result processor is finished then
+    /// PartialResultTransform ready to create snapshots and send them as a partial result
+    if (input.isFinished())
+    {
+        return Status::Ready;
+    }
+
+    input.setNeeded();
+    return Status::NeedData;
+}
+
+void PartialResultTransform::work()
+{
+    if (partial_result_duration_ms < watch.elapsedMilliseconds())
+    {
+        partial_result = getRealProcessorSnapshot();
+        if (partial_result.snapshot_status == SnaphotStatus::Stopped)
+            finished_getting_snapshots = true;
+
+        watch.restart();
+    }
+}
+
+}
diff --git a/src/Processors/Transforms/PartialResultTransform.h b/src/Processors/Transforms/PartialResultTransform.h
new file mode 100644
index 00000000000..0e3c503968f
--- /dev/null
+++ b/src/Processors/Transforms/PartialResultTransform.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include 
+
+namespace DB
+{
+
+class PartialResultTransform : public IProcessor
+{
+public:
+    PartialResultTransform(const Block & header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_);
+
+    String getName() const override { return "PartialResultTransform"; }
+
+    Status prepare() override;
+    void work() override;
+
+    bool isPartialResultProcessor() const override { return true; }
+
+protected:
+    enum class SnaphotStatus
+    {
+        NotReady,
+        Ready,
+        Stopped,
+    };
+
+    struct ShaphotResult
+    {
+        Chunk chunk;
+        SnaphotStatus snapshot_status;
+    };
+
+    InputPort & input;
+    OutputPort & output;
+
+    UInt64 partial_result_limit;
+    UInt64 partial_result_duration_ms;
+
+    ShaphotResult partial_result = {{}, SnaphotStatus::NotReady};
+
+    bool finished_getting_snapshots = false;
+
+    virtual void transformPartialResult(Chunk & /*chunk*/) {}
+    virtual ShaphotResult getRealProcessorSnapshot() { return {{}, SnaphotStatus::Stopped}; }
+
+private:
+    Stopwatch watch;
+};
+
+}
diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp
index 91ba01c479f..b84cb279acd 100644
--- a/src/QueryPipeline/Pipe.cpp
+++ 
b/src/QueryPipeline/Pipe.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB { @@ -167,12 +168,9 @@ Pipe::Pipe(ProcessorPtr source) { checkSource(*source); - if (collected_processors) - collected_processors->emplace_back(source); - output_ports.push_back(&source->getOutputs().front()); header = output_ports.front()->getHeader(); - processors->emplace_back(std::move(source)); + addProcessor(std::move(source)); max_parallel_streams = 1; } @@ -319,6 +317,19 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow res.processors->insert(res.processors->end(), pipe.processors->begin(), pipe.processors->end()); res.output_ports.insert(res.output_ports.end(), pipe.output_ports.begin(), pipe.output_ports.end()); + if (res.isPartialResultActive() && pipe.isPartialResultActive()) + { + res.partial_result_ports.insert( + res.partial_result_ports.end(), + pipe.partial_result_ports.begin(), + pipe.partial_result_ports.end()); + } + else + { + res.is_partial_result_active = false; + res.partial_result_ports.clear(); + } + res.max_parallel_streams += pipe.max_parallel_streams; if (pipe.totals_port) @@ -352,11 +363,11 @@ void Pipe::addSource(ProcessorPtr source) else assertBlocksHaveEqualStructure(header, source_header, "Pipes"); - if (collected_processors) - collected_processors->emplace_back(source); - output_ports.push_back(&source->getOutputs().front()); - processors->emplace_back(std::move(source)); + if (isPartialResultActive()) + partial_result_ports.push_back(nullptr); + + addProcessor(std::move(source)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -374,11 +385,9 @@ void Pipe::addTotalsSource(ProcessorPtr source) assertBlocksHaveEqualStructure(header, source_header, "Pipes"); - if (collected_processors) - collected_processors->emplace_back(source); - totals_port = &source->getOutputs().front(); - processors->emplace_back(std::move(source)); + + addProcessor(std::move(source)); } void Pipe::addExtremesSource(ProcessorPtr source) @@ -394,11 +403,20 @@ void Pipe::addExtremesSource(ProcessorPtr source) assertBlocksHaveEqualStructure(header, source_header, "Pipes"); - if (collected_processors) - collected_processors->emplace_back(source); - extremes_port = &source->getOutputs().front(); - processors->emplace_back(std::move(source)); + + addProcessor(std::move(source)); +} + +void Pipe::activatePartialResult(UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) +{ + if (!is_partial_result_active) + { + is_partial_result_active = true; + partial_result_limit = partial_result_limit_; + partial_result_duration_ms = partial_result_duration_ms_; + partial_result_ports.assign(output_ports.size(), nullptr); + } } static void dropPort(OutputPort *& port, Processors & processors, Processors * collected_processors) @@ -426,6 +444,15 @@ void Pipe::dropExtremes() dropPort(extremes_port, *processors, collected_processors); } +void Pipe::dropPartialResult() +{ + for (auto & port : partial_result_ports) + dropPort(port, *processors, collected_processors); + + is_partial_result_active = false; + partial_result_ports.clear(); +} + void Pipe::addTransform(ProcessorPtr transform) { addTransform(std::move(transform), static_cast(nullptr), static_cast(nullptr)); @@ -506,10 +533,7 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort if (extremes_port) assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes"); - if (collected_processors) - 
collected_processors->emplace_back(transform); - - processors->emplace_back(std::move(transform)); + addProcessor(std::move(transform)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -595,14 +619,41 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * if (extremes_port) assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes"); - if (collected_processors) - collected_processors->emplace_back(transform); + addProcessor(std::move(transform)); - processors->emplace_back(std::move(transform)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } +void Pipe::addPartialResultTransformIfNeeded(ProcessorPtr transform, size_t partial_result_port_id) +{ + if (isPartialResultActive()) + { + auto & partial_result_port = partial_result_ports[partial_result_port_id]; + + if (!transform->supportPartialResultProcessor()) + { + dropPort(partial_result_port, *processors, collected_processors); + return; + } + + if (partial_result_port == nullptr) + { + auto source = std::make_shared(getHeader()); + partial_result_port = &source->getPort(); + + addProcessor(std::move(source)); + } + + auto partial_result_transform = transform->getPartialResultProcessor(std::move(transform), partial_result_limit, partial_result_duration_ms); + + connect(*partial_result_port, partial_result_transform->getInputs().front()); + partial_result_port = &partial_result_transform->getOutputs().front(); + + addProcessor(std::move(partial_result_transform)); + } +} + void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) { if (output_ports.empty()) @@ -610,7 +661,7 @@ void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) Block new_header; - auto add_transform = [&](OutputPort *& port, StreamType stream_type) + auto add_transform = [&](OutputPort *& port, size_t partial_result_port_id, StreamType stream_type) { if (!port) return; @@ -646,19 +697,22 @@ void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) { connect(*port, transform->getInputs().front()); port = &transform->getOutputs().front(); + if (stream_type == StreamType::Main) + addPartialResultTransformIfNeeded(transform, partial_result_port_id); - if (collected_processors) - collected_processors->emplace_back(transform); - - processors->emplace_back(std::move(transform)); + addProcessor(std::move(transform)); } }; + size_t partial_result_port_id = 0; for (auto & port : output_ports) - add_transform(port, StreamType::Main); + { + add_transform(port, partial_result_port_id, StreamType::Main); + ++partial_result_port_id; + } - add_transform(totals_port, StreamType::Totals); - add_transform(extremes_port, StreamType::Extremes); + add_transform(totals_port, 0, StreamType::Totals); + add_transform(extremes_port, 0, StreamType::Extremes); header = std::move(new_header); } @@ -697,18 +751,21 @@ void Pipe::addChains(std::vector chains) auto added_processors = Chain::getProcessors(std::move(chains[i])); for (auto & transform : added_processors) - { - if (collected_processors) - collected_processors->emplace_back(transform); - - processors->emplace_back(std::move(transform)); - } + addProcessor(std::move(transform)); } header = std::move(new_header); max_parallel_streams = std::max(max_parallel_streams, max_parallel_streams_for_chains); } +void Pipe::addProcessor(ProcessorPtr processor) +{ + if (collected_processors) + collected_processors->emplace_back(processor); + + processors->emplace_back(std::move(processor)); +} + 
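// The addProcessor() helper added above folds the bookkeeping that every call
// site in this file previously repeated by hand into one place. A minimal
// sketch of the before/after shape (illustrative only; both containers appear
// in this diff):
//
//     // before, duplicated at each call site:
//     if (collected_processors)
//         collected_processors->emplace_back(transform);
//     processors->emplace_back(std::move(transform));
//
//     // after:
//     addProcessor(std::move(transform));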
void Pipe::resize(size_t num_streams, bool force, bool strict) { if (output_ports.empty()) diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index 09931e38578..10800434678 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -48,6 +48,9 @@ public: OutputPort * getOutputPort(size_t pos) const { return output_ports[pos]; } OutputPort * getTotalsPort() const { return totals_port; } OutputPort * getExtremesPort() const { return extremes_port; } + OutputPort * getPartialResultPort(size_t pos) const { return partial_result_ports.empty() ? nullptr : partial_result_ports[pos]; } + + bool isPartialResultActive() { return is_partial_result_active; } /// Add processor to list, add it output ports to output_ports. /// Processor shouldn't have input ports, output ports shouldn't be connected. @@ -58,9 +61,13 @@ public: void addTotalsSource(ProcessorPtr source); void addExtremesSource(ProcessorPtr source); - /// Drop totals and extremes (create NullSink for them). + /// Activate sending partial result during main pipeline execution + void activatePartialResult(UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); + + /// Drop totals, extremes and partial result (create NullSink for them). void dropTotals(); void dropExtremes(); + void dropPartialResult(); /// Add processor to list. It should have size() input ports with compatible header. /// Output ports should have same headers. @@ -69,6 +76,8 @@ public: void addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes); void addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes); + void addPartialResultTransformIfNeeded(ProcessorPtr transform, size_t partial_result_port_id); + enum class StreamType { Main = 0, /// Stream for query data. There may be several streams of this type. @@ -109,10 +118,17 @@ private: Block header; std::shared_ptr processors; - /// Output ports. Totals and extremes are allowed to be empty. + /// If is true, then on each addition of processor also try + /// to add processor which will send partial result from original processor + bool is_partial_result_active = false; + UInt64 partial_result_limit = 0; + UInt64 partial_result_duration_ms = 0; + + /// Output ports. Totals, extremes and partial results are allowed to be empty. OutputPortRawPtrs output_ports; OutputPort * totals_port = nullptr; OutputPort * extremes_port = nullptr; + OutputPortRawPtrs partial_result_ports; /// It is the max number of processors which can be executed in parallel for each step. /// Usually, it's the same as the number of output ports. 
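// Taken together, the members above make the partial-result channel an opt-in
// side path. A sketch of the intended usage, assuming only the API declared in
// this header (the limit and duration values are arbitrary examples):
//
//     Pipe pipe(std::move(source));
//     pipe.activatePartialResult(/*partial_result_limit_*/ 10,
//                                /*partial_result_duration_ms_*/ 100);
//     // one (initially null) partial-result port is now tracked per output
//     // port; a transform that cannot produce snapshots drops its port, and
//     // dropPartialResult() switches the whole side channel off again.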
@@ -127,6 +143,8 @@ private: bool isCompleted() const { return !empty() && output_ports.empty(); } static Pipe unitePipes(Pipes pipes, Processors * collected_processors, bool allow_empty_header); void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); + + void addProcessor(ProcessorPtr processor); friend class QueryPipelineBuilder; friend class QueryPipeline; diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index f060f2f508f..5acc80dcb18 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -63,7 +63,8 @@ static void checkPulling( Processors & processors, OutputPort * output, OutputPort * totals, - OutputPort * extremes) + OutputPort * extremes, + OutputPort * partial_result) { if (!output || output->isConnected()) throw Exception( @@ -80,9 +81,15 @@ static void checkPulling( ErrorCodes::LOGICAL_ERROR, "Cannot create pulling QueryPipeline because its extremes port is connected"); + if (partial_result && partial_result->isConnected()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pulling QueryPipeline because its partial_result port is connected"); + bool found_output = false; bool found_totals = false; bool found_extremes = false; + bool found_partial_result = false; for (const auto & processor : processors) { for (const auto & in : processor->getInputs()) @@ -96,6 +103,8 @@ static void checkPulling( found_totals = true; else if (extremes && &out == extremes) found_extremes = true; + else if (partial_result && &out == partial_result) + found_partial_result = true; else checkOutput(out, processor); } @@ -113,6 +122,10 @@ static void checkPulling( throw Exception( ErrorCodes::LOGICAL_ERROR, "Cannot create pulling QueryPipeline because its extremes port does not belong to any processor"); + if (partial_result && !found_partial_result) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create pulling QueryPipeline because its partial_result port does not belong to any processor"); } static void checkCompleted(Processors & processors) @@ -315,14 +328,16 @@ QueryPipeline::QueryPipeline( std::shared_ptr processors_, OutputPort * output_, OutputPort * totals_, - OutputPort * extremes_) + OutputPort * extremes_, + OutputPort * partial_result_) : resources(std::move(resources_)) , processors(std::move(processors_)) , output(output_) , totals(totals_) , extremes(extremes_) + , partial_result(partial_result_) { - checkPulling(*processors, output, totals, extremes); + checkPulling(*processors, output, totals, extremes, partial_result); } QueryPipeline::QueryPipeline(Pipe pipe) @@ -333,9 +348,10 @@ QueryPipeline::QueryPipeline(Pipe pipe) output = pipe.getOutputPort(0); totals = pipe.getTotalsPort(); extremes = pipe.getExtremesPort(); + partial_result = pipe.getPartialResultPort(0); processors = std::move(pipe.processors); - checkPulling(*processors, output, totals, extremes); + checkPulling(*processors, output, totals, extremes, partial_result); } else { @@ -367,6 +383,7 @@ QueryPipeline::QueryPipeline(std::shared_ptr format) auto & format_main = format->getPort(IOutputFormat::PortKind::Main); auto & format_totals = format->getPort(IOutputFormat::PortKind::Totals); auto & format_extremes = format->getPort(IOutputFormat::PortKind::Extremes); + auto & format_partial_result = format->getPort(IOutputFormat::PortKind::PartialResult); if (!totals) { @@ -382,12 +399,21 @@ QueryPipeline::QueryPipeline(std::shared_ptr format) processors->emplace_back(std::move(source)); } + if (!partial_result) + { 
+ auto source = std::make_shared(format_partial_result.getHeader()); + partial_result = &source->getPort(); + processors->emplace_back(std::move(source)); + } + connect(*totals, format_totals); connect(*extremes, format_extremes); + connect(*partial_result, format_partial_result); input = &format_main; totals = nullptr; extremes = nullptr; + partial_result = nullptr; output_format = format.get(); @@ -415,6 +441,7 @@ void QueryPipeline::complete(std::shared_ptr sink) drop(totals, *processors); drop(extremes, *processors); + drop(partial_result, *processors); connect(*output, sink->getPort()); processors->emplace_back(std::move(sink)); @@ -430,6 +457,7 @@ void QueryPipeline::complete(Chain chain) drop(totals, *processors); drop(extremes, *processors); + drop(partial_result, *processors); processors->reserve(processors->size() + chain.getProcessors().size() + 1); for (auto processor : chain.getProcessors()) @@ -455,6 +483,7 @@ void QueryPipeline::complete(Pipe pipe) pipe.resize(1); pipe.dropExtremes(); pipe.dropTotals(); + pipe.dropPartialResult(); connect(*pipe.getOutputPort(0), *input); input = nullptr; @@ -483,11 +512,13 @@ void QueryPipeline::complete(std::shared_ptr format) addMaterializing(output, *processors); addMaterializing(totals, *processors); addMaterializing(extremes, *processors); + addMaterializing(partial_result, *processors); } auto & format_main = format->getPort(IOutputFormat::PortKind::Main); auto & format_totals = format->getPort(IOutputFormat::PortKind::Totals); auto & format_extremes = format->getPort(IOutputFormat::PortKind::Extremes); + auto & format_partial_result = format->getPort(IOutputFormat::PortKind::PartialResult); if (!totals) { @@ -503,13 +534,22 @@ void QueryPipeline::complete(std::shared_ptr format) processors->emplace_back(std::move(source)); } + if (!partial_result) + { + auto source = std::make_shared(format_partial_result.getHeader()); + partial_result = &source->getPort(); + processors->emplace_back(std::move(source)); + } + connect(*output, format_main); connect(*totals, format_totals); connect(*extremes, format_extremes); + connect(*partial_result, format_partial_result); output = nullptr; totals = nullptr; extremes = nullptr; + partial_result = nullptr; initRowsBeforeLimit(format.get()); output_format = format.get(); @@ -642,6 +682,7 @@ void QueryPipeline::convertStructureTo(const ColumnsWithTypeAndName & columns) addExpression(output, actions, *processors); addExpression(totals, actions, *processors); addExpression(extremes, actions, *processors); + addExpression(partial_result, actions, *processors); } std::unique_ptr QueryPipeline::getReadProgressCallback() const diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index 55c78ca78ed..177cf0af822 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -75,7 +75,8 @@ public: std::shared_ptr processors_, OutputPort * output_, OutputPort * totals_ = nullptr, - OutputPort * extremes_ = nullptr); + OutputPort * extremes_ = nullptr, + OutputPort * partial_result_ = nullptr); bool initialized() const { return !processors->empty(); } /// When initialized, exactly one of the following is true. 
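// Because the new parameter above is defaulted, existing call sites compile
// unchanged; a pulling pipeline that does carry the side channel would be
// built roughly like this (sketch; the ports come from the caller's
// processors):
//
//     QueryPipeline pipeline(
//         std::move(resources),
//         processors,
//         /*output_*/ main_output,
//         /*totals_*/ nullptr,
//         /*extremes_*/ nullptr,
//         /*partial_result_*/ partial_result_output);
//
// checkPulling() (see the .cpp hunk above) then verifies that the
// partial-result port, like totals and extremes, is unconnected and belongs
// to one of the pipeline's processors.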
@@ -147,6 +148,7 @@ private: OutputPort * output = nullptr; OutputPort * totals = nullptr; OutputPort * extremes = nullptr; + OutputPort * partial_result = nullptr; QueryStatusPtr process_list_element; diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index 3a5d65d4388..ed9aa0c3035 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -79,6 +79,12 @@ public: /// Pipeline will be completed after this transformation. void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); + /// Activate building separate pipeline for sending partial result. + void activatePartialResult(UInt64 partial_result_limit, UInt64 partial_result_duration_ms) { pipe.activatePartialResult(partial_result_limit, partial_result_duration_ms); } + + /// Check if building of a pipeline for sending partial result active. + bool isPartialResultActive() { return pipe.isPartialResultActive(); } + /// Add totals which returns one chunk with single row with defaults. void addDefaultTotals(); From 6a32bbfcd85f428cf21729778f5f2231e60c148c Mon Sep 17 00:00:00 2001 From: alexX512 Date: Mon, 1 May 2023 13:27:08 +0000 Subject: [PATCH 0017/1687] Fix style check --- src/Processors/IProcessor.h | 2 +- src/Processors/Transforms/LimitPartialResultTransform.h | 2 +- src/Processors/Transforms/MergeSortingTransform.h | 2 +- src/Processors/Transforms/PartialResultTransform.cpp | 4 ++-- src/Processors/Transforms/PartialResultTransform.h | 2 +- src/QueryPipeline/Pipe.cpp | 7 +++++++ 6 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index cb819c00ff2..26a1ce3cddc 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -234,7 +234,7 @@ public: { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'expandPipeline' is not implemented for {} processor", getName()); } - + virtual bool isPartialResultProcessor() const { return false; } virtual bool supportPartialResultProcessor() const { return false; } diff --git a/src/Processors/Transforms/LimitPartialResultTransform.h b/src/Processors/Transforms/LimitPartialResultTransform.h index 8a919615088..e5d772db289 100644 --- a/src/Processors/Transforms/LimitPartialResultTransform.h +++ b/src/Processors/Transforms/LimitPartialResultTransform.h @@ -43,7 +43,7 @@ public: chunk.setColumns(std::move(columns), length); } - } + } private: UInt64 limit; diff --git a/src/Processors/Transforms/MergeSortingTransform.h b/src/Processors/Transforms/MergeSortingTransform.h index 0b9766674a5..0c42798c0a5 100644 --- a/src/Processors/Transforms/MergeSortingTransform.h +++ b/src/Processors/Transforms/MergeSortingTransform.h @@ -62,7 +62,7 @@ private: /// Merge all accumulated blocks to keep no more than limit rows. 
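// The friend declaration below (renamed in this commit) lets the dedicated
// partial-result transform reach into this processor's accumulated state;
// snapshot_mutex is what makes taking such a snapshot from another processor
// safe. A sketch of the locking discipline the snapshot side is expected to
// follow (the copy step is illustrative):
//
//     std::lock_guard lock(merge_sorting_transform->snapshot_mutex);
//     // clone the accumulated chunks while the owning transform cannot
//     // mutate them concurrently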
void remerge(); - friend class MergeSortingPartialResult; + friend class MergeSortingPartialResultTransform; std::mutex snapshot_mutex; ProcessorPtr external_merging_sorted; diff --git a/src/Processors/Transforms/PartialResultTransform.cpp b/src/Processors/Transforms/PartialResultTransform.cpp index 4fda35085c3..7cf2473684a 100644 --- a/src/Processors/Transforms/PartialResultTransform.cpp +++ b/src/Processors/Transforms/PartialResultTransform.cpp @@ -19,7 +19,7 @@ IProcessor::Status PartialResultTransform::prepare() input.close(); return Status::Finished; } - + if (finished_getting_snapshots) { output.finish(); @@ -40,7 +40,7 @@ IProcessor::Status PartialResultTransform::prepare() return Status::PortFull; } } - + /// If input data from previous partial result processor is finished then /// PartialResultTransform ready to create snapshots and send them as a partial result if (input.isFinished()) diff --git a/src/Processors/Transforms/PartialResultTransform.h b/src/Processors/Transforms/PartialResultTransform.h index 0e3c503968f..3f9089f8cce 100644 --- a/src/Processors/Transforms/PartialResultTransform.h +++ b/src/Processors/Transforms/PartialResultTransform.h @@ -38,7 +38,7 @@ protected: UInt64 partial_result_duration_ms; ShaphotResult partial_result = {{}, SnaphotStatus::NotReady}; - + bool finished_getting_snapshots = false; virtual void transformPartialResult(Chunk & /*chunk*/) {} diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index b84cb279acd..b1a740b3390 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -483,6 +483,9 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort if (extremes) extremes_port = extremes; + /// TODO: Add support for partial result in multithreading mode + dropPartialResult(); + size_t next_output = 0; for (auto & input : inputs) { @@ -570,6 +573,9 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes_port = nullptr; } + /// TODO: Add support for partial result in multithreading mode + dropPartialResult(); + bool found_totals = false; bool found_extremes = false; @@ -733,6 +739,7 @@ void Pipe::addChains(std::vector chains) dropTotals(); dropExtremes(); + dropPartialResult(); size_t max_parallel_streams_for_chains = 0; From c0c8fa7b8e419fd8696d23da2783c4e350664cba Mon Sep 17 00:00:00 2001 From: alexX512 Date: Mon, 1 May 2023 13:42:26 +0000 Subject: [PATCH 0018/1687] style fix --- src/QueryPipeline/Pipe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index 10800434678..7200cf4944a 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -143,7 +143,7 @@ private: bool isCompleted() const { return !empty() && output_ports.empty(); } static Pipe unitePipes(Pipes pipes, Processors * collected_processors, bool allow_empty_header); void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); - + void addProcessor(ProcessorPtr processor); friend class QueryPipelineBuilder; From 2cd1b37f9b5b6576478a44846f5fa4b78de8f381 Mon Sep 17 00:00:00 2001 From: alexX512 Date: Tue, 2 May 2023 07:26:36 +0000 Subject: [PATCH 0019/1687] Add support for connection of partial result processors with multiple ports --- .../PullingAsyncPipelineExecutor.cpp | 5 +- .../Executors/PullingAsyncPipelineExecutor.h | 2 +- src/Processors/Formats/IOutputFormat.cpp | 2 +- src/Processors/Formats/IOutputFormat.h | 4 + .../Transforms/LimitPartialResultTransform.h | 1 + src/QueryPipeline/Pipe.cpp | 82 
+++++++++++++++---- src/QueryPipeline/Pipe.h | 4 +- src/Server/TCPHandler.cpp | 3 +- 8 files changed, 82 insertions(+), 21 deletions(-) diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 73a3142d459..0b28ed67cd1 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -41,12 +41,15 @@ struct PullingAsyncPipelineExecutor::Data } }; -PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) +PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipeline_, bool /*has_partial_result_setting*/) : pipeline(pipeline_) { if (!pipeline.pulling()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for PullingAsyncPipelineExecutor must be pulling"); lazy_format = std::make_shared(pipeline.output->getHeader()); + // if (has_partial_result_setting) + // lazy_format->activatePartialResultProtocol(); + pipeline.complete(lazy_format); } diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.h b/src/Processors/Executors/PullingAsyncPipelineExecutor.h index 361bcc0155c..202ecbf281b 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.h +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.h @@ -21,7 +21,7 @@ struct ProfileInfo; class PullingAsyncPipelineExecutor { public: - explicit PullingAsyncPipelineExecutor(QueryPipeline & pipeline_); + explicit PullingAsyncPipelineExecutor(QueryPipeline & pipeline_, bool has_partial_result_setting = false); ~PullingAsyncPipelineExecutor(); /// Get structure of returned block or chunk. diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index d3e0ed2fcc6..6496f5357e7 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -129,7 +129,7 @@ void IOutputFormat::work() case Main: result_rows += current_chunk.getNumRows(); result_bytes += current_chunk.allocatedBytes(); - if (!was_main_input && current_chunk.hasRows()) + if (is_partial_result_protocol_active && !was_main_input && current_chunk.hasRows()) { consume(Chunk(current_chunk.cloneEmptyColumns(), 0)); was_main_input = true; diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 2c598e2620b..6e5e21055a3 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -105,6 +105,8 @@ public: void clearLastLines(size_t lines_number); + void activatePartialResultProtocol() { is_partial_result_protocol_active = true; } + protected: friend class ParallelFormattingOutputFormat; @@ -192,6 +194,8 @@ private: size_t rows_read_before = 0; bool are_totals_written = false; + bool is_partial_result_protocol_active = false; + /// Counters for consumed chunks. Are used for QueryLog. 
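// is_partial_result_protocol_active (declared just above and set through
// activatePartialResultProtocol()) gates the branch added to
// IOutputFormat::work() in the .cpp hunk earlier in this commit: when the
// protocol is active, the format emits one empty chunk before the first
// non-empty main chunk, plausibly so a client can tell where partial results
// end and the final result begins. With the flag off, main-input handling
// keeps the old behaviour:
//
//     if (is_partial_result_protocol_active && !was_main_input
//         && current_chunk.hasRows())
//     {
//         consume(Chunk(current_chunk.cloneEmptyColumns(), 0));
//         was_main_input = true;
//     }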
size_t result_rows = 0; size_t result_bytes = 0; diff --git a/src/Processors/Transforms/LimitPartialResultTransform.h b/src/Processors/Transforms/LimitPartialResultTransform.h index e5d772db289..8ed60d20da3 100644 --- a/src/Processors/Transforms/LimitPartialResultTransform.h +++ b/src/Processors/Transforms/LimitPartialResultTransform.h @@ -5,6 +5,7 @@ namespace DB { +/// Currently support only single thread implementation with one input and one output ports class LimitPartialResultTransform : public PartialResultTransform { public: diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index b1a740b3390..ff86d8df776 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -483,8 +483,7 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort if (extremes) extremes_port = extremes; - /// TODO: Add support for partial result in multithreading mode - dropPartialResult(); + addPartialResultTransform(transform); size_t next_output = 0; for (auto & input : inputs) @@ -573,8 +572,7 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes_port = nullptr; } - /// TODO: Add support for partial result in multithreading mode - dropPartialResult(); + addPartialResultTransform(transform); bool found_totals = false; bool found_extremes = false; @@ -627,11 +625,10 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * addProcessor(std::move(transform)); - max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } -void Pipe::addPartialResultTransformIfNeeded(ProcessorPtr transform, size_t partial_result_port_id) +void Pipe::addPartialResultSimpleTransform(ProcessorPtr transform, size_t partial_result_port_id) { if (isPartialResultActive()) { @@ -643,23 +640,75 @@ void Pipe::addPartialResultTransformIfNeeded(ProcessorPtr transform, size_t part return; } - if (partial_result_port == nullptr) - { - auto source = std::make_shared(getHeader()); - partial_result_port = &source->getPort(); - - addProcessor(std::move(source)); - } - auto partial_result_transform = transform->getPartialResultProcessor(std::move(transform), partial_result_limit, partial_result_duration_ms); - connect(*partial_result_port, partial_result_transform->getInputs().front()); + connectPartialResultPort(partial_result_port, partial_result_transform->getInputs().front()); + partial_result_port = &partial_result_transform->getOutputs().front(); addProcessor(std::move(partial_result_transform)); } } +void Pipe::addPartialResultTransform(ProcessorPtr transform) +{ + if (isPartialResultActive()) + { + size_t new_outputs_size = transform->getOutputs().size(); + + if (!transform->supportPartialResultProcessor()) + { + for (auto & partial_result_port : partial_result_ports) + dropPort(partial_result_port, *processors, collected_processors); + + partial_result_ports.assign(new_outputs_size, nullptr); + return; + } + + auto partial_result_transform = transform->getPartialResultProcessor(std::move(transform), partial_result_limit, partial_result_duration_ms); + auto & inputs = partial_result_transform->getInputs(); + + if (inputs.size() != partial_result_ports.size()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot add transform {} to Pipe because it has {} input ports, but {} expected", + partial_result_transform->getName(), + inputs.size(), + partial_result_ports.size()); + + size_t next_port = 0; + for (auto & input : inputs) + { + connectPartialResultPort(partial_result_ports[next_port], input); + 
++next_port; + } + + partial_result_ports.assign(new_outputs_size, nullptr); + + next_port = 0; + for (auto & new_partial_result_port : partial_result_transform->getOutputs()) + { + partial_result_ports[next_port] = &new_partial_result_port; + ++next_port; + } + + addProcessor(std::move(partial_result_transform)); + } +} + +void Pipe::connectPartialResultPort(OutputPort * partial_result_port, InputPort & partial_result_transform_port) +{ + if (partial_result_port == nullptr) + { + auto source = std::make_shared(getHeader()); + partial_result_port = &source->getPort(); + + addProcessor(std::move(source)); + } + + connect(*partial_result_port, partial_result_transform_port); +} + void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) { if (output_ports.empty()) @@ -704,7 +753,7 @@ void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) connect(*port, transform->getInputs().front()); port = &transform->getOutputs().front(); if (stream_type == StreamType::Main) - addPartialResultTransformIfNeeded(transform, partial_result_port_id); + addPartialResultSimpleTransform(transform, partial_result_port_id); addProcessor(std::move(transform)); } @@ -832,6 +881,7 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) add_transform(totals_port, StreamType::Totals); add_transform(extremes_port, StreamType::Extremes); + dropPartialResult(); output_ports.clear(); header.clear(); diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index 7200cf4944a..650e4d664a5 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -76,7 +76,9 @@ public: void addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes); void addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes); - void addPartialResultTransformIfNeeded(ProcessorPtr transform, size_t partial_result_port_id); + void addPartialResultTransform(ProcessorPtr transform); + void addPartialResultSimpleTransform(ProcessorPtr transform, size_t partial_result_port_id); + void connectPartialResultPort(OutputPort * partial_result_port, InputPort & partial_result_transform_port); enum class StreamType { diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a608219ed63..67ed1d071e4 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -815,7 +815,8 @@ void TCPHandler::processOrdinaryQueryWithProcessors() std::unique_lock progress_lock(task_callback_mutex, std::defer_lock); { - PullingAsyncPipelineExecutor executor(pipeline); + bool has_partial_result_setting = query_context->getSettingsRef().partial_result_update_duration_ms.totalMilliseconds() > 0; + PullingAsyncPipelineExecutor executor(pipeline, has_partial_result_setting); CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread}; Block block; From 40c0b461bf7a421e71076f042414e3eff60fa722 Mon Sep 17 00:00:00 2001 From: alexX512 Date: Tue, 2 May 2023 07:37:01 +0000 Subject: [PATCH 0020/1687] Style fix --- src/QueryPipeline/Pipe.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index ff86d8df776..21eb5c92d4f 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -660,11 +660,11 @@ void Pipe::addPartialResultTransform(ProcessorPtr transform) { for (auto & partial_result_port : partial_result_ports) dropPort(partial_result_port, *processors, collected_processors); - + partial_result_ports.assign(new_outputs_size, nullptr); 
return; } - + auto partial_result_transform = transform->getPartialResultProcessor(std::move(transform), partial_result_limit, partial_result_duration_ms); auto & inputs = partial_result_transform->getInputs(); From 9fa2c8f652ff19c9b82fa0b359011adfc2f6f990 Mon Sep 17 00:00:00 2001 From: alexX512 Date: Tue, 2 May 2023 18:16:15 +0000 Subject: [PATCH 0021/1687] Update test with a new pipeline format --- tests/queries/0_stateless/02210_processors_profile_log.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02210_processors_profile_log.reference b/tests/queries/0_stateless/02210_processors_profile_log.reference index 181022d2421..4592d13c9a5 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.reference +++ b/tests/queries/0_stateless/02210_processors_profile_log.reference @@ -38,4 +38,5 @@ LazyOutputFormat 1 1 1 0 0 LimitsCheckingTransform 1 1 1 1 1 NullSource 1 0 0 0 0 NullSource 1 0 0 0 0 +NullSource 0 0 0 0 0 SourceFromSingleChunk 1 0 0 1 1 From ecd93bee6b1e10e6783a430809563ece313dcd87 Mon Sep 17 00:00:00 2001 From: alexX512 Date: Wed, 3 May 2023 06:44:32 +0000 Subject: [PATCH 0022/1687] Change copy of shared pointer to const reference of shared pointer --- src/Processors/IProcessor.h | 2 +- src/Processors/LimitTransform.cpp | 2 +- src/Processors/LimitTransform.h | 2 +- src/Processors/Transforms/ExpressionTransform.cpp | 2 +- src/Processors/Transforms/ExpressionTransform.h | 2 +- src/Processors/Transforms/LimitsCheckingTransform.cpp | 2 +- src/Processors/Transforms/LimitsCheckingTransform.h | 2 +- src/Processors/Transforms/MergeSortingTransform.cpp | 4 ++-- src/Processors/Transforms/MergeSortingTransform.h | 2 +- src/QueryPipeline/Pipe.cpp | 8 ++++---- src/QueryPipeline/Pipe.h | 4 ++-- 11 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 26a1ce3cddc..dd08075fd01 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -238,7 +238,7 @@ public: virtual bool isPartialResultProcessor() const { return false; } virtual bool supportPartialResultProcessor() const { return false; } - virtual ProcessorPtr getPartialResultProcessor(ProcessorPtr /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/) + virtual ProcessorPtr getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getPartialResultProcessor' is not implemented for {} processor", getName()); } diff --git a/src/Processors/LimitTransform.cpp b/src/Processors/LimitTransform.cpp index 0d43b1e4fac..b2bf3c28eee 100644 --- a/src/Processors/LimitTransform.cpp +++ b/src/Processors/LimitTransform.cpp @@ -366,7 +366,7 @@ bool LimitTransform::sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort return true; } -ProcessorPtr LimitTransform::getPartialResultProcessor(ProcessorPtr /*current_processor*/, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) +ProcessorPtr LimitTransform::getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) { const auto & header = inputs.front().getHeader(); return std::make_shared(header, partial_result_limit, partial_result_duration_ms, limit, offset); diff --git a/src/Processors/LimitTransform.h b/src/Processors/LimitTransform.h index 7ad358c5d41..b0ec7600406 100644 --- a/src/Processors/LimitTransform.h 
+++ b/src/Processors/LimitTransform.h @@ -56,7 +56,7 @@ private: bool sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, UInt64 current_chunk_row_num) const; bool supportPartialResultProcessor() const override { return true; } - ProcessorPtr getPartialResultProcessor(ProcessorPtr current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; + ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; public: LimitTransform( diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 5ec76dc95b8..da03e882931 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -25,7 +25,7 @@ void ExpressionTransform::transform(Chunk & chunk) chunk.setColumns(block.getColumns(), num_rows); } -ProcessorPtr ExpressionTransform::getPartialResultProcessor(ProcessorPtr /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/) +ProcessorPtr ExpressionTransform::getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/) { const auto & header = getInputPort().getHeader(); return std::make_shared(header, expression); diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index 6f9b585ae1f..bf8425b2300 100644 --- a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -30,7 +30,7 @@ protected: void transform(Chunk & chunk) override; bool supportPartialResultProcessor() const override { return true; } - ProcessorPtr getPartialResultProcessor(ProcessorPtr current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; + ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; private: ExpressionActionsPtr expression; diff --git a/src/Processors/Transforms/LimitsCheckingTransform.cpp b/src/Processors/Transforms/LimitsCheckingTransform.cpp index b426d050543..487592f9809 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.cpp +++ b/src/Processors/Transforms/LimitsCheckingTransform.cpp @@ -75,7 +75,7 @@ void LimitsCheckingTransform::checkQuota(Chunk & chunk) } } -ProcessorPtr LimitsCheckingTransform::getPartialResultProcessor(ProcessorPtr /*current_processor*/, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) +ProcessorPtr LimitsCheckingTransform::getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) { const auto & header = inputs.front().getHeader(); return std::make_shared(header, partial_result_limit, partial_result_duration_ms); diff --git a/src/Processors/Transforms/LimitsCheckingTransform.h b/src/Processors/Transforms/LimitsCheckingTransform.h index 663eb774edf..6e8d5547cc5 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.h +++ b/src/Processors/Transforms/LimitsCheckingTransform.h @@ -37,7 +37,7 @@ protected: void transform(Chunk & chunk) override; bool supportPartialResultProcessor() const override { return true; } - ProcessorPtr getPartialResultProcessor(ProcessorPtr current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; + ProcessorPtr 
getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; private: StreamLocalLimits limits; diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index c4c6743c775..23096f785ad 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -333,7 +333,7 @@ void MergeSortingTransform::remerge() sum_bytes_in_blocks = new_sum_bytes_in_blocks; } -ProcessorPtr MergeSortingTransform::getPartialResultProcessor(ProcessorPtr current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) +ProcessorPtr MergeSortingTransform::getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) { if (getName() != current_processor->getName() || current_processor.get() != this) throw Exception( @@ -347,7 +347,7 @@ ProcessorPtr MergeSortingTransform::getPartialResultProcessor(ProcessorPtr curre static_cast(current_processor.get())); const auto & header = inputs.front().getHeader(); - const auto & merge_sorting_processor = std::static_pointer_cast(current_processor); + auto merge_sorting_processor = std::dynamic_pointer_cast(current_processor); return std::make_shared(header, std::move(merge_sorting_processor), partial_result_limit, partial_result_duration_ms); } diff --git a/src/Processors/Transforms/MergeSortingTransform.h b/src/Processors/Transforms/MergeSortingTransform.h index 0c42798c0a5..17f79efe5d6 100644 --- a/src/Processors/Transforms/MergeSortingTransform.h +++ b/src/Processors/Transforms/MergeSortingTransform.h @@ -42,7 +42,7 @@ protected: Processors expandPipeline() override; bool supportPartialResultProcessor() const override { return true; } - ProcessorPtr getPartialResultProcessor(ProcessorPtr current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; + ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; private: size_t max_bytes_before_remerge; diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 21eb5c92d4f..6de45d67133 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -628,7 +628,7 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } -void Pipe::addPartialResultSimpleTransform(ProcessorPtr transform, size_t partial_result_port_id) +void Pipe::addPartialResultSimpleTransform(const ProcessorPtr & transform, size_t partial_result_port_id) { if (isPartialResultActive()) { @@ -640,7 +640,7 @@ void Pipe::addPartialResultSimpleTransform(ProcessorPtr transform, size_t partia return; } - auto partial_result_transform = transform->getPartialResultProcessor(std::move(transform), partial_result_limit, partial_result_duration_ms); + auto partial_result_transform = transform->getPartialResultProcessor(transform, partial_result_limit, partial_result_duration_ms); connectPartialResultPort(partial_result_port, partial_result_transform->getInputs().front()); @@ -650,7 +650,7 @@ void Pipe::addPartialResultSimpleTransform(ProcessorPtr transform, size_t partia } } -void Pipe::addPartialResultTransform(ProcessorPtr transform) +void Pipe::addPartialResultTransform(const ProcessorPtr & transform) { if (isPartialResultActive()) { @@ -665,7 
+665,7 @@ void Pipe::addPartialResultTransform(ProcessorPtr transform) return; } - auto partial_result_transform = transform->getPartialResultProcessor(std::move(transform), partial_result_limit, partial_result_duration_ms); + auto partial_result_transform = transform->getPartialResultProcessor(transform, partial_result_limit, partial_result_duration_ms); auto & inputs = partial_result_transform->getInputs(); if (inputs.size() != partial_result_ports.size()) diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index 650e4d664a5..8b26435fd30 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -76,8 +76,8 @@ public: void addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes); void addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes); - void addPartialResultTransform(ProcessorPtr transform); - void addPartialResultSimpleTransform(ProcessorPtr transform, size_t partial_result_port_id); + void addPartialResultTransform(const ProcessorPtr & transform); + void addPartialResultSimpleTransform(const ProcessorPtr & transform, size_t partial_result_port_id); void connectPartialResultPort(OutputPort * partial_result_port, InputPort & partial_result_transform_port); enum class StreamType From a695d6227d8e2331178c7383b70b9b40e9d5dd7a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 06:16:30 +0200 Subject: [PATCH 0023/1687] Make concurrency control controllable --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 +-- src/Client/ClientBase.cpp | 5 ++-- src/Client/Connection.cpp | 2 +- src/Core/ExternalTable.cpp | 2 -- src/Core/Settings.h | 1 + src/Dictionaries/DirectDictionary.cpp | 4 ++- .../ExecuteScalarSubqueriesVisitor.cpp | 9 ++++--- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 1 + src/Interpreters/InterpreterSelectQuery.cpp | 5 ---- src/Interpreters/MutationsInterpreter.cpp | 4 +-- .../Executors/CompletedPipelineExecutor.cpp | 15 +++++++---- src/Processors/Executors/PipelineExecutor.cpp | 25 +++++++++++-------- src/Processors/Executors/PipelineExecutor.h | 8 +++--- .../PullingAsyncPipelineExecutor.cpp | 7 +++--- .../PushingAsyncPipelineExecutor.cpp | 7 +++--- src/Processors/QueryPlan/QueryPlan.cpp | 1 - src/Processors/QueryPlan/QueryPlan.h | 4 +++ .../Transforms/buildPushingToViewsChain.cpp | 1 + .../gtest_exception_on_incorrect_pipeline.cpp | 4 +-- src/QueryPipeline/Chain.h | 4 +++ src/QueryPipeline/QueryPipeline.h | 4 +++ src/QueryPipeline/QueryPipelineBuilder.cpp | 8 ++++++ src/QueryPipeline/QueryPipelineBuilder.h | 12 +++++++++ src/Server/GRPCServer.cpp | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 1 + src/Storages/Kafka/StorageKafka.cpp | 1 + src/Storages/LiveView/StorageLiveView.cpp | 8 +++--- src/Storages/WindowView/StorageWindowView.cpp | 4 +-- 29 files changed, 100 insertions(+), 55 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 7ab0261850b..e5684113014 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1880,7 +1880,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden Block block; - while (block.rows() == 0 && executor.pull(block)) + while (block.rows() == 0 && executor.pull(block, 0)) { } @@ -1911,7 +1911,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned 
more than one row"); Block tmp_block; - while (tmp_block.rows() == 0 && executor.pull(tmp_block)) + while (tmp_block.rows() == 0 && executor.pull(tmp_block, 0)) { } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7ae75ba250d..571781ab817 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1377,8 +1377,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des sendDataFromPipe( std::move(pipe), parsed_query, - have_data_in_stdin - ); + have_data_in_stdin); } catch (Exception & e) { @@ -1457,7 +1456,7 @@ try } Block block; - while (executor.pull(block)) + while (executor.pull(block, 0)) { if (!cancelled && QueryInterruptHandler::cancelled()) { diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index d39148d3016..019d2a8c695 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -866,7 +866,7 @@ void Connection::sendExternalTablesData(ExternalTablesData & data) return sink; }); executor = pipeline.execute(); - executor->execute(/*num_threads = */ 1); + executor->execute(/*num_threads = */ 1, false); auto read_rows = sink->getNumReadRows(); rows += read_rows; diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 0f880ed967f..2a306851df6 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -18,7 +17,6 @@ #include #include -#include #include diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 42ae645615a..97eec7ac6dd 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -46,6 +46,7 @@ class IColumn; M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \ M(MaxThreads, max_final_threads, 0, "The maximum number of threads to read from table with FINAL.", 0) \ M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \ + M(Bool, max_threads_use_concurrency_control, true, "Respect the server's concurrency control (see the `concurrent_threads_soft_limit_num` and `concurrent_threads_soft_limit_ratio_to_cores` global server settings). If disabled, it allows using a larger number of threads even if the server is overloaded (not recommended for normal usage, and needed mostly for tests).", 0) \ M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \ M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. 
for URL engine) per each thread.", 0) \ M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \ diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index d84967fbae6..7b80cbd29b5 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -288,7 +288,8 @@ public: : ISource(pipeline_.getHeader()) , pipeline(std::move(pipeline_)) , executor(pipeline) - {} + { + } std::string getName() const override { @@ -310,6 +311,7 @@ public: private: QueryPipeline pipeline; TExecutor executor; + bool concurrency_control; }; template diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 3d5f14f4723..63a5720d6ab 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -189,7 +189,9 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr PullingAsyncPipelineExecutor executor(io.pipeline); io.pipeline.setProgressCallback(data.getContext()->getProgressCallback()); - while (block.rows() == 0 && executor.pull(block)); + while (block.rows() == 0 && executor.pull(block, 0)) + { + } if (block.rows() == 0) { @@ -220,8 +222,9 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); Block tmp_block; - while (tmp_block.rows() == 0 && executor.pull(tmp_block)) - ; + while (tmp_block.rows() == 0 && executor.pull(tmp_block, 0)) + { + } if (tmp_block.rows() != 0) throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 96a86df7ffd..b6b288d4a2e 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -478,7 +478,7 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_ set->setHeader(executor.getHeader().getColumnsWithTypeAndName()); Block block; - while (executor.pull(block)) + while (executor.pull(block, 0)) { if (block.rows() == 0) continue; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index e78a61831a1..7676db6f5ea 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -554,6 +554,7 @@ BlockIO InterpreterInsertQuery::execute() { res.pipeline = QueryPipeline(std::move(out_chains.at(0))); res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); + res.pipeline.setConcurrencyControl(settings.max_threads_use_concurrency_control); if (query.hasInlinedData() && !async_insert) { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index afd34cb044e..895db55f5af 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -67,7 +67,6 @@ #include #include #include -#include #include #include @@ -83,12 +82,9 @@ #include #include #include -#include #include #include #include -#include -#include #include #include #include @@ -96,7 +92,6 @@ #include #include -#include "config_version.h" namespace DB { diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 
2f05ba5d7f8..4a6c3ac885b 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -268,7 +268,7 @@ bool isStorageTouchedByMutations( PullingAsyncPipelineExecutor executor(io.pipeline); Block block; - while (block.rows() == 0 && executor.pull(block)); + while (block.rows() == 0 && executor.pull(block, 0)); if (!block.rows()) return false; @@ -276,7 +276,7 @@ bool isStorageTouchedByMutations( throw Exception(ErrorCodes::LOGICAL_ERROR, "count() expression returned {} rows, not 1", block.rows()); Block tmp_block; - while (executor.pull(tmp_block)); + while (executor.pull(tmp_block, 0)); auto count = (*block.getByName("count()").column)[0].get(); return count != 0; diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp b/src/Processors/Executors/CompletedPipelineExecutor.cpp index 2964d9b6aa2..dd7b96c01e5 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.cpp +++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp @@ -32,7 +32,8 @@ struct CompletedPipelineExecutor::Data } }; -static void threadFunction(CompletedPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads) +static void threadFunction( + CompletedPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads, bool concurrency_control) { SCOPE_EXIT_SAFE( if (thread_group) @@ -45,7 +46,7 @@ static void threadFunction(CompletedPipelineExecutor::Data & data, ThreadGroupPt if (thread_group) CurrentThread::attachToGroup(thread_group); - data.executor->execute(num_threads); + data.executor->execute(num_threads, concurrency_control); } catch (...) { @@ -79,9 +80,13 @@ void CompletedPipelineExecutor::execute() /// Avoid passing this to lambda, copy ptr to data instead. /// Destructor of unique_ptr copy raw ptr into local variable first, only then calls object destructor. - auto func = [data_ptr = data.get(), num_threads = pipeline.getNumThreads(), thread_group = CurrentThread::getGroup()] + auto func = [ + data_ptr = data.get(), + num_threads = pipeline.getNumThreads(), + thread_group = CurrentThread::getGroup(), + concurrency_control = pipeline.getConcurrencyControl()] { - threadFunction(*data_ptr, thread_group, num_threads); + threadFunction(*data_ptr, thread_group, num_threads, concurrency_control); }; data->thread = ThreadFromGlobalPool(std::move(func)); @@ -102,7 +107,7 @@ void CompletedPipelineExecutor::execute() { PipelineExecutor executor(pipeline.processors, pipeline.process_list_element); executor.setReadProgressCallback(pipeline.getReadProgressCallback()); - executor.execute(pipeline.getNumThreads()); + executor.execute(pipeline.getNumThreads(), pipeline.getConcurrencyControl()); } } diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index abb98412b12..3d22e51e288 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -99,7 +99,7 @@ void PipelineExecutor::finish() tasks.finish(); } -void PipelineExecutor::execute(size_t num_threads) +void PipelineExecutor::execute(size_t num_threads, bool concurrency_control) { checkTimeLimit(); if (num_threads < 1) @@ -110,7 +110,7 @@ void PipelineExecutor::execute(size_t num_threads) try { - executeImpl(num_threads); + executeImpl(num_threads, concurrency_control); /// Execution can be stopped because of exception. Check and rethrow if any. 
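// The concurrency_control flag threaded through execute() above changes only
// how many worker threads are actually granted; initializeExecution() (below
// in this diff) reduces to the following two paths:
//
//     size_t use_threads = num_threads;        // concurrency_control == false
//     if (concurrency_control)
//     {
//         constexpr size_t min_threads = 1;    // always allowed to make progress
//         slots = ConcurrencyControl::instance().allocate(min_threads, num_threads);
//         use_threads = slots->grantedCount(); // may be < num_threads under load
//     }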
for (auto & node : graph->nodes) @@ -137,7 +137,7 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) { if (!is_execution_initialized) { - initializeExecution(1); + initializeExecution(1, false); // Acquire slot until we are done single_thread_slot = slots->tryAcquire(); @@ -297,14 +297,19 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie #endif } -void PipelineExecutor::initializeExecution(size_t num_threads) +void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_control) { is_execution_initialized = true; - /// Allocate CPU slots from concurrency control - constexpr size_t min_threads = 1; - slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); - size_t use_threads = slots->grantedCount(); + size_t use_threads = num_threads; + + if (concurrency_control) + { + /// Allocate CPU slots from concurrency control + constexpr size_t min_threads = 1; + slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); + use_threads = slots->grantedCount(); + } Queue queue; graph->initializeExecution(queue); @@ -352,9 +357,9 @@ void PipelineExecutor::spawnThreads() } } -void PipelineExecutor::executeImpl(size_t num_threads) +void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) { - initializeExecution(num_threads); + initializeExecution(num_threads, concurrency_control); bool finished_flag = false; diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 1e7d52d8290..dee12dad282 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -38,7 +38,7 @@ public: /// Execute pipeline in multiple threads. Must be called once. /// In case of exception during execution throws any occurred. - void execute(size_t num_threads); + void execute(size_t num_threads, bool concurrency_control); /// Execute single step. Step will be stopped when yield_flag is true. /// Execution is happened in a single thread. @@ -67,7 +67,7 @@ private: ExecutorTasks tasks; - // Concurrency control related + /// Concurrency control related ConcurrencyControl::AllocationPtr slots; ConcurrencyControl::SlotPtr single_thread_slot; // slot for single-thread mode to work using executeStep() std::unique_ptr pool; @@ -92,12 +92,12 @@ private: using Queue = std::queue; - void initializeExecution(size_t num_threads); /// Initialize executor contexts and task_queue. + void initializeExecution(size_t num_threads, bool concurrency_control); /// Initialize executor contexts and task_queue. void finalizeExecution(); /// Check all processors are finished. void spawnThreads(); /// Methods connected to execution. 
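// Every caller now has to state its intent explicitly. The call-site pattern
// used throughout this patch is:
//
//     executor.execute(pipeline.getNumThreads(), pipeline.getConcurrencyControl());
//
// while unit tests and single-stream helpers (for example the external-tables
// sender in Connection.cpp) pass (1, false) to opt out of the server-wide
// thread accounting.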
- void executeImpl(size_t num_threads); + void executeImpl(size_t num_threads, bool concurrency_control); void executeStepImpl(size_t thread_num, std::atomic_bool * yield_flag = nullptr); void executeSingleThread(size_t thread_num); void finish(); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index b2608f665b7..345bec395b2 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -67,7 +67,8 @@ const Block & PullingAsyncPipelineExecutor::getHeader() const return lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader(); } -static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads) +static void threadFunction( + PullingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads, bool concurrency_control) { SCOPE_EXIT_SAFE( if (thread_group) @@ -80,7 +81,7 @@ static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGrou if (thread_group) CurrentThread::attachToGroup(thread_group); - data.executor->execute(num_threads); + data.executor->execute(num_threads, concurrency_control); } catch (...) { @@ -108,7 +109,7 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) auto func = [&, thread_group = CurrentThread::getGroup()]() { - threadFunction(*data, thread_group, pipeline.getNumThreads()); + threadFunction(*data, thread_group, pipeline.getNumThreads(), pipeline.getConcurrencyControl()); }; data->thread = ThreadFromGlobalPool(std::move(func)); diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 59d33cbffed..a816ab9ca7f 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -98,7 +98,8 @@ struct PushingAsyncPipelineExecutor::Data } }; -static void threadFunction(PushingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads) +static void threadFunction( + PushingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads, bool concurrency_control) { SCOPE_EXIT_SAFE( if (thread_group) @@ -111,7 +112,7 @@ static void threadFunction(PushingAsyncPipelineExecutor::Data & data, ThreadGrou if (thread_group) CurrentThread::attachToGroup(thread_group); - data.executor->execute(num_threads); + data.executor->execute(num_threads, concurrency_control); } catch (...) 
{ @@ -172,7 +173,7 @@ void PushingAsyncPipelineExecutor::start() auto func = [&, thread_group = CurrentThread::getGroup()]() { - threadFunction(*data, thread_group, pipeline.getNumThreads()); + threadFunction(*data, thread_group, pipeline.getNumThreads(), pipeline.getConcurrencyControl()); }; data->thread = ThreadFromGlobalPool(std::move(func)); diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 3fbe3d89845..6e449013da3 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -168,7 +168,6 @@ QueryPipelineBuilderPtr QueryPlan::buildQueryPipeline( QueryPipelineBuilderPtr last_pipeline; - std::stack stack; stack.push(Frame{.node = root}); diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index 19d87b101de..04170bee26b 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -98,6 +98,9 @@ public: void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; } size_t getMaxThreads() const { return max_threads; } + void setConcurrencyControl(bool concurrency_control_) { concurrency_control = concurrency_control_; } + bool getConcurrencyControl() const { return concurrency_control; } + /// Tree node. Step and it's children. struct Node { @@ -119,6 +122,7 @@ private: /// Those fields are passed to QueryPipeline. size_t max_threads = 0; + bool concurrency_control = false; }; std::string debugExplainStep(const IQueryPlanStep & step); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 31bab46b868..3dcfd203424 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -427,6 +427,7 @@ Chain buildPushingToViewsChain( processors.emplace_back(std::move(finalizing_views)); result_chain = Chain(std::move(processors)); result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams)); + result_chain.setConcurrencyControl(settings.max_threads_use_concurrency_control); } if (auto * live_view = dynamic_cast(storage.get())) diff --git a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp index 40718bd968a..ce5992c2548 100644 --- a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp +++ b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp @@ -29,7 +29,7 @@ TEST(Processors, PortsConnected) QueryStatusPtr element; PipelineExecutor executor(processors, element); - executor.execute(1); + executor.execute(1, false); } TEST(Processors, PortsNotConnected) @@ -55,7 +55,7 @@ TEST(Processors, PortsNotConnected) { QueryStatusPtr element; PipelineExecutor executor(processors, element); - executor.execute(1); + executor.execute(1, false); ASSERT_TRUE(false) << "Should have thrown."; } catch (DB::Exception & e) diff --git a/src/QueryPipeline/Chain.h b/src/QueryPipeline/Chain.h index d6139281990..5323f5dc333 100644 --- a/src/QueryPipeline/Chain.h +++ b/src/QueryPipeline/Chain.h @@ -25,6 +25,9 @@ public: size_t getNumThreads() const { return num_threads; } void setNumThreads(size_t num_threads_) { num_threads = num_threads_; } + bool getConcurrencyControl() const { return concurrency_control; } + void setConcurrencyControl(bool concurrency_control_) { concurrency_control = concurrency_control_; } + void addSource(ProcessorPtr processor); void addSink(ProcessorPtr processor); @@ -57,6 
+60,7 @@ private: /// input port output port std::list processors; size_t num_threads = 0; + bool concurrency_control = false; }; } diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index 0476b8e4bbf..f14cf61aac2 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -100,6 +100,9 @@ public: size_t getNumThreads() const { return num_threads; } void setNumThreads(size_t num_threads_) { num_threads = num_threads_; } + bool getConcurrencyControl() const { return concurrency_control; } + void setConcurrencyControl(bool concurrency_control_) { concurrency_control = concurrency_control_; } + void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(const ProgressCallback & callback); void setLimitsAndQuota(const StreamLocalLimits & limits, std::shared_ptr quota_); @@ -157,6 +160,7 @@ private: IOutputFormat * output_format = nullptr; size_t num_threads = 0; + bool concurrency_control = false; friend class PushingPipelineExecutor; friend class PullingPipelineExecutor; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index a4edf107b2f..b9b0b27872c 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -278,6 +278,7 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( /// Note: it may be > than settings.max_threads, so we should apply this limit again. bool will_limit_max_threads = true; size_t max_threads = 0; + bool concurrency_control = false; Pipes pipes; QueryPlanResourceHolder resources; @@ -297,6 +298,9 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( /// It may happen if max_distributed_connections > max_threads if (pipeline.max_threads > max_threads_limit) max_threads_limit = pipeline.max_threads; + + if (pipeline.useConcurrencyControl()) + concurrency_control = true; } QueryPipelineBuilder pipeline; @@ -307,6 +311,9 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( { pipeline.setMaxThreads(max_threads); pipeline.limitMaxThreads(max_threads_limit); + + if (concurrency_control) + pipeline.enableConcurrencyControl(); } pipeline.setCollectedProcessors(nullptr); @@ -639,6 +646,7 @@ QueryPipeline QueryPipelineBuilder::getPipeline(QueryPipelineBuilder builder) QueryPipeline res(std::move(builder.pipe)); res.addResources(std::move(builder.resources)); res.setNumThreads(builder.getNumThreads()); + res.setConcurrencyControl(builder.useConcurrencyControl()); res.setProcessListElement(builder.process_list_element); res.setProgressCallback(builder.progress_callback); return res; diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index 3a5d65d4388..4beb6ca404c 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -172,6 +172,16 @@ public: max_threads = max_threads_; } + void enableConcurrencyControl() + { + concurrency_control = true; + } + + bool useConcurrencyControl() + { + return concurrency_control; + } + void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } @@ -189,6 +199,8 @@ private: /// Sometimes, more streams are created then the number of threads for more optimal execution. 
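    /// Rough propagation path of this flag (a sketch pieced together from this patch, not authoritative):
    ///     builder.enableConcurrencyControl();                                   /// set while the pipeline is built
    ///     QueryPipeline pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
    ///     /// getPipeline() forwards it via res.setConcurrencyControl(builder.useConcurrencyControl()),
    ///     /// and the executors finally pass it to PipelineExecutor::execute(num_threads, concurrency_control).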
size_t max_threads = 0; + bool concurrency_control = false; + QueryStatusPtr process_list_element; ProgressCallback progress_callback = nullptr; diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 7b8eaa21947..5301abac2b2 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -1134,7 +1134,7 @@ namespace }); auto executor = cur_pipeline.execute(); - executor->execute(1); + executor->execute(1, false); } } diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index dae6f6a7ca9..414a2f43504 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -719,6 +719,7 @@ bool StorageFileLog::streamToViews() { block_io.pipeline.complete(std::move(input)); block_io.pipeline.setNumThreads(max_streams_number); + block_io.pipeline.setConcurrencyControl(new_context->getSettingsRef().max_threads_use_concurrency_control); block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); CompletedPipelineExecutor executor(block_io.pipeline); executor.execute(); diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 3381561eb1b..aae6ae2a066 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -808,6 +808,7 @@ bool StorageKafka::streamToViews() // we need to read all consumers in parallel (sequential read may lead to situation // when some of consumers are not used, and will break some Kafka consumer invariants) block_io.pipeline.setNumThreads(stream_count); + block_io.pipeline.setConcurrencyControl(kafka_context->getSettingsRef().max_threads_use_concurrency_control); block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); CompletedPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 173bb128c4a..4398afea27a 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -457,7 +457,7 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) PullingAsyncPipelineExecutor executor(pipeline); Block this_block; - while (executor.pull(this_block)) + while (executor.pull(this_block, 0)) new_mergeable_blocks->push_back(this_block); if (new_mergeable_blocks->empty()) @@ -478,7 +478,7 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) }); auto executor = pipeline.execute(); - executor->execute(pipeline.getNumThreads()); + executor->execute(pipeline.getNumThreads(), local_context->getSettingsRef().max_threads_use_concurrency_control); } void StorageLiveView::refresh() @@ -593,7 +593,7 @@ MergeableBlocksPtr StorageLiveView::collectMergeableBlocks(ContextPtr local_cont PullingAsyncPipelineExecutor executor(pipeline); Block this_block; - while (executor.pull(this_block)) + while (executor.pull(this_block, 0)) base_blocks->push_back(this_block); new_blocks->push_back(base_blocks); @@ -698,7 +698,7 @@ bool StorageLiveView::getNewBlocks(const std::lock_guard & lock) PullingAsyncPipelineExecutor executor(pipeline); Block block; - while (executor.pull(block)) + while (executor.pull(block, 0)) { if (block.rows() == 0) continue; diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 94d5db170a8..87a29864c23 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ 
b/src/Storages/WindowView/StorageWindowView.cpp @@ -630,7 +630,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) Block block; BlocksPtr new_blocks = std::make_shared(); - while (executor.pull(block)) + while (executor.pull(block, 0)) { if (block.rows() == 0) continue; @@ -1571,7 +1571,7 @@ void StorageWindowView::writeIntoWindowView( }); auto executor = builder.execute(); - executor->execute(builder.getNumThreads()); + executor->execute(builder.getNumThreads(), local_context->getSettingsRef().max_threads_use_concurrency_control); } void StorageWindowView::startup() From 6fddb5bad35073235cf5f92d2e7644cf3ad693fd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 06:29:04 +0200 Subject: [PATCH 0024/1687] Simplification --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 ++-- src/Client/ClientBase.cpp | 2 +- src/Core/Settings.h | 2 +- src/Dictionaries/DirectDictionary.cpp | 1 - src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp | 4 ++-- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 ++ src/Interpreters/MutationsInterpreter.cpp | 4 ++-- src/Planner/PlannerJoinTree.cpp | 2 ++ src/Processors/Transforms/buildPushingToViewsChain.cpp | 2 +- src/QueryPipeline/QueryPipelineBuilder.cpp | 9 +++------ src/QueryPipeline/QueryPipelineBuilder.h | 6 +++--- src/Storages/FileLog/StorageFileLog.cpp | 2 +- src/Storages/Kafka/StorageKafka.cpp | 2 +- src/Storages/LiveView/StorageLiveView.cpp | 8 ++++---- src/Storages/WindowView/StorageWindowView.cpp | 4 ++-- 17 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index e5684113014..7ab0261850b 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1880,7 +1880,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden Block block; - while (block.rows() == 0 && executor.pull(block, 0)) + while (block.rows() == 0 && executor.pull(block)) { } @@ -1911,7 +1911,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); Block tmp_block; - while (tmp_block.rows() == 0 && executor.pull(tmp_block, 0)) + while (tmp_block.rows() == 0 && executor.pull(tmp_block)) { } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 571781ab817..d701d2426f7 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1456,7 +1456,7 @@ try } Block block; - while (executor.pull(block, 0)) + while (executor.pull(block)) { if (!cancelled && QueryInterruptHandler::cancelled()) { diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 97eec7ac6dd..9b03f7ab147 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -46,7 +46,7 @@ class IColumn; M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \ M(MaxThreads, max_final_threads, 0, "The maximum number of threads to read from table with FINAL.", 0) \ M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. 
By default, it is determined automatically.", 0) \ - M(Bool, max_threads_use_concurrency_control, true, "Respect the server's concurrency control (see the `concurrent_threads_soft_limit_num` and `concurrent_threads_soft_limit_ratio_to_cores` global server settings). If disabled, it allows using a larger number of threads even if the server is overloaded (not recommended for normal usage, and needed mostly for tests).", 0) \ + M(Bool, use_concurrency_control, true, "Respect the server's concurrency control (see the `concurrent_threads_soft_limit_num` and `concurrent_threads_soft_limit_ratio_to_cores` global server settings). If disabled, it allows using a larger number of threads even if the server is overloaded (not recommended for normal usage, and needed mostly for tests).", 0) \ M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \ M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \ M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \ diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 7b80cbd29b5..36a0642abce 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -311,7 +311,6 @@ public: private: QueryPipeline pipeline; TExecutor executor; - bool concurrency_control; }; template diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 63a5720d6ab..98a4002836e 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -189,7 +189,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr PullingAsyncPipelineExecutor executor(io.pipeline); io.pipeline.setProgressCallback(data.getContext()->getProgressCallback()); - while (block.rows() == 0 && executor.pull(block, 0)) + while (block.rows() == 0 && executor.pull(block)) { } @@ -222,7 +222,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); Block tmp_block; - while (tmp_block.rows() == 0 && executor.pull(tmp_block, 0)) + while (tmp_block.rows() == 0 && executor.pull(tmp_block)) { } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index b6b288d4a2e..96a86df7ffd 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -478,7 +478,7 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_ set->setHeader(executor.getHeader().getColumnsWithTypeAndName()); Block block; - while (executor.pull(block, 0)) + while (executor.pull(block)) { if (block.rows() == 0) continue; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 7676db6f5ea..93b67514811 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -554,7 +554,7 @@ BlockIO InterpreterInsertQuery::execute() { res.pipeline = QueryPipeline(std::move(out_chains.at(0))); res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); - res.pipeline.setConcurrencyControl(settings.max_threads_use_concurrency_control); + 
res.pipeline.setConcurrencyControl(settings.use_concurrency_control); if (query.hasInlinedData() && !async_insert) { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 895db55f5af..5e813aa535a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2486,6 +2486,8 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc if (!query_plan.getMaxThreads() || is_remote) query_plan.setMaxThreads(max_threads_execute_query); + query_plan.setConcurrencyControl(settings.use_concurrency_control); + /// Aliases in table declaration. if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions) { diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 4a6c3ac885b..2f05ba5d7f8 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -268,7 +268,7 @@ bool isStorageTouchedByMutations( PullingAsyncPipelineExecutor executor(io.pipeline); Block block; - while (block.rows() == 0 && executor.pull(block, 0)); + while (block.rows() == 0 && executor.pull(block)); if (!block.rows()) return false; @@ -276,7 +276,7 @@ bool isStorageTouchedByMutations( throw Exception(ErrorCodes::LOGICAL_ERROR, "count() expression returned {} rows, not 1", block.rows()); Block tmp_block; - while (executor.pull(tmp_block, 0)); + while (executor.pull(tmp_block)); auto count = (*block.getByName("count()").column)[0].get(); return count != 0; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 93a9945b1ca..49808695625 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -746,6 +746,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres */ if (!query_plan.getMaxThreads() || is_remote) query_plan.setMaxThreads(max_threads_execute_query); + + query_plan.setConcurrencyControl(settings.use_concurrency_control); } else { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 3dcfd203424..5f24abce829 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -427,7 +427,7 @@ Chain buildPushingToViewsChain( processors.emplace_back(std::move(finalizing_views)); result_chain = Chain(std::move(processors)); result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams)); - result_chain.setConcurrencyControl(settings.max_threads_use_concurrency_control); + result_chain.setConcurrencyControl(settings.use_concurrency_control); } if (auto * live_view = dynamic_cast(storage.get())) diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index b9b0b27872c..afd086aa5dd 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -299,8 +299,7 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( if (pipeline.max_threads > max_threads_limit) max_threads_limit = pipeline.max_threads; - if (pipeline.useConcurrencyControl()) - concurrency_control = true; + concurrency_control = pipeline.getConcurrencyControl(); } QueryPipelineBuilder pipeline; @@ -311,9 +310,7 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( { pipeline.setMaxThreads(max_threads); pipeline.limitMaxThreads(max_threads_limit); - - if (concurrency_control) - 
pipeline.enableConcurrencyControl(); + pipeline.setConcurrencyControl(concurrency_control); } pipeline.setCollectedProcessors(nullptr); @@ -646,7 +643,7 @@ QueryPipeline QueryPipelineBuilder::getPipeline(QueryPipelineBuilder builder) QueryPipeline res(std::move(builder.pipe)); res.addResources(std::move(builder.resources)); res.setNumThreads(builder.getNumThreads()); - res.setConcurrencyControl(builder.useConcurrencyControl()); + res.setConcurrencyControl(builder.getConcurrencyControl()); res.setProcessListElement(builder.process_list_element); res.setProgressCallback(builder.progress_callback); return res; diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index 4beb6ca404c..d316b5c04a2 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -172,12 +172,12 @@ public: max_threads = max_threads_; } - void enableConcurrencyControl() + void setConcurrencyControl(bool concurrency_control_) { - concurrency_control = true; + concurrency_control = concurrency_control_; } - bool useConcurrencyControl() + bool getConcurrencyControl() { return concurrency_control; } diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 414a2f43504..5faccefd836 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -719,7 +719,7 @@ bool StorageFileLog::streamToViews() { block_io.pipeline.complete(std::move(input)); block_io.pipeline.setNumThreads(max_streams_number); - block_io.pipeline.setConcurrencyControl(new_context->getSettingsRef().max_threads_use_concurrency_control); + block_io.pipeline.setConcurrencyControl(new_context->getSettingsRef().use_concurrency_control); block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); CompletedPipelineExecutor executor(block_io.pipeline); executor.execute(); diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index aae6ae2a066..035e67515a4 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -808,7 +808,7 @@ bool StorageKafka::streamToViews() // we need to read all consumers in parallel (sequential read may lead to situation // when some of consumers are not used, and will break some Kafka consumer invariants) block_io.pipeline.setNumThreads(stream_count); - block_io.pipeline.setConcurrencyControl(kafka_context->getSettingsRef().max_threads_use_concurrency_control); + block_io.pipeline.setConcurrencyControl(kafka_context->getSettingsRef().use_concurrency_control); block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); CompletedPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 4398afea27a..bd1a5638d4f 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -457,7 +457,7 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) PullingAsyncPipelineExecutor executor(pipeline); Block this_block; - while (executor.pull(this_block, 0)) + while (executor.pull(this_block)) new_mergeable_blocks->push_back(this_block); if (new_mergeable_blocks->empty()) @@ -478,7 +478,7 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) }); auto executor = pipeline.execute(); - executor->execute(pipeline.getNumThreads(), 
local_context->getSettingsRef().max_threads_use_concurrency_control); + executor->execute(pipeline.getNumThreads(), local_context->getSettingsRef().use_concurrency_control); } void StorageLiveView::refresh() @@ -593,7 +593,7 @@ MergeableBlocksPtr StorageLiveView::collectMergeableBlocks(ContextPtr local_cont PullingAsyncPipelineExecutor executor(pipeline); Block this_block; - while (executor.pull(this_block, 0)) + while (executor.pull(this_block)) base_blocks->push_back(this_block); new_blocks->push_back(base_blocks); @@ -698,7 +698,7 @@ bool StorageLiveView::getNewBlocks(const std::lock_guard & lock) PullingAsyncPipelineExecutor executor(pipeline); Block block; - while (executor.pull(block, 0)) + while (executor.pull(block)) { if (block.rows() == 0) continue; diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 87a29864c23..a2b145827ef 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -630,7 +630,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) Block block; BlocksPtr new_blocks = std::make_shared(); - while (executor.pull(block, 0)) + while (executor.pull(block)) { if (block.rows() == 0) continue; @@ -1571,7 +1571,7 @@ void StorageWindowView::writeIntoWindowView( }); auto executor = builder.execute(); - executor->execute(builder.getNumThreads(), local_context->getSettingsRef().max_threads_use_concurrency_control); + executor->execute(builder.getNumThreads(), local_context->getSettingsRef().use_concurrency_control); } void StorageWindowView::startup() From 47731a5056a5917a18a7e0a5966b7e6d208d0da6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 21:47:35 +0200 Subject: [PATCH 0025/1687] Fix error --- src/Processors/Executors/PipelineExecutor.cpp | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 3d22e51e288..67431ccf97f 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -3,16 +3,13 @@ #include #include #include -#include #include #include #include #include #include #include -#include #include -#include #include #include @@ -137,12 +134,11 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) { if (!is_execution_initialized) { - initializeExecution(1, false); + initializeExecution(1, true); // Acquire slot until we are done single_thread_slot = slots->tryAcquire(); - if (!single_thread_slot) - abort(); // Unable to allocate slot for the first thread, but we just allocated at least one slot + chassert(single_thread_slot && "Unable to allocate slot for the first thread, but we just allocated at least one slot"); if (yield_flag && *yield_flag) return true; @@ -303,13 +299,10 @@ void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_ size_t use_threads = num_threads; - if (concurrency_control) - { - /// Allocate CPU slots from concurrency control - constexpr size_t min_threads = 1; - slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); - use_threads = slots->grantedCount(); - } + /// Allocate CPU slots from concurrency control + size_t min_threads = concurrency_control ? 
1uz : num_threads; + slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); + use_threads = slots->grantedCount(); Queue queue; graph->initializeExecution(queue); @@ -325,7 +318,8 @@ void PipelineExecutor::spawnThreads() { while (auto slot = slots->tryAcquire()) { - size_t thread_num = threads++; + size_t thread_num = threads; + ++threads; /// Count of threads in use should be updated for proper finish() condition. /// NOTE: this will not decrease `use_threads` below initially granted count From 39a505e3f38cecc520500ce87f1763d687ee91b5 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Wed, 10 May 2023 14:18:15 +0300 Subject: [PATCH 0026/1687] init --- src/Functions/array/arrayFold.cpp | 181 ++++++++++++++++++++++ src/Functions/array/arrayFold2.cpp | 239 +++++++++++++++++++++++++++++ tests/performance/array_fold.xml | 5 + 3 files changed, 425 insertions(+) create mode 100644 src/Functions/array/arrayFold.cpp create mode 100644 src/Functions/array/arrayFold2.cpp create mode 100644 tests/performance/array_fold.xml diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp new file mode 100644 index 00000000000..d1cc2ae96a5 --- /dev/null +++ b/src/Functions/array/arrayFold.cpp @@ -0,0 +1,181 @@ +#include "FunctionArrayMapped.h" +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; + extern const int TYPE_MISMATCH; +} + + +/** arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) - apply the expression to each element of the array (or set of parallel arrays). + */ +class ArrayFoldOld : public IFunction +{ +public: + static constexpr auto name = "arrayFoldOld"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + void getLambdaArgumentTypes(DataTypes & arguments) const override + { + if (arguments.size() < 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs lambda function, at least one array argument and one accumulator argument.", getName()); + DataTypes nested_types(arguments.size() - 1); + for (size_t i = 0; i < nested_types.size() - 1; ++i) + { + const DataTypeArray * array_type = checkAndGetDataType(&*arguments[i + 1]); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array. Found {} instead.", toString(i + 2), getName(), arguments[i + 1]->getName()); + nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); + } + nested_types[nested_types.size() - 1] = arguments[arguments.size() - 1]; + + const DataTypeFunction * function_type = checkAndGetDataType(arguments[0].get()); + if (!function_type || function_type->getArgumentTypes().size() != nested_types.size()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for this overload of {} must be a function with {} arguments. 
Found {} instead.", + getName(), toString(nested_types.size()), arguments[0]->getName()); + + arguments[0] = std::make_shared(nested_types); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs at least 2 arguments; passed {}.", getName(), toString(arguments.size())); + const auto * data_type_function = checkAndGetDataType(arguments[0].type.get()); + if (!data_type_function) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName()); + + auto const accumulator_type = arguments.back().type; + auto const lambda_type = data_type_function->getReturnType(); + if (! accumulator_type->equals(*lambda_type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Return type of lambda function must be the same as the accumulator type. " + "Inferred type of lambda {}, inferred type of accumulator {}.", lambda_type->getName(), accumulator_type->getName()); + + return DataTypePtr(accumulator_type); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const auto & column_with_type_and_name = arguments[0]; + + if (!column_with_type_and_name.column) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName()); + + const auto * column_function = typeid_cast(column_with_type_and_name.column.get()); + + if (!column_function) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName()); + + ColumnPtr offsets_column; + ColumnPtr column_first_array_ptr; + const ColumnArray * column_first_array = nullptr; + ColumnsWithTypeAndName arrays; + arrays.reserve(arguments.size() - 1); + + for (size_t i = 1; i < arguments.size() - 1; ++i) + { + const auto & array_with_type_and_name = arguments[i]; + ColumnPtr column_array_ptr = array_with_type_and_name.column; + const auto * column_array = checkAndGetColumn(column_array_ptr.get()); + const DataTypePtr & array_type_ptr = array_with_type_and_name.type; + const auto * array_type = checkAndGetDataType(array_type_ptr.get()); + if (!column_array) + { + const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); + if (!column_const_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected array column, found {}", column_array_ptr->getName()); + column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn()); + column_array = checkAndGetColumn(column_array_ptr.get()); + } + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected array type, found {}", array_type_ptr->getName()); + if (!offsets_column) + { + offsets_column = column_array->getOffsetsPtr(); + } + else + { + /// The first condition is optimization: do not compare data if the pointers are equal. 
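+            /// Illustration of what the size check below rejects (hypothetical call):
+            ///     arrayFold((x, y, acc) -> acc + x * y, [1, 2], [10, 20, 30], 0)
+            /// must throw SIZES_OF_ARRAYS_DONT_MATCH, because the parallel arrays differ in length.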
+ if (column_array->getOffsetsPtr() != offsets_column + && column_array->getOffsets() != typeid_cast(*offsets_column).getData()) + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} must have equal size", getName()); + } + if (i == 1) + { + column_first_array_ptr = column_array_ptr; + column_first_array = column_array; + } + arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), + recursiveRemoveLowCardinality(array_type->getNestedType()), + array_with_type_and_name.name)); + } + arrays.emplace_back(arguments.back()); + + MutableColumnPtr result = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); + size_t arr_cursor = 0; + for (size_t irow = 0; irow < column_first_array->size(); ++irow) // for each row of result + { + // Make accumulator column for this row. We initialize it + // with the starting value given as the last argument. + ColumnWithTypeAndName accumulator_column = arguments.back(); + ColumnPtr acc(accumulator_column.column->cut(irow, 1)); + auto accumulator = ColumnWithTypeAndName(acc, + accumulator_column.type, + accumulator_column.name); + ColumnPtr res(acc); + size_t const arr_next = column_first_array->getOffsets()[irow]; // when we do folding + for (; arr_cursor < arr_next; ++arr_cursor) + { + // Make slice of input arrays and accumulator for lambda + ColumnsWithTypeAndName iter_arrays; + iter_arrays.reserve(arrays.size() + 1); + for (size_t icolumn = 0; icolumn < arrays.size() - 1; ++icolumn) + { + auto const & arr = arrays[icolumn]; + iter_arrays.emplace_back(ColumnWithTypeAndName(arr.column->cut(arr_cursor, 1), + arr.type, + arr.name)); + } + iter_arrays.emplace_back(accumulator); + // Calculate function on arguments + auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(ColumnArray::Offsets(column_first_array->getOffsets().size(), 1))); + auto * replicated_column_function = typeid_cast(replicated_column_function_ptr.get()); + replicated_column_function->appendArguments(iter_arrays); + auto lambda_result = replicated_column_function->reduce().column; + if (lambda_result->lowCardinality()) + lambda_result = lambda_result->convertToFullColumnIfLowCardinality(); + res = lambda_result->cut(0, 1); + accumulator.column = res; + } + result->insert((*res)[0]); + } + return result; + } + +private: + String getName() const override + { + return name; + } +}; + + +REGISTER_FUNCTION(ArrayFoldOld) +{ + factory.registerFunction(); +} + + +} diff --git a/src/Functions/array/arrayFold2.cpp b/src/Functions/array/arrayFold2.cpp new file mode 100644 index 00000000000..b4256aa5818 --- /dev/null +++ b/src/Functions/array/arrayFold2.cpp @@ -0,0 +1,239 @@ +#include "FunctionArrayMapped.h" +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; + extern const int TYPE_MISMATCH; +} + + +/** arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) - apply the expression to each element of the array (or set of parallel arrays). 
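+ *
+ *  A usage sketch (lambda arguments in the order documented above, accumulator last; the result follows from these semantics):
+ *      SELECT arrayFold((x, acc) -> acc + x, [1, 2, 3, 4], toUInt64(0))   -- a left fold, yielding 10
+ *  Each step passes the current element(s) and the running accumulator to the lambda; the lambda's result becomes the next accumulator.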
+ */ +class ArrayFold : public IFunction +{ +public: + static constexpr auto name = "arrayFold"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + void getLambdaArgumentTypes(DataTypes & arguments) const override + { + if (arguments.size() < 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs lambda function, at least one array argument and one accumulator argument.", getName()); + DataTypes nested_types(arguments.size() - 1); + for (size_t i = 0; i < nested_types.size() - 1; ++i) + { + const DataTypeArray * array_type = checkAndGetDataType(&*arguments[i + 1]); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array. Found {} instead.", toString(i + 2), getName(), arguments[i + 1]->getName()); + nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); + } + nested_types[nested_types.size() - 1] = arguments[arguments.size() - 1]; + + const DataTypeFunction * function_type = checkAndGetDataType(arguments[0].get()); + if (!function_type || function_type->getArgumentTypes().size() != nested_types.size()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for this overload of {} must be a function with {} arguments. Found {} instead.", + getName(), toString(nested_types.size()), arguments[0]->getName()); + + arguments[0] = std::make_shared(nested_types); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs at least 2 arguments; passed {}.", getName(), toString(arguments.size())); + const auto * data_type_function = checkAndGetDataType(arguments[0].type.get()); + if (!data_type_function) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName()); + + auto const accumulator_type = arguments.back().type; + auto const lambda_type = data_type_function->getReturnType(); + if (! accumulator_type->equals(*lambda_type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Return type of lambda function must be the same as the accumulator type. 
" + "Inferred type of lambda {}, inferred type of accumulator {}.", lambda_type->getName(), accumulator_type->getName()); + + return DataTypePtr(accumulator_type); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & column_with_type_and_name = arguments[0]; + + if (!column_with_type_and_name.column) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName()); + + const auto * column_function = typeid_cast(column_with_type_and_name.column.get()); + + if (!column_function) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName()); + + ColumnPtr offsets_column; + ColumnPtr column_first_array_ptr; + const ColumnArray * column_first_array = nullptr; + ColumnsWithTypeAndName arrays; + arrays.reserve(arguments.size() - 1); + + for (size_t i = 1; i < arguments.size() - 1; ++i) + { + const auto & array_with_type_and_name = arguments[i]; + ColumnPtr column_array_ptr = array_with_type_and_name.column; + const auto * column_array = checkAndGetColumn(column_array_ptr.get()); + const DataTypePtr & array_type_ptr = array_with_type_and_name.type; + const auto * array_type = checkAndGetDataType(array_type_ptr.get()); + if (!column_array) + { + const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); + if (!column_const_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected array column, found {}", column_array_ptr->getName()); + column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn()); + column_array = checkAndGetColumn(column_array_ptr.get()); + } + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected array type, found {}", array_type_ptr->getName()); + if (!offsets_column) + { + offsets_column = column_array->getOffsetsPtr(); + } + else + { + /// The first condition is optimization: do not compare data if the pointers are equal. 
+ if (column_array->getOffsetsPtr() != offsets_column + && column_array->getOffsets() != typeid_cast(*offsets_column).getData()) + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} must have equal size", getName()); + } + if (i == 1) + { + column_first_array_ptr = column_array_ptr; + column_first_array = column_array; + } + arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), + recursiveRemoveLowCardinality(array_type->getNestedType()), + array_with_type_and_name.name)); + } + + ssize_t row_count = input_rows_count; + ssize_t data_row_count = arrays[0].column->size(); + MutableColumnPtr current_column; + current_column = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); + current_column->insertMany((*arguments.back().column)[0], row_count); + MutableColumnPtr result_data = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); + IColumn::Permutation inverse_perm; + inverse_perm.reserve(row_count); + size_t inverse_perm_count = 0; + + auto array_count = arrays.size(); + size_t max_array_size = 0; + auto& offsets = column_first_array->getOffsets(); + + for(ssize_t i = 0; i < row_count; i++) { + if (offsets[i] - offsets[i-1] > max_array_size) + max_array_size = offsets[i] - offsets[i-1]; + } + + IColumn::Selector selector(data_row_count); + size_t cur_ind = 0; + ssize_t cur_arr = 0; + + for (ssize_t i = 0; i < data_row_count; i++) + { + selector[i] = cur_ind++; + while (cur_arr < row_count && cur_ind >= offsets[cur_arr] - offsets[cur_arr-1]) + { + ++cur_arr; + cur_ind = 0; + } + } + //if (max_array_size != 2) + // throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected array type, found {} ", max_array_size); + + + std::vector data_arrays; + + data_arrays.reserve(array_count); + + for (size_t i = 0; i < array_count; ++i) + { + data_arrays.push_back(arrays[i].column->scatter(max_array_size, selector)); + + } + + size_t prev_size = row_count; + for (size_t ind = 0; ind < max_array_size; ++ind) { + IColumn::Selector prev_selector(prev_size); + ssize_t prev_ind = 0; + for (ssize_t irow = 0; irow < row_count; ++irow) + { + if (offsets[irow] - offsets[irow-1] > ind) + { + prev_selector[prev_ind++] = 1; + } + else if (offsets[irow] - offsets[irow-1] == ind) + { + inverse_perm[inverse_perm_count++] = irow; + prev_selector[prev_ind++] = 0; + } + } + + if (row_count > 0 && max_array_size > 1 && ind == 0) { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected array type, found {} {} {} ", prev_size, current_column->size() , arrays[0].type->getName()); + } + auto prev = current_column->scatter(2, prev_selector); + + + result_data->insertRangeFrom(*(prev[0]), 0, prev[0]->size()); + + auto res_lambda = column_function->cloneResized(prev[1]->size()); + auto * res_lambda_ptr = typeid_cast(res_lambda.get()); + + for (size_t i = 0; i < array_count; i++) + res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move((data_arrays[i][ind])), arrays[i].type, arrays[i].name)})); + + + res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)})); + current_column = IColumn::mutate(res_lambda_ptr->reduce().column); + prev_size = current_column->size(); + } + + + result_data->insertRangeFrom(*current_column, 0, current_column->size()); + + for (ssize_t irow = 0; irow < row_count; ++irow) + { + if (offsets[irow] - offsets[irow-1] == max_array_size) + inverse_perm[inverse_perm_count++] = irow; + } + + IColumn::Permutation 
perm(row_count);
+        for (ssize_t i = 0; i < row_count; i++) {
+            perm[inverse_perm[i]] = i;
+        }
+
+        return result_data->permute(perm, 0);
+    }
+
+private:
+    String getName() const override
+    {
+        return name;
+    }
+};
+
+
+REGISTER_FUNCTION(ArrayFold)
+{
+    factory.registerFunction<ArrayFold>();
+}
+
+
+}
diff --git a/tests/performance/array_fold.xml b/tests/performance/array_fold.xml
new file mode 100644
index 00000000000..fae8bd164a7
--- /dev/null
+++ b/tests/performance/array_fold.xml
@@ -0,0 +1,5 @@
+<test>
+    <query>SELECT arrayFold((x, acc) -> acc + x, range(number % 100), toUInt64(0)) from numbers(100000) Format Null</query>
+    <query>SELECT arrayFold((x, acc) -> acc + 1, range(number % 100), toUInt64(0)) from numbers(100000) Format Null</query>
+    <query>SELECT arrayFold((x, acc) -> acc + x, range(number), toUInt64(0)) from numbers(10000) Format Null</query>
+</test>

From 213ac1eba43dfced47af3ceeb6c85d609d93b02e Mon Sep 17 00:00:00 2001
From: Lirikl
Date: Thu, 11 May 2023 13:14:52 +0300
Subject: [PATCH 0027/1687] basic version

---
 src/Functions/array/arrayFold2.cpp | 75 +++++++++++++++---------------
 1 file changed, 38 insertions(+), 37 deletions(-)

diff --git a/src/Functions/array/arrayFold2.cpp b/src/Functions/array/arrayFold2.cpp
index b4256aa5818..27be145641b 100644
--- a/src/Functions/array/arrayFold2.cpp
+++ b/src/Functions/array/arrayFold2.cpp
@@ -108,6 +108,10 @@ public:
         else
         {
             /// The first condition is optimization: do not compare data if the pointers are equal.
+            if (column_array->lowCardinality() || array_type->getNestedType())
+            {
+                throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "low cardinality array"); // test
+            }
             if (column_array->getOffsetsPtr() != offsets_column
                 && column_array->getOffsets() != typeid_cast<const ColumnArray::ColumnOffsets &>(*offsets_column).getData())
                 throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} must have equal size", getName());
@@ -121,73 +121,71 @@ public:
                 recursiveRemoveLowCardinality(array_type->getNestedType()),
                 array_with_type_and_name.name));
         }
-
-        ssize_t row_count = input_rows_count;
+        ssize_t rows_count = input_rows_count;
         ssize_t data_row_count = arrays[0].column->size();
         MutableColumnPtr current_column;
         current_column = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty();
-        current_column->insertMany((*arguments.back().column)[0], row_count);
+        if (rows_count == 0) {
+            return current_column;
+        }
+        current_column->insertMany((*arguments.back().column)[0], rows_count);
+
         MutableColumnPtr result_data = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty();
-        IColumn::Permutation inverse_perm;
-        inverse_perm.reserve(row_count);
+        IColumn::Permutation inverse_perm(rows_count);
         size_t inverse_perm_count = 0;

         auto array_count = arrays.size();
         size_t max_array_size = 0;
         auto& offsets = column_first_array->getOffsets();
-
-        for(ssize_t i = 0; i < row_count; i++) {
-            if (offsets[i] - offsets[i-1] > max_array_size)
-                max_array_size = offsets[i] - offsets[i-1];
-        }

         IColumn::Selector selector(data_row_count);
         size_t cur_ind = 0;
         ssize_t cur_arr = 0;
-
-        for (ssize_t i = 0; i < data_row_count; i++)
+        if (data_row_count) {
+            while (offsets[cur_arr] == 0) {
+                ++cur_arr;
+            }
+        }
+        for (ssize_t i = 0; i < data_row_count; ++i)
         {
             selector[i] = cur_ind++;
+            if (cur_ind > max_array_size)
+                max_array_size = cur_ind;
             while (cur_arr < rows_count && cur_ind >= offsets[cur_arr] - offsets[cur_arr - 1])
             {
                 ++cur_arr;
                 cur_ind = 0;
             }
         }
-        //if (max_array_size != 2)
-        //    throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected array type, found {} ", max_array_size);
-
-        std::vector<MutableColumns> data_arrays;
-
-        data_arrays.reserve(array_count);
-
+        std::vector<MutableColumns> data_arrays;
+        data_arrays.resize(array_count);
+
         for (size_t i = 0; i < array_count; ++i)
         {
-            data_arrays.push_back(arrays[i].column->scatter(max_array_size, selector));
-
+            data_arrays[i] = arrays[i].column->scatter(max_array_size, selector);
         }

-        size_t prev_size = row_count;
+        size_t prev_size = rows_count;
+
         for (size_t ind = 0; ind < max_array_size; ++ind) {
             IColumn::Selector prev_selector(prev_size);
-            ssize_t prev_ind = 0;
-            for (ssize_t irow = 0; irow < row_count; ++irow)
+            size_t prev_ind = 0;
+            for (ssize_t irow = 0; irow < rows_count; ++irow)
             {
-                if (offsets[irow] - offsets[irow-1] > ind)
+                if (offsets[irow] - offsets[irow - 1] > ind)
                 {
                     prev_selector[prev_ind++] = 1;
                 }
-                else if (offsets[irow] - offsets[irow-1] == ind)
+                else if (offsets[irow] - offsets[irow - 1] == ind)
                 {
                     inverse_perm[inverse_perm_count++] = irow;
                     prev_selector[prev_ind++] = 0;
                 }
             }

-            if (row_count > 0 && max_array_size > 1 && ind == 0) {
-                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected array type, found {} {} {} ", prev_size, current_column->size() , arrays[0].type->getName());
-            }
+            // remove rows that don't have any more elements
             auto prev = current_column->scatter(2, prev_selector);

-
             result_data->insertRangeFrom(*(prev[0]), 0, prev[0]->size());

             auto res_lambda = column_function->cloneResized(prev[1]->size());
             auto * res_lambda_ptr = typeid_cast<ColumnFunction *>(res_lambda.get());

             for (size_t i = 0; i < array_count; i++)
-                res_lambda_ptr->appendArguments(std::vector<ColumnWithTypeAndName>({ColumnWithTypeAndName(std::move((data_arrays[i][ind])), arrays[i].type, arrays[i].name)}));
-
+                res_lambda_ptr->appendArguments(std::vector<ColumnWithTypeAndName>({ColumnWithTypeAndName(std::move(data_arrays[i][ind]), arrays[i].type, arrays[i].name)}));

             res_lambda_ptr->appendArguments(std::vector<ColumnWithTypeAndName>({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)}));
+
             current_column = IColumn::mutate(res_lambda_ptr->reduce().column);
             prev_size = current_column->size();
         }

-
         result_data->insertRangeFrom(*current_column, 0, current_column->size());
+

         for (ssize_t irow = 0; irow < rows_count; ++irow)
         {
             if (offsets[irow] - offsets[irow-1] == max_array_size)
                 inverse_perm[inverse_perm_count++] = irow;
         }

-        IColumn::Permutation perm(row_count);
-        for (ssize_t i = 0; i < row_count; i++) {
+        IColumn::Permutation perm(rows_count);
+        for (ssize_t i = 0; i < rows_count; i++) {
             perm[inverse_perm[i]] = i;
         }

From 67e28aecc3f739ded17d34e077594df74144d841 Mon Sep 17 00:00:00 2001
From: Lirikl
Date: Thu, 11 May 2023 14:57:23 +0300
Subject: [PATCH 0028/1687] style

---
 src/Functions/array/arrayFold.cpp  | 132 +++++++++++-----
 src/Functions/array/arrayFold2.cpp | 240 -----------------------------
 2 files changed, 90 insertions(+), 282 deletions(-)
 delete mode 100644 src/Functions/array/arrayFold2.cpp

diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp
index d1cc2ae96a5..7f7d8d2787f 100644
--- a/src/Functions/array/arrayFold.cpp
+++ b/src/Functions/array/arrayFold.cpp
@@ -17,11 +17,11 @@ namespace ErrorCodes
 /** arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) - apply the expression to each element of the array (or set of parallel arrays).
*/ -class ArrayFoldOld : public IFunction +class ArrayFold : public IFunction { public: - static constexpr auto name = "arrayFoldOld"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = "arrayFold"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } @@ -66,7 +66,7 @@ public: return DataTypePtr(accumulator_type); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & column_with_type_and_name = arguments[0]; @@ -121,47 +121,95 @@ public: recursiveRemoveLowCardinality(array_type->getNestedType()), array_with_type_and_name.name)); } - arrays.emplace_back(arguments.back()); - MutableColumnPtr result = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); - size_t arr_cursor = 0; - for (size_t irow = 0; irow < column_first_array->size(); ++irow) // for each row of result + ssize_t rows_count = input_rows_count; + ssize_t data_row_count = arrays[0].column->size(); + auto array_count = arrays.size(); + + MutableColumnPtr current_column; + current_column = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); + if (rows_count == 0) + return current_column; + current_column->insertMany((*arguments.back().column)[0], rows_count); + + MutableColumnPtr result_data = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); + + size_t max_array_size = 0; + auto& offsets = column_first_array->getOffsets(); + + //get columns of Nth array elements + IColumn::Selector selector(data_row_count); + size_t cur_ind = 0; + ssize_t cur_arr = 0; + + if (data_row_count) + while (offsets[cur_arr] == 0) + ++cur_arr; + + for (ssize_t i = 0; i < data_row_count; ++i) { - // Make accumulator column for this row. We initialize it - // with the starting value given as the last argument. 
- ColumnWithTypeAndName accumulator_column = arguments.back(); - ColumnPtr acc(accumulator_column.column->cut(irow, 1)); - auto accumulator = ColumnWithTypeAndName(acc, - accumulator_column.type, - accumulator_column.name); - ColumnPtr res(acc); - size_t const arr_next = column_first_array->getOffsets()[irow]; // when we do folding - for (; arr_cursor < arr_next; ++arr_cursor) + selector[i] = cur_ind++; + if (cur_ind > max_array_size) + max_array_size = cur_ind; + while (cur_arr < rows_count && cur_ind >= offsets[cur_arr] - offsets[cur_arr - 1]) { - // Make slice of input arrays and accumulator for lambda - ColumnsWithTypeAndName iter_arrays; - iter_arrays.reserve(arrays.size() + 1); - for (size_t icolumn = 0; icolumn < arrays.size() - 1; ++icolumn) - { - auto const & arr = arrays[icolumn]; - iter_arrays.emplace_back(ColumnWithTypeAndName(arr.column->cut(arr_cursor, 1), - arr.type, - arr.name)); - } - iter_arrays.emplace_back(accumulator); - // Calculate function on arguments - auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(ColumnArray::Offsets(column_first_array->getOffsets().size(), 1))); - auto * replicated_column_function = typeid_cast(replicated_column_function_ptr.get()); - replicated_column_function->appendArguments(iter_arrays); - auto lambda_result = replicated_column_function->reduce().column; - if (lambda_result->lowCardinality()) - lambda_result = lambda_result->convertToFullColumnIfLowCardinality(); - res = lambda_result->cut(0, 1); - accumulator.column = res; + ++cur_arr; + cur_ind = 0; } - result->insert((*res)[0]); } - return result; + + std::vector data_arrays; + data_arrays.resize(array_count); + + for (size_t i = 0; i < array_count; ++i) + data_arrays[i] = arrays[i].column->scatter(max_array_size, selector); + + size_t prev_size = rows_count; + IColumn::Permutation inverse_permutation(rows_count); + size_t inverse_permutation_count = 0; + + for (size_t ind = 0; ind < max_array_size; ++ind) + { + IColumn::Selector prev_selector(prev_size); + size_t prev_ind = 0; + for (ssize_t irow = 0; irow < rows_count; ++irow) + { + if (offsets[irow] - offsets[irow - 1] > ind) + { + prev_selector[prev_ind++] = 1; + } + else if (offsets[irow] - offsets[irow - 1] == ind) + { + inverse_permutation[inverse_permutation_count++] = irow; + prev_selector[prev_ind++] = 0; + } + } + auto prev = current_column->scatter(2, prev_selector); + + result_data->insertRangeFrom(*(prev[0]), 0, prev[0]->size()); + + auto res_lambda = column_function->cloneResized(prev[1]->size()); + auto * res_lambda_ptr = typeid_cast(res_lambda.get()); + + for (size_t i = 0; i < array_count; i++) + res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(data_arrays[i][ind]), arrays[i].type, arrays[i].name)})); + + res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)})); + + current_column = IColumn::mutate(res_lambda_ptr->reduce().column); + prev_size = current_column->size(); + } + + result_data->insertRangeFrom(*current_column, 0, current_column->size()); + for (ssize_t irow = 0; irow < rows_count; ++irow) + if (offsets[irow] - offsets[irow - 1] == max_array_size) + inverse_permutation[inverse_permutation_count++] = irow; + + IColumn::Permutation perm(rows_count); + for (ssize_t i = 0; i < rows_count; i++) + perm[inverse_permutation[i]] = i; + + return result_data->permute(perm, 0); } private: @@ -172,9 +220,9 @@ private: }; -REGISTER_FUNCTION(ArrayFoldOld) +REGISTER_FUNCTION(ArrayFold) 
{ - factory.registerFunction(); + factory.registerFunction(); } diff --git a/src/Functions/array/arrayFold2.cpp b/src/Functions/array/arrayFold2.cpp deleted file mode 100644 index 27be145641b..00000000000 --- a/src/Functions/array/arrayFold2.cpp +++ /dev/null @@ -1,240 +0,0 @@ -#include "FunctionArrayMapped.h" -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DONT_MATCH; - extern const int TYPE_MISMATCH; -} - - -/** arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) - apply the expression to each element of the array (or set of parallel arrays). - */ -class ArrayFold : public IFunction -{ -public: - static constexpr auto name = "arrayFold"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - void getLambdaArgumentTypes(DataTypes & arguments) const override - { - if (arguments.size() < 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs lambda function, at least one array argument and one accumulator argument.", getName()); - DataTypes nested_types(arguments.size() - 1); - for (size_t i = 0; i < nested_types.size() - 1; ++i) - { - const DataTypeArray * array_type = checkAndGetDataType(&*arguments[i + 1]); - if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array. Found {} instead.", toString(i + 2), getName(), arguments[i + 1]->getName()); - nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); - } - nested_types[nested_types.size() - 1] = arguments[arguments.size() - 1]; - - const DataTypeFunction * function_type = checkAndGetDataType(arguments[0].get()); - if (!function_type || function_type->getArgumentTypes().size() != nested_types.size()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for this overload of {} must be a function with {} arguments. Found {} instead.", - getName(), toString(nested_types.size()), arguments[0]->getName()); - - arguments[0] = std::make_shared(nested_types); - } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - if (arguments.size() < 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs at least 2 arguments; passed {}.", getName(), toString(arguments.size())); - const auto * data_type_function = checkAndGetDataType(arguments[0].type.get()); - if (!data_type_function) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName()); - - auto const accumulator_type = arguments.back().type; - auto const lambda_type = data_type_function->getReturnType(); - if (! accumulator_type->equals(*lambda_type)) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Return type of lambda function must be the same as the accumulator type. 
" - "Inferred type of lambda {}, inferred type of accumulator {}.", lambda_type->getName(), accumulator_type->getName()); - - return DataTypePtr(accumulator_type); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const auto & column_with_type_and_name = arguments[0]; - - if (!column_with_type_and_name.column) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName()); - - const auto * column_function = typeid_cast(column_with_type_and_name.column.get()); - - if (!column_function) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName()); - - ColumnPtr offsets_column; - ColumnPtr column_first_array_ptr; - const ColumnArray * column_first_array = nullptr; - ColumnsWithTypeAndName arrays; - arrays.reserve(arguments.size() - 1); - - for (size_t i = 1; i < arguments.size() - 1; ++i) - { - const auto & array_with_type_and_name = arguments[i]; - ColumnPtr column_array_ptr = array_with_type_and_name.column; - const auto * column_array = checkAndGetColumn(column_array_ptr.get()); - const DataTypePtr & array_type_ptr = array_with_type_and_name.type; - const auto * array_type = checkAndGetDataType(array_type_ptr.get()); - if (!column_array) - { - const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); - if (!column_const_array) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected array column, found {}", column_array_ptr->getName()); - column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn()); - column_array = checkAndGetColumn(column_array_ptr.get()); - } - if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected array type, found {}", array_type_ptr->getName()); - if (!offsets_column) - { - offsets_column = column_array->getOffsetsPtr(); - } - else - { - /// The first condition is optimization: do not compare data if the pointers are equal. 
- if(column_array->lowCardinality() || array_type->getNestedType()) - { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "low cadinality array"); // test - } - if (column_array->getOffsetsPtr() != offsets_column - && column_array->getOffsets() != typeid_cast(*offsets_column).getData()) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} must have equal size", getName()); - } - if (i == 1) - { - column_first_array_ptr = column_array_ptr; - column_first_array = column_array; - } - arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), - recursiveRemoveLowCardinality(array_type->getNestedType()), - array_with_type_and_name.name)); - } - ssize_t rows_count = input_rows_count; - ssize_t data_row_count = arrays[0].column->size(); - MutableColumnPtr current_column; - current_column = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); - if (rows_count == 0) { - return current_column; - } - current_column->insertMany((*arguments.back().column)[0], rows_count); - - MutableColumnPtr result_data = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); - IColumn::Permutation inverse_perm(rows_count); - size_t inverse_perm_count = 0; - - auto array_count = arrays.size(); - size_t max_array_size = 0; - auto& offsets = column_first_array->getOffsets(); - - IColumn::Selector selector(data_row_count); - size_t cur_ind = 0; - ssize_t cur_arr = 0; - - if (data_row_count) { - while (offsets[cur_arr] == 0) { - ++cur_arr; - } - } - for (ssize_t i = 0; i < data_row_count; ++i) - { - selector[i] = cur_ind++; - if (cur_ind > max_array_size) - max_array_size = cur_ind; - while (cur_arr < rows_count && cur_ind >= offsets[cur_arr] - offsets[cur_arr - 1]) - { - ++cur_arr; - cur_ind = 0; - } - } - - std::vector data_arrays; - data_arrays.resize(array_count); - - for (size_t i = 0; i < array_count; ++i) - { - data_arrays[i] = arrays[i].column->scatter(max_array_size, selector); - } - - size_t prev_size = rows_count; - - for (size_t ind = 0; ind < max_array_size; ++ind) { - IColumn::Selector prev_selector(prev_size); - size_t prev_ind = 0; - for (ssize_t irow = 0; irow < rows_count; ++irow) - { - if (offsets[irow] - offsets[irow - 1] > ind) - { - prev_selector[prev_ind++] = 1; - } - else if (offsets[irow] - offsets[irow - 1] == ind) - { - inverse_perm[inverse_perm_count++] = irow; - prev_selector[prev_ind++] = 0; - } - } - - //remove rows that doesn't have any more elements - auto prev = current_column->scatter(2, prev_selector); - - - result_data->insertRangeFrom(*(prev[0]), 0, prev[0]->size()); - - auto res_lambda = column_function->cloneResized(prev[1]->size()); - auto * res_lambda_ptr = typeid_cast(res_lambda.get()); - - for (size_t i = 0; i < array_count; i++) - res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(data_arrays[i][ind]), arrays[i].type, arrays[i].name)})); - - res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)})); - - current_column = IColumn::mutate(res_lambda_ptr->reduce().column); - prev_size = current_column->size(); - } - result_data->insertRangeFrom(*current_column, 0, current_column->size()); - - - for (ssize_t irow = 0; irow < rows_count; ++irow) - { - if (offsets[irow] - offsets[irow-1] == max_array_size) - inverse_perm[inverse_perm_count++] = irow; - } - - IColumn::Permutation perm(rows_count); - for (ssize_t i = 0; i < rows_count; i++) { - perm[inverse_perm[i]] = i; - } - - return 
result_data->permute(perm, 0); - } - -private: - String getName() const override - { - return name; - } -}; - - -REGISTER_FUNCTION(ArrayFold) -{ - factory.registerFunction(); -} - - -} From 0a6d08fbf8784decbeccd688ff6416ef68c494e3 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Thu, 11 May 2023 15:53:39 +0300 Subject: [PATCH 0029/1687] add tests --- .../0_stateless/02718_array_fold.reference | 54 +++++++++++++++++++ .../queries/0_stateless/02718_array_fold.sql | 15 ++++++ 2 files changed, 69 insertions(+) create mode 100644 tests/queries/0_stateless/02718_array_fold.reference create mode 100644 tests/queries/0_stateless/02718_array_fold.sql diff --git a/tests/queries/0_stateless/02718_array_fold.reference b/tests/queries/0_stateless/02718_array_fold.reference new file mode 100644 index 00000000000..cdb7f42a8f9 --- /dev/null +++ b/tests/queries/0_stateless/02718_array_fold.reference @@ -0,0 +1,54 @@ +23 +------------------ +3 +------------------ +101 +------------------ +269 +------------------ +[1,2,3,4] +------------------ +[4,3,2,1] +------------------ +([4,3,2,1],[1,2,3,4]) +------------------ +([1,3,5],[2,4,6]) +------------------ +0 +0 +1 +3 +6 +10 +------------------ +[] +[0] +[1,0] +[2,1,0] +[3,2,1,0] +[4,3,2,1,0] +------------------ +[] +[0] +[1,0] +[1,0,2] +[3,1,0,2] +[3,1,0,2,4] +------------------ +[(0,0)] +[(0,1),(0,0)] +[(1,2),(0,1),(0,0)] +[(2,3),(1,2),(0,1),(0,0)] +[(3,4),(2,3),(1,2),(0,1),(0,0)] +[(4,5),(3,4),(2,3),(1,2),(0,1),(0,0)] +------------------ +[] +['0'] +['0','1'] +['0','1','2'] +['0','1','2','3'] +['0','1','2','3','4'] + + + + diff --git a/tests/queries/0_stateless/02718_array_fold.sql b/tests/queries/0_stateless/02718_array_fold.sql new file mode 100644 index 00000000000..4c3780ee93d --- /dev/null +++ b/tests/queries/0_stateless/02718_array_fold.sql @@ -0,0 +1,15 @@ +SELECT arrayFold(x,acc -> acc + x * 2, [1,2,3,4], toInt64(3)); +SELECT arrayFold(x,acc -> acc + x * 2, emptyArrayInt64(), toInt64(3)); +SELECT arrayFold(x,y,acc -> acc + x * 2 + y * 3, [1,2,3,4], [5,6,7,8], toInt64(3)); +SELECT arrayFold(x,y,z,acc -> acc + x * 2 + y * 3 + z * 4, [1,2,3,4], [5,6,7,8], [9,10,11,12], toInt64(3)); +SELECT arrayFold(x,acc -> arrayPushBack(acc,x), [1,2,3,4], emptyArrayInt64()); +SELECT arrayFold(x,acc -> arrayPushFront(acc,x), [1,2,3,4], emptyArrayInt64()); +SELECT arrayFold(x,acc -> (arrayPushFront(acc.1,x), arrayPushBack(acc.2,x)), [1,2,3,4], (emptyArrayInt64(), emptyArrayInt64())); +SELECT arrayFold(x,acc -> x % 2 ? (arrayPushBack(acc.1,x), acc.2): (acc.1, arrayPushBack(acc.2,x)), [1,2,3,4,5,6], (emptyArrayInt64(), emptyArrayInt64())); + +SELECT arrayFold(x,acc -> acc+x, range(number), toInt64(0)) FROM system.numbers LIMIT 6 +SELECT arrayFold(x,acc -> arrayPushFront(acc, x), range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 6; +SELECT arrayFold(x,acc -> x % 2 ? 
arrayPushFront(acc, x) : arrayPushBack(acc, x), range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 6; +SELECT arrayFold(x,acc -> arrayPushFront(acc, (x, x+1)), range(number), [(toUInt64(0),toUInt64(0))]) FROM system.numbers LIMIT 6; +SELECT arrayFold(x, acc -> concat(acc, arrayMap(z -> toString(x), [number])) , range(number), CAST([] as Array(String))) FROM system.numbers LIMIT 6; + From 1a8846cf000b2c936948a12ab232434d8ea8a5ce Mon Sep 17 00:00:00 2001 From: Lirikl Date: Thu, 11 May 2023 16:47:21 +0300 Subject: [PATCH 0030/1687] non const acum --- src/Functions/array/arrayFold.cpp | 25 +++++----- .../0_stateless/02718_array_fold.reference | 49 +++++++++++-------- .../queries/0_stateless/02718_array_fold.sql | 3 +- 3 files changed, 41 insertions(+), 36 deletions(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index 7f7d8d2787f..7ae764104de 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -126,23 +126,22 @@ public: ssize_t data_row_count = arrays[0].column->size(); auto array_count = arrays.size(); - MutableColumnPtr current_column; - current_column = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); - if (rows_count == 0) - return current_column; - current_column->insertMany((*arguments.back().column)[0], rows_count); - + if (rows_count == 0) + return arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); + + ColumnPtr current_column; + current_column = arguments.back().column->convertToFullColumnIfConst(); MutableColumnPtr result_data = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); size_t max_array_size = 0; auto& offsets = column_first_array->getOffsets(); - //get columns of Nth array elements + //get columns of Nth array elements IColumn::Selector selector(data_row_count); size_t cur_ind = 0; ssize_t cur_arr = 0; - if (data_row_count) + if (data_row_count) while (offsets[cur_arr] == 0) ++cur_arr; @@ -157,8 +156,8 @@ public: cur_ind = 0; } } - - std::vector data_arrays; + + std::vector data_arrays; data_arrays.resize(array_count); for (size_t i = 0; i < array_count; ++i) @@ -168,7 +167,7 @@ public: IColumn::Permutation inverse_permutation(rows_count); size_t inverse_permutation_count = 0; - for (size_t ind = 0; ind < max_array_size; ++ind) + for (size_t ind = 0; ind < max_array_size; ++ind) { IColumn::Selector prev_selector(prev_size); size_t prev_ind = 0; @@ -178,7 +177,7 @@ public: { prev_selector[prev_ind++] = 1; } - else if (offsets[irow] - offsets[irow - 1] == ind) + else if (offsets[irow] - offsets[irow - 1] == ind) { inverse_permutation[inverse_permutation_count++] = irow; prev_selector[prev_ind++] = 0; @@ -193,7 +192,6 @@ public: for (size_t i = 0; i < array_count; i++) res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(data_arrays[i][ind]), arrays[i].type, arrays[i].name)})); - res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)})); current_column = IColumn::mutate(res_lambda_ptr->reduce().column); @@ -208,7 +206,6 @@ public: IColumn::Permutation perm(rows_count); for (ssize_t i = 0; i < rows_count; i++) perm[inverse_permutation[i]] = i; - return result_data->permute(perm, 0); } diff --git a/tests/queries/0_stateless/02718_array_fold.reference b/tests/queries/0_stateless/02718_array_fold.reference index cdb7f42a8f9..57f10988da8 100644 --- a/tests/queries/0_stateless/02718_array_fold.reference +++ 
b/tests/queries/0_stateless/02718_array_fold.reference @@ -21,33 +21,40 @@ 6 10 ------------------ +0 +1 +3 +6 +10 +15 +------------------ [] -[0] -[1,0] -[2,1,0] -[3,2,1,0] +[0] +[1,0] +[2,1,0] +[3,2,1,0] [4,3,2,1,0] ------------------ -[] -[0] -[1,0] -[1,0,2] -[3,1,0,2] -[3,1,0,2,4] +[] +[0] +[1,0] +[1,0,2] +[3,1,0,2] +[3,1,0,2,4] ------------------ -[(0,0)] -[(0,1),(0,0)] -[(1,2),(0,1),(0,0)] -[(2,3),(1,2),(0,1),(0,0)] -[(3,4),(2,3),(1,2),(0,1),(0,0)] +[(0,0)] +[(0,1),(0,0)] +[(1,2),(0,1),(0,0)] +[(2,3),(1,2),(0,1),(0,0)] +[(3,4),(2,3),(1,2),(0,1),(0,0)] [(4,5),(3,4),(2,3),(1,2),(0,1),(0,0)] ------------------ -[] -['0'] -['0','1'] -['0','1','2'] -['0','1','2','3'] -['0','1','2','3','4'] +[] +['0'] +['0','1'] +['0','1','2'] +['0','1','2','3'] +['0','1','2','3','4'] diff --git a/tests/queries/0_stateless/02718_array_fold.sql b/tests/queries/0_stateless/02718_array_fold.sql index 4c3780ee93d..8372d871f59 100644 --- a/tests/queries/0_stateless/02718_array_fold.sql +++ b/tests/queries/0_stateless/02718_array_fold.sql @@ -7,7 +7,8 @@ SELECT arrayFold(x,acc -> arrayPushFront(acc,x), [1,2,3,4], emptyArrayInt64()); SELECT arrayFold(x,acc -> (arrayPushFront(acc.1,x), arrayPushBack(acc.2,x)), [1,2,3,4], (emptyArrayInt64(), emptyArrayInt64())); SELECT arrayFold(x,acc -> x % 2 ? (arrayPushBack(acc.1,x), acc.2): (acc.1, arrayPushBack(acc.2,x)), [1,2,3,4,5,6], (emptyArrayInt64(), emptyArrayInt64())); -SELECT arrayFold(x,acc -> acc+x, range(number), toInt64(0)) FROM system.numbers LIMIT 6 +SELECT arrayFold(x,acc -> acc+x, range(number), toInt64(0)) FROM system.numbers LIMIT 6; +SELECT arrayFold(x,acc -> acc+x, range(number), number) FROM system.numbers LIMIT 6; SELECT arrayFold(x,acc -> arrayPushFront(acc, x), range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 6; SELECT arrayFold(x,acc -> x % 2 ? 
arrayPushFront(acc, x) : arrayPushBack(acc, x), range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 6; SELECT arrayFold(x,acc -> arrayPushFront(acc, (x, x+1)), range(number), [(toUInt64(0),toUInt64(0))]) FROM system.numbers LIMIT 6; From bd3c084cb17d1a8161e08ad1905b0effcc306484 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Fri, 12 May 2023 12:09:58 +0300 Subject: [PATCH 0031/1687] fix test --- .../0_stateless/02718_array_fold.reference | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tests/queries/0_stateless/02718_array_fold.reference b/tests/queries/0_stateless/02718_array_fold.reference index 57f10988da8..c89b19f1e4b 100644 --- a/tests/queries/0_stateless/02718_array_fold.reference +++ b/tests/queries/0_stateless/02718_array_fold.reference @@ -1,61 +1,44 @@ 23 ------------------- 3 ------------------- 101 ------------------- 269 ------------------- [1,2,3,4] ------------------- [4,3,2,1] ------------------- ([4,3,2,1],[1,2,3,4]) ------------------- ([1,3,5],[2,4,6]) ------------------- 0 0 1 3 6 10 ------------------- 0 1 3 6 10 15 ------------------- [] [0] [1,0] [2,1,0] [3,2,1,0] [4,3,2,1,0] ------------------- [] [0] [1,0] [1,0,2] [3,1,0,2] [3,1,0,2,4] ------------------- [(0,0)] [(0,1),(0,0)] [(1,2),(0,1),(0,0)] [(2,3),(1,2),(0,1),(0,0)] [(3,4),(2,3),(1,2),(0,1),(0,0)] [(4,5),(3,4),(2,3),(1,2),(0,1),(0,0)] ------------------- [] ['0'] ['0','1'] ['0','1','2'] ['0','1','2','3'] ['0','1','2','3','4'] - - - - From 23dec236c23d23e396b74b129f0d29634bb030e0 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Fri, 12 May 2023 12:20:52 +0300 Subject: [PATCH 0032/1687] add description --- src/Functions/array/arrayFold.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index 7ae764104de..51a1fb50322 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -219,7 +219,12 @@ private: REGISTER_FUNCTION(ArrayFold) { - factory.registerFunction(); + + factory.registerFunction(R"( + Function arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) applies lambda function to a number of same sized array columns + and collects result in accumulator. Accumulator can be either constant or column. + )"); + } From 65cdae872a32ea644ebd9b72b028c06d6b25cae7 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Fri, 12 May 2023 12:23:36 +0300 Subject: [PATCH 0033/1687] fix style --- src/Functions/array/arrayFold.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index 51a1fb50322..961e6dffcf4 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -193,7 +193,7 @@ public: for (size_t i = 0; i < array_count; i++) res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(data_arrays[i][ind]), arrays[i].type, arrays[i].name)})); res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)})); - + current_column = IColumn::mutate(res_lambda_ptr->reduce().column); prev_size = current_column->size(); } @@ -216,16 +216,11 @@ private: } }; - REGISTER_FUNCTION(ArrayFold) { - factory.registerFunction(R"( Function arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) applies lambda function to a number of same sized array columns and collects result in accumulator. Accumulator can be either constant or column. 
)"); - } - - } From 2c9635cc8dc0aa3462baf36a4d27b33900292ed6 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Fri, 12 May 2023 12:49:16 +0300 Subject: [PATCH 0034/1687] fix style --- src/Functions/array/arrayFold.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index 961e6dffcf4..02755cfee53 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -219,7 +219,7 @@ private: REGISTER_FUNCTION(ArrayFold) { factory.registerFunction(R"( - Function arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) applies lambda function to a number of same sized array columns + Function arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) applies lambda function to a number of same sized array columns and collects result in accumulator. Accumulator can be either constant or column. )"); } From 03f5465106454dd623faab73e8fa551cc20c0f4f Mon Sep 17 00:00:00 2001 From: Lirikl Date: Fri, 12 May 2023 12:55:54 +0300 Subject: [PATCH 0035/1687] fix description --- src/Functions/array/arrayFold.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index 02755cfee53..ec9b553d6a9 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -218,7 +218,7 @@ private: REGISTER_FUNCTION(ArrayFold) { - factory.registerFunction(R"( + factory.registerFunction("arrayFold", R"( Function arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) applies lambda function to a number of same sized array columns and collects result in accumulator. Accumulator can be either constant or column. )"); From 0334edf35b6a9122d73ade8ffb3a73b42ef24d4c Mon Sep 17 00:00:00 2001 From: Lirikl Date: Fri, 12 May 2023 13:04:07 +0300 Subject: [PATCH 0036/1687] fix Documentation --- src/Functions/array/arrayFold.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index ec9b553d6a9..9d638a0c0c9 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -218,9 +218,24 @@ private: REGISTER_FUNCTION(ArrayFold) { - factory.registerFunction("arrayFold", R"( + { + R"( +Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString. +This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library. +The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256. +It returns a BLAKE3 hash as a byte array with type FixedString(32). +)", + Documentation::Examples{ + {"hash", "SELECT hex(BLAKE3('ABC'))"}}, + Documentation::Categories{"Hash"} + }, + factory.registerFunction("arrayFold", {R"( Function arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) applies lambda function to a number of same sized array columns and collects result in accumulator. Accumulator can be either constant or column. 
- )"); + )", + Documentation::Examples{ + {"sum", "SELECT arrayFold(x,acc -> acc + x, [1,2,3,4], toInt64(0));"}}, + Documentation::Categories{} + }); } } From c19a13e75bd797d0ad2e6a1072a0a1ee32f11a40 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Fri, 12 May 2023 14:05:24 +0300 Subject: [PATCH 0037/1687] remove accidental garbage --- src/Functions/array/arrayFold.cpp | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index 9d638a0c0c9..a3ca37829b1 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -1,6 +1,7 @@ #include "FunctionArrayMapped.h" #include #include +#include namespace DB { @@ -14,7 +15,6 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; } - /** arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) - apply the expression to each element of the array (or set of parallel arrays). */ class ArrayFold : public IFunction @@ -128,11 +128,11 @@ public: if (rows_count == 0) return arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); - + ColumnPtr current_column; current_column = arguments.back().column->convertToFullColumnIfConst(); MutableColumnPtr result_data = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); - + size_t max_array_size = 0; auto& offsets = column_first_array->getOffsets(); @@ -162,7 +162,7 @@ public: for (size_t i = 0; i < array_count; ++i) data_arrays[i] = arrays[i].column->scatter(max_array_size, selector); - + size_t prev_size = rows_count; IColumn::Permutation inverse_permutation(rows_count); size_t inverse_permutation_count = 0; @@ -189,7 +189,7 @@ public: auto res_lambda = column_function->cloneResized(prev[1]->size()); auto * res_lambda_ptr = typeid_cast(res_lambda.get()); - + for (size_t i = 0; i < array_count; i++) res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(data_arrays[i][ind]), arrays[i].type, arrays[i].name)})); res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)})); @@ -218,17 +218,6 @@ private: REGISTER_FUNCTION(ArrayFold) { - { - R"( -Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString. -This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library. -The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256. -It returns a BLAKE3 hash as a byte array with type FixedString(32). -)", - Documentation::Examples{ - {"hash", "SELECT hex(BLAKE3('ABC'))"}}, - Documentation::Categories{"Hash"} - }, factory.registerFunction("arrayFold", {R"( Function arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) applies lambda function to a number of same sized array columns and collects result in accumulator. Accumulator can be either constant or column. 
From c213ee1037a079558441faa4796ad59aa9029193 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Fri, 12 May 2023 15:58:48 +0300 Subject: [PATCH 0038/1687] remove extra include --- src/Functions/array/arrayFold.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index a3ca37829b1..cd115cff84a 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -1,7 +1,6 @@ #include "FunctionArrayMapped.h" #include #include -#include namespace DB { From 6c31772f36fc55b1660c7d66615312ce1e40cde5 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Fri, 12 May 2023 19:10:58 +0300 Subject: [PATCH 0039/1687] Documentation include --- src/Functions/array/arrayFold.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index cd115cff84a..a3ca37829b1 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -1,6 +1,7 @@ #include "FunctionArrayMapped.h" #include #include +#include namespace DB { From 28589d8d6776223e5c3dde25c89a4cd8042a48e1 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Fri, 12 May 2023 21:49:28 +0300 Subject: [PATCH 0040/1687] use new Function Documentation --- src/Functions/array/arrayFold.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index a3ca37829b1..bdaf8d403b1 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -218,13 +218,9 @@ private: REGISTER_FUNCTION(ArrayFold) { - factory.registerFunction("arrayFold", {R"( + factory.registerFunction(FunctionDocumentation{.description=R"( Function arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) applies lambda function to a number of same sized array columns and collects result in accumulator. Accumulator can be either constant or column. - )", - Documentation::Examples{ - {"sum", "SELECT arrayFold(x,acc -> acc + x, [1,2,3,4], toInt64(0));"}}, - Documentation::Categories{} - }); + )", .categories{"Array"}, .examples{{"sum", "SELECT arrayFold(x,acc -> acc + x, [1,2,3,4], toInt64(1));", "11"}}}); } } From f09a22148ee132dae056055a6d43b411619432d4 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Fri, 12 May 2023 23:22:29 +0300 Subject: [PATCH 0041/1687] fix documentation --- src/Functions/array/arrayFold.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index bdaf8d403b1..9786d5b2e5b 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -1,7 +1,6 @@ #include "FunctionArrayMapped.h" #include #include -#include namespace DB { From 2e6a48fbcfe90ace79a4f41ccb60e31bc4da9fca Mon Sep 17 00:00:00 2001 From: Lirikl Date: Sat, 13 May 2023 01:05:43 +0300 Subject: [PATCH 0042/1687] fix documentation --- src/Functions/array/arrayFold.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index 9786d5b2e5b..ee122d64149 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -220,6 +220,6 @@ REGISTER_FUNCTION(ArrayFold) factory.registerFunction(FunctionDocumentation{.description=R"( Function arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, init_accum) applies lambda function to a number of same sized array columns and collects result in accumulator. Accumulator can be either constant or column. 
- )", .categories{"Array"}, .examples{{"sum", "SELECT arrayFold(x,acc -> acc + x, [1,2,3,4], toInt64(1));", "11"}}}); + )", .examples{{"sum", "SELECT arrayFold(x,acc -> acc + x, [1,2,3,4], toInt64(1));", "11"}}, .categories{"Array"}}); } } From 9e91e19f2b85821d78e9c1228b61e546c465b58d Mon Sep 17 00:00:00 2001 From: Lirikl Date: Sat, 13 May 2023 11:12:47 +0300 Subject: [PATCH 0043/1687] ClangTidy fix --- src/Functions/array/arrayFold.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index ee122d64149..23160af58b6 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -133,7 +133,7 @@ public: MutableColumnPtr result_data = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); size_t max_array_size = 0; - auto& offsets = column_first_array->getOffsets(); + const auto & offsets = column_first_array->getOffsets(); //get columns of Nth array elements IColumn::Selector selector(data_row_count); From cb2976491bd9c7e80157ed8cd31ba1ab1a5b3ada Mon Sep 17 00:00:00 2001 From: Lirikl Date: Sat, 13 May 2023 11:43:08 +0300 Subject: [PATCH 0044/1687] fix empty arrays bug --- src/Functions/array/arrayFold.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index 23160af58b6..e2d36903f36 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -159,8 +159,9 @@ public: std::vector data_arrays; data_arrays.resize(array_count); - for (size_t i = 0; i < array_count; ++i) - data_arrays[i] = arrays[i].column->scatter(max_array_size, selector); + if (max_array_size > 0) + for (size_t i = 0; i < array_count; ++i) + data_arrays[i] = arrays[i].column->scatter(max_array_size, selector); size_t prev_size = rows_count; IColumn::Permutation inverse_permutation(rows_count); From 0d6561ee77041725f12d350e620c1d126563e82e Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Mon, 17 Apr 2023 16:52:30 +0200 Subject: [PATCH 0045/1687] Prototype --- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 1 + src/Common/ZooKeeper/ZooKeeperCommon.h | 9 +++++-- src/Common/ZooKeeper/ZooKeeperConstants.cpp | 3 +++ src/Common/ZooKeeper/ZooKeeperConstants.h | 1 + src/Coordination/KeeperStorage.cpp | 27 ++++++++++++++++++--- 5 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 5031af38812..527b04c8c43 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -962,6 +962,7 @@ ZooKeeperRequestFactory::ZooKeeperRequestFactory() registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); + registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 5f00698423e..69f4dd84860 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -194,7 +194,7 @@ struct ZooKeeperCloseResponse final : ZooKeeperResponse OpNum getOpNum() const override { return OpNum::Close; } }; -struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest +struct ZooKeeperCreateRequest : public CreateRequest, ZooKeeperRequest { /// used only during restore from zookeeper log int32_t parent_cversion = -1; @@ -215,7 
+215,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest void createLogElements(LogElements & elems) const override; }; -struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse +struct ZooKeeperCreateResponse : CreateResponse, ZooKeeperResponse { void readImpl(ReadBuffer & in) override; @@ -228,6 +228,11 @@ struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse void fillLogElements(LogElements & elems, size_t idx) const override; }; +struct ZooKeeperCreateIfNotExistsRequest final : public ZooKeeperCreateRequest +{ + OpNum getOpNum() const override { return OpNum::CreateIfNotExists; } +}; + struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest { ZooKeeperRemoveRequest() = default; diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.cpp b/src/Common/ZooKeeper/ZooKeeperConstants.cpp index 86f70ea547a..334afde52f2 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.cpp +++ b/src/Common/ZooKeeper/ZooKeeperConstants.cpp @@ -21,6 +21,7 @@ static const std::unordered_set VALID_OPERATIONS = static_cast(OpNum::Check), static_cast(OpNum::Multi), static_cast(OpNum::MultiRead), + static_cast(OpNum::CreateIfNotExists), static_cast(OpNum::Auth), static_cast(OpNum::SessionID), static_cast(OpNum::SetACL), @@ -57,6 +58,8 @@ std::string toString(OpNum op_num) return "Multi"; case OpNum::MultiRead: return "MultiRead"; + case OpNum::CreateIfNotExists: + return "CreateIfNotExists"; case OpNum::Sync: return "Sync"; case OpNum::Heartbeat: diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h index 6b50c5c5d09..6582e58e92c 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.h +++ b/src/Common/ZooKeeper/ZooKeeperConstants.h @@ -37,6 +37,7 @@ enum class OpNum : int32_t // CH Keeper specific operations FilteredList = 500, CheckNotExists = 501, + CreateIfNotExists = 502, SessionID = 997, /// Special internal request }; diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 7a1a5e42632..e0c69933337 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1001,11 +1001,25 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); + Coordination::ZooKeeperCreateResponse * response = dynamic_cast(response_ptr.get()); + + Coordination::ZooKeeperCreateIfNotExistsRequest * create_if_not_exists_request = dynamic_cast(zk_request.get()); + + if (create_if_not_exists_request != nullptr) { + Coordination::ZooKeeperCreateIfNotExistsRequest & request = dynamic_cast(*zk_request); + + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it != container.end()) + { + response->error = Coordination::Error::ZOK; + return response_ptr; + } + } if (const auto result = storage.commit(zxid); result != Coordination::Error::ZOK) { - response.error = result; + response->error = result; return response_ptr; } @@ -1016,8 +1030,8 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr [zxid](const auto & delta) { return delta.zxid == zxid && std::holds_alternative(delta.operation); }); - response.path_created = create_delta_it->path; - response.error = Coordination::Error::ZOK; + response->path_created 
= create_delta_it->path; + response->error = Coordination::Error::ZOK; return response_ptr; } }; @@ -1730,6 +1744,10 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro check_operation_type(OperationType::Write); concrete_requests.push_back(std::make_shared(sub_zk_request)); break; + case Coordination::OpNum::CreateIfNotExists: + check_operation_type(OperationType::Write); + concrete_requests.push_back(std::make_shared(sub_zk_request)); + break; case Coordination::OpNum::Remove: check_operation_type(OperationType::Write); concrete_requests.push_back(std::make_shared(sub_zk_request)); @@ -1993,6 +2011,7 @@ KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); From b53f36369e9a148634d6b4c1fa9526b3bcd3c67c Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Mon, 17 Apr 2023 17:47:57 +0200 Subject: [PATCH 0046/1687] Remove new request object --- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 2 +- src/Common/ZooKeeper/ZooKeeperCommon.h | 9 ++------- src/Coordination/KeeperStorage.cpp | 16 +++++++--------- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 527b04c8c43..c148b68b95e 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -962,7 +962,7 @@ ZooKeeperRequestFactory::ZooKeeperRequestFactory() registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); - registerZooKeeperRequest(*this); + registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 69f4dd84860..5f00698423e 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -194,7 +194,7 @@ struct ZooKeeperCloseResponse final : ZooKeeperResponse OpNum getOpNum() const override { return OpNum::Close; } }; -struct ZooKeeperCreateRequest : public CreateRequest, ZooKeeperRequest +struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest { /// used only during restore from zookeeper log int32_t parent_cversion = -1; @@ -215,7 +215,7 @@ struct ZooKeeperCreateRequest : public CreateRequest, ZooKeeperRequest void createLogElements(LogElements & elems) const override; }; -struct ZooKeeperCreateResponse : CreateResponse, ZooKeeperResponse +struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse { void readImpl(ReadBuffer & in) override; @@ -228,11 +228,6 @@ struct ZooKeeperCreateResponse : CreateResponse, ZooKeeperResponse void fillLogElements(LogElements & elems, size_t idx) const override; }; -struct ZooKeeperCreateIfNotExistsRequest final : public ZooKeeperCreateRequest -{ - OpNum getOpNum() const override { return OpNum::CreateIfNotExists; } -}; - struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest { ZooKeeperRemoveRequest() = default; diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index e0c69933337..825fcca42f5 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1001,25 +1001,23 @@ 
struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperCreateResponse * response = dynamic_cast(response_ptr.get()); + Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperCreateIfNotExistsRequest * create_if_not_exists_request = dynamic_cast(zk_request.get()); - - if (create_if_not_exists_request != nullptr) { - Coordination::ZooKeeperCreateIfNotExistsRequest & request = dynamic_cast(*zk_request); + if (zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists) { + Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*zk_request); auto & container = storage.container; auto node_it = container.find(request.path); if (node_it != container.end()) { - response->error = Coordination::Error::ZOK; + response.error = Coordination::Error::ZOK; return response_ptr; } } if (const auto result = storage.commit(zxid); result != Coordination::Error::ZOK) { - response->error = result; + response.error = result; return response_ptr; } @@ -1030,8 +1028,8 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr [zxid](const auto & delta) { return delta.zxid == zxid && std::holds_alternative(delta.operation); }); - response->path_created = create_delta_it->path; - response->error = Coordination::Error::ZOK; + response.path_created = create_delta_it->path; + response.error = Coordination::Error::ZOK; return response_ptr; } }; From f84fdb7f1079c688278d1378d677ecdc60bd942b Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Mon, 17 Apr 2023 17:48:22 +0200 Subject: [PATCH 0047/1687] Increment Keeper API version --- src/Coordination/KeeperConstants.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperConstants.h b/src/Coordination/KeeperConstants.h index 4b5a5b54be0..afe3fcfb4c9 100644 --- a/src/Coordination/KeeperConstants.h +++ b/src/Coordination/KeeperConstants.h @@ -11,9 +11,10 @@ enum class KeeperApiVersion : uint8_t WITH_FILTERED_LIST, WITH_MULTI_READ, WITH_CHECK_NOT_EXISTS, + WITH_CREATE_IF_NOT_EXISTS }; -inline constexpr auto current_keeper_api_version = KeeperApiVersion::WITH_CHECK_NOT_EXISTS; +inline constexpr auto current_keeper_api_version = KeeperApiVersion::WITH_CREATE_IF_NOT_EXISTS; const std::string keeper_system_path = "/keeper"; const std::string keeper_api_version_path = keeper_system_path + "/api_version"; From 965f7850f8b7d5150bfedaffc11f9a5d4c2fe6fd Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 20 Apr 2023 13:26:52 +0200 Subject: [PATCH 0048/1687] Cleanup --- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 2 ++ src/Common/ZooKeeper/ZooKeeperCommon.h | 5 ++++- src/Coordination/KeeperStorage.cpp | 21 +++++++++++---------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index c148b68b95e..89999f7d56b 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -941,6 +941,8 @@ void registerZooKeeperRequest(ZooKeeperRequestFactory & factory) res->operation_type = ZooKeeperMultiRequest::OperationType::Write; else if constexpr (num == OpNum::CheckNotExists) res->not_exists = true; + else if constexpr (num == OpNum::CreateIfNotExists) + res->not_exists = true; return res; }); diff --git 
a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 5f00698423e..ee4eab1156c 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -199,10 +199,13 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest /// used only during restore from zookeeper log int32_t parent_cversion = -1; + /// should it fail if node already exists + bool not_exists = false; + ZooKeeperCreateRequest() = default; explicit ZooKeeperCreateRequest(const CreateRequest & base) : CreateRequest(base) {} - OpNum getOpNum() const override { return OpNum::Create; } + OpNum getOpNum() const override { return not_exists ? OpNum::CreateIfNotExists : OpNum::Create; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; std::string toStringImpl() const override; diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 825fcca42f5..8bfe1e667b4 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -917,6 +917,15 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr std::vector new_deltas; + if (zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists) { + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it != container.end()) + { + return new_deltas; + } + } + auto parent_path = parentPath(request.path); auto parent_node = storage.uncommitted_state.getNode(parent_path); if (parent_node == nullptr) @@ -1003,16 +1012,8 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); - if (zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists) { - Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*zk_request); - - auto & container = storage.container; - auto node_it = container.find(request.path); - if (node_it != container.end()) - { - response.error = Coordination::Error::ZOK; - return response_ptr; - } + if (storage.uncommitted_state.deltas.begin()->zxid != zxid) { + return response_ptr; } if (const auto result = storage.commit(zxid); result != Coordination::Error::ZOK) From 7e151428acb257dbaab24940351159c9b50b6a9c Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Fri, 21 Apr 2023 15:10:38 +0200 Subject: [PATCH 0049/1687] Lint --- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 89999f7d56b..61b9d6e5172 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -1,4 +1,5 @@ #include "Common/ZooKeeper/IKeeper.h" +#include "Common/ZooKeeper/ZooKeeperConstants.h" #include #include #include @@ -939,9 +940,7 @@ void registerZooKeeperRequest(ZooKeeperRequestFactory & factory) res->operation_type = ZooKeeperMultiRequest::OperationType::Read; else if constexpr (num == OpNum::Multi) res->operation_type = ZooKeeperMultiRequest::OperationType::Write; - else if constexpr (num == OpNum::CheckNotExists) - res->not_exists = true; - else if constexpr (num == OpNum::CreateIfNotExists) + else if constexpr (num == OpNum::CheckNotExists || num == OpNum::CreateIfNotExists) res->not_exists = true; return res; From 203276dfcc40b9d4f2b19a1eb0af430e612e8949 Mon Sep 
17 00:00:00 2001 From: Konstantin Bogdanov Date: Fri, 21 Apr 2023 15:18:39 +0200 Subject: [PATCH 0050/1687] Use `storage.uncommited_state` --- src/Coordination/KeeperStorage.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 8bfe1e667b4..308f6dcf815 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -918,12 +918,14 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr std::vector new_deltas; if (zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists) { - auto & container = storage.container; - auto node_it = container.find(request.path); - if (node_it != container.end()) - { + // auto & container = storage.container; + // auto node_it = container.find(request.path); + // if (node_it != container.end()) + // { + // return new_deltas; + // } + if (storage.uncommitted_state.getNode(request.path) != nullptr) return new_deltas; - } } auto parent_path = parentPath(request.path); From 2b94418dc3e3a891212a7a677e11930b987e2f34 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Fri, 21 Apr 2023 15:19:08 +0200 Subject: [PATCH 0051/1687] Add new OpNum to ZooKeeperLogElement --- src/Interpreters/ZooKeeperLog.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index 48f4d510af7..880b9d3686d 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -88,6 +88,7 @@ NamesAndTypesList ZooKeeperLogElement::getNamesAndTypes() {"SessionID", static_cast(Coordination::OpNum::SessionID)}, {"FilteredList", static_cast(Coordination::OpNum::FilteredList)}, {"CheckNotExists", static_cast(Coordination::OpNum::CheckNotExists)}, + {"CreateIfNotExists", static_cast(Coordination::OpNum::CreateIfNotExists)}, }); auto error_enum = getCoordinationErrorCodesEnumType(); From bd2718c79dc0dbbd84b81f8086082f8164f887e7 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Mon, 8 May 2023 17:55:49 +0200 Subject: [PATCH 0052/1687] Fix comment --- src/Common/ZooKeeper/ZooKeeperCommon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index ee4eab1156c..264b2bb9606 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -199,7 +199,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest /// used only during restore from zookeeper log int32_t parent_cversion = -1; - /// should it fail if node already exists + /// should it succeed if node already exists bool not_exists = false; ZooKeeperCreateRequest() = default; From 3a3539b9965012752f955e956e12624b54083bd2 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Mon, 8 May 2023 18:22:35 +0200 Subject: [PATCH 0053/1687] Style fix --- src/Coordination/KeeperStorage.cpp | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 308f6dcf815..f368c11a2c6 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -917,16 +917,8 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr std::vector new_deltas; - if (zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists) { - // auto & container = storage.container; - // auto node_it = 
container.find(request.path); - // if (node_it != container.end()) - // { - // return new_deltas; - // } - if (storage.uncommitted_state.getNode(request.path) != nullptr) - return new_deltas; - } + if (zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists && storage.uncommitted_state.getNode(request.path) != nullptr) + return new_deltas; auto parent_path = parentPath(request.path); auto parent_node = storage.uncommitted_state.getNode(parent_path); @@ -1014,9 +1006,8 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); - if (storage.uncommitted_state.deltas.begin()->zxid != zxid) { + if (storage.uncommitted_state.deltas.begin()->zxid != zxid) return response_ptr; - } if (const auto result = storage.commit(zxid); result != Coordination::Error::ZOK) { From b61ffe3ff7873fe17a33d8ce90a795bd1d6879b6 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 9 May 2023 18:11:55 +0200 Subject: [PATCH 0054/1687] Merge checks --- src/Coordination/KeeperStorage.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index f368c11a2c6..53df5451e67 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -917,9 +917,6 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr std::vector new_deltas; - if (zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists && storage.uncommitted_state.getNode(request.path) != nullptr) - return new_deltas; - auto parent_path = parentPath(request.path); auto parent_node = storage.uncommitted_state.getNode(parent_path); if (parent_node == nullptr) @@ -949,7 +946,12 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr } if (storage.uncommitted_state.getNode(path_created)) + { + if (zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists) + return new_deltas; + return {KeeperStorage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; + } if (getBaseName(path_created).size == 0) return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; From b2324d723e02616d472f4b0c7622c99f4acd6296 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 30 May 2023 21:09:37 +0200 Subject: [PATCH 0055/1687] Use native `createIfNotExists` for `createAncestors` if available --- src/Common/ZooKeeper/IKeeper.h | 3 +++ src/Common/ZooKeeper/Types.h | 2 +- src/Common/ZooKeeper/ZooKeeper.cpp | 30 +++++++++++++++++++++++- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 8 ++++++- src/Common/ZooKeeper/ZooKeeperCommon.h | 11 +++++---- 5 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 3eb5819df90..efc05fd7db1 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -199,6 +199,9 @@ struct CreateRequest : virtual Request bool is_sequential = false; ACLs acls; + /// should it succeed if node already exists + bool not_exists = false; + void addRootPath(const String & root_path) override; String getPath() const override { return path; } diff --git a/src/Common/ZooKeeper/Types.h b/src/Common/ZooKeeper/Types.h index 0309f56ad5b..d2876adaabc 100644 --- a/src/Common/ZooKeeper/Types.h +++ b/src/Common/ZooKeeper/Types.h @@ -29,7 +29,7 @@ using EventPtr = std::shared_ptr; template using 
AsyncResponses = std::vector>>; -Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode); +Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode, bool ignore_if_exists = false); Coordination::RequestPtr makeRemoveRequest(const std::string & path, int version); Coordination::RequestPtr makeSetRequest(const std::string & path, const std::string & data, int version); Coordination::RequestPtr makeCheckRequest(const std::string & path, int version); diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index a587ad6caf4..3357c75f50a 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -1,4 +1,5 @@ #include "ZooKeeper.h" +#include "Coordination/KeeperConstants.h" #include "ZooKeeperImpl.h" #include "KeeperException.h" #include "TestKeeper.h" @@ -351,6 +352,32 @@ void ZooKeeper::createIfNotExists(const std::string & path, const std::string & void ZooKeeper::createAncestors(const std::string & path) { size_t pos = 1; + + if (getApiVersion() >= DB::KeeperApiVersion::WITH_CREATE_IF_NOT_EXISTS) + { + Coordination::Requests create_ops; + + while (true) + { + pos = path.find('/', pos); + if (pos == std::string::npos) + break; + + auto request = makeCreateRequest(path.substr(0, pos), "", CreateMode::Persistent, true); + create_ops.emplace_back(request); + + ++pos; + } + + Coordination::Responses responses; + Coordination::Error code = multiImpl(create_ops, responses); + + if (code == Coordination::Error::ZOK) + return; + + throw KeeperException(code, path); + } + while (true) { pos = path.find('/', pos); @@ -1261,13 +1288,14 @@ void KeeperMultiException::check( } -Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode) +Coordination::RequestPtr makeCreateRequest(const std::string & path, const std::string & data, int create_mode, bool ignore_if_exists) { auto request = std::make_shared(); request->path = path; request->data = data; request->is_ephemeral = create_mode == CreateMode::Ephemeral || create_mode == CreateMode::EphemeralSequential; request->is_sequential = create_mode == CreateMode::PersistentSequential || create_mode == CreateMode::EphemeralSequential; + request->not_exists = ignore_if_exists; return request; } diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 61b9d6e5172..5a34f7cc8d5 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -660,7 +660,6 @@ void ZooKeeperMultiResponse::writeImpl(WriteBuffer & out) const ZooKeeperResponsePtr ZooKeeperHeartbeatRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperSyncRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperAuthRequest::makeResponse() const { return setTime(std::make_shared()); } -ZooKeeperResponsePtr ZooKeeperCreateRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperRemoveRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperExistsRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperGetRequest::makeResponse() const { return setTime(std::make_shared()); } @@ -668,6 +667,13 @@ ZooKeeperResponsePtr ZooKeeperSetRequest::makeResponse() const { return setTime( 
ZooKeeperResponsePtr ZooKeeperListRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperSimpleListRequest::makeResponse() const { return setTime(std::make_shared()); } +ZooKeeperResponsePtr ZooKeeperCreateRequest::makeResponse() const +{ + if (not_exists) + return setTime(std::make_shared()); + return setTime(std::make_shared()); +} + ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const { if (not_exists) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 264b2bb9606..b79cbc204a0 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -199,9 +199,6 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest /// used only during restore from zookeeper log int32_t parent_cversion = -1; - /// should it succeed if node already exists - bool not_exists = false; - ZooKeeperCreateRequest() = default; explicit ZooKeeperCreateRequest(const CreateRequest & base) : CreateRequest(base) {} @@ -218,7 +215,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest void createLogElements(LogElements & elems) const override; }; -struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse +struct ZooKeeperCreateResponse : CreateResponse, ZooKeeperResponse { void readImpl(ReadBuffer & in) override; @@ -231,6 +228,12 @@ struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse void fillLogElements(LogElements & elems, size_t idx) const override; }; +struct ZooKeeperCreateIfNotExistsResponse : ZooKeeperCreateResponse +{ + OpNum getOpNum() const override { return OpNum::CreateIfNotExists; } + using ZooKeeperCreateResponse::ZooKeeperCreateResponse; +}; + struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest { ZooKeeperRemoveRequest() = default; From c645d5f5d1b01effd8253d172a9e6803e5a87619 Mon Sep 17 00:00:00 2001 From: Lirikl Date: Mon, 5 Jun 2023 02:21:50 +0300 Subject: [PATCH 0056/1687] add more comments --- src/Functions/array/arrayFold.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index e2d36903f36..8398ee19e4d 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -82,7 +82,7 @@ public: const ColumnArray * column_first_array = nullptr; ColumnsWithTypeAndName arrays; arrays.reserve(arguments.size() - 1); - + /// Validate input types and get the input array columns in a convenient form for (size_t i = 1; i < arguments.size() - 1; ++i) { const auto & array_with_type_and_name = arguments[i]; @@ -135,15 +135,16 @@ public: size_t max_array_size = 0; const auto & offsets = column_first_array->getOffsets(); -//get columns of Nth array elements IColumn::Selector selector(data_row_count); size_t cur_ind = 0; ssize_t cur_arr = 0; + /// Skip to the first non-empty array if (data_row_count) while (offsets[cur_arr] == 0) ++cur_arr; + /// selector[i] is the index that the i-th data element has within its corresponding array for (ssize_t i = 0; i < data_row_count; ++i) { selector[i] = cur_ind++; @@ -159,14 +160,20 @@ public: std::vector data_arrays; data_arrays.resize(array_count); + /// Split each data column into columns that each contain only the elements at the N-th index of the arrays if (max_array_size > 0) for (size_t i = 0; i < array_count; ++i) data_arrays[i] = arrays[i].column->scatter(max_array_size, selector); size_t prev_size = rows_count; IColumn::Permutation
inverse_permutation(rows_count); size_t inverse_permutation_count = 0; + /** After each iteration current_column contains the value of the accumulator after applying the array values at index ind. + * At each iteration only the rows of current_column whose arrays still have unapplied elements are kept. + * Discarded rows, which contain finished calculations, are appended to the result_data column; as we insert them we save their original row numbers in the inverse_permutation vector. + */ for (size_t ind = 0; ind < max_array_size; ++ind) { IColumn::Selector prev_selector(prev_size); @@ -203,6 +210,8 @@ public: if (offsets[irow] - offsets[irow - 1] == max_array_size) inverse_permutation[inverse_permutation_count++] = irow; + /// We now have result_data with the result for every row, and inverse_permutation with the index of the input row each result corresponds to. + /// Now we need to invert inverse_permutation and apply it to result_data to get the rows in the right order. IColumn::Permutation perm(rows_count); for (ssize_t i = 0; i < rows_count; i++) perm[inverse_permutation[i]] = i; From 6e8ccafbb22813f7c736c3fd2f5363124985da3e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 5 Jun 2023 09:40:29 +0200 Subject: [PATCH 0057/1687] Fix test --- .../0_stateless/02735_system_zookeeper_connection.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02735_system_zookeeper_connection.reference b/tests/queries/0_stateless/02735_system_zookeeper_connection.reference index 1deabd88b88..2176580086b 100644 --- a/tests/queries/0_stateless/02735_system_zookeeper_connection.reference +++ b/tests/queries/0_stateless/02735_system_zookeeper_connection.reference @@ -1,2 +1,2 @@ -default ::1 9181 0 0 3 +default ::1 9181 0 0 4 zookeeper2 ::1 9181 0 0 0 From eb0e14b870108acf131456abee6acfab389bfa42 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Jun 2023 00:44:20 +0000 Subject: [PATCH 0058/1687] allow to replace long file names to hashes --- src/Common/SipHash.h | 11 ++++ src/DataTypes/Serializations/ISerialization.h | 1 + .../MergeTree/MergeTreeDataPartChecksum.cpp | 13 ++++ .../MergeTree/MergeTreeDataPartChecksum.h | 2 + .../MergeTree/MergeTreeDataPartWriterWide.cpp | 40 +++++++----- .../MergeTree/MergeTreeDataPartWriterWide.h | 4 ++ .../MergeTree/MergeTreeReaderWide.cpp | 61 +++++++++++-------- src/Storages/MergeTree/MergeTreeSettings.h | 2 + 8 files changed, 93 insertions(+), 41 deletions(-) diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 9e6479d81c1..e1cd5cc0aa3 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -284,6 +285,16 @@ inline UInt128 sipHash128(const char * data, const size_t size) return sipHash128Keyed(0, 0, data, size); } +inline String sipHash128String(const char * data, const size_t size) +{ + return getHexUIntLowercase(sipHash128(data, size)); +} + +inline String sipHash128String(const String & str) +{ + return sipHash128String(str.data(), str.size()); +} + inline UInt128 sipHash128ReferenceKeyed(UInt64 key0, UInt64 key1, const char * data, const size_t size) { SipHash hash(key0, key1, true); diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 17e6dfb85bc..ed090cefa38 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -368,6 +368,7 @@ public: static String getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path); static
String getFileNameForStream(const String & name_in_storage, const SubstreamPath & path); + static String getSubcolumnNameForStream(const SubstreamPath & path); static String getSubcolumnNameForStream(const SubstreamPath & path, size_t prefix_len); diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 78f68ea72fe..2f97edd1a9c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -340,6 +341,18 @@ MergeTreeDataPartChecksums::Checksum::uint128 MergeTreeDataPartChecksums::getTot return ret; } +std::optional MergeTreeDataPartChecksums::getFileNameOrHash(const String & name) const +{ + if (files.contains(name + ".bin")) + return name; + + auto hash = sipHash128String(name); + if (files.contains(hash + ".bin")) + return hash; + + return std::nullopt; +} + void MinimalisticDataPartChecksums::serialize(WriteBuffer & to) const { writeString("checksums format version: 5\n", to); diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index db110043b74..626b0a90839 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -88,6 +88,8 @@ struct MergeTreeDataPartChecksums static MergeTreeDataPartChecksums deserializeFrom(const String & s); UInt64 getTotalSizeOnDisk() const; + + std::optional getFileNameOrHash(const String & name) const; }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index f9fe6f2c8ab..60bb1119770 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -97,7 +97,15 @@ void MergeTreeDataPartWriterWide::addStreams( ISerialization::StreamCallback callback = [&](const auto & substream_path) { assert(!substream_path.empty()); - String stream_name = ISerialization::getFileNameForStream(column, substream_path); + + auto storage_settings = storage.getSettings(); + auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); + + String stream_name; + if (storage_settings->replace_long_file_name_to_hash && full_stream_name.size() > storage_settings->max_file_name_length) + stream_name = sipHash128String(full_stream_name); + else + stream_name = full_stream_name; /// Shared offsets for Nested type. 
if (column_streams.contains(stream_name)) @@ -126,12 +134,21 @@ void MergeTreeDataPartWriterWide::addStreams( marks_compression_codec, settings.marks_compress_block_size, settings.query_write_settings); + + full_name_to_stream_name.emplace(full_stream_name, stream_name); }; ISerialization::SubstreamPath path; data_part->getSerialization(column.name)->enumerateStreams(callback, column.type); } +const String & MergeTreeDataPartWriterWide::getStreamName( + const NameAndTypePair & column, + const ISerialization::SubstreamPath & substream_path) const +{ + auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); + return full_name_to_stream_name.at(full_stream_name); +} ISerialization::OutputStreamGetter MergeTreeDataPartWriterWide::createStreamGetter( const NameAndTypePair & column, WrittenOffsetColumns & offset_columns) const @@ -139,8 +156,7 @@ ISerialization::OutputStreamGetter MergeTreeDataPartWriterWide::createStreamGett return [&, this] (const ISerialization::SubstreamPath & substream_path) -> WriteBuffer * { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; - - String stream_name = ISerialization::getFileNameForStream(column, substream_path); + auto stream_name = getStreamName(column, substream_path); /// Don't write offsets more than one time for Nested type. if (is_offsets && offset_columns.contains(stream_name)) @@ -289,8 +305,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; - - String stream_name = ISerialization::getFileNameForStream(column, substream_path); + auto stream_name = getStreamName(column, substream_path); /// Don't write offsets more than one time for Nested type. if (is_offsets && offset_columns.contains(stream_name)) @@ -328,14 +343,13 @@ void MergeTreeDataPartWriterWide::writeSingleGranule( serialization->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; - - String stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); + auto stream_name = getStreamName(name_and_type, substream_path); /// Don't write offsets more than one time for Nested type. 
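
The naming policy that addStreams applies above is small enough to state on its own. The following is a minimal, hedged sketch of that decision, assuming nothing beyond the standard library; toy_hash_hex and chooseStreamName are hypothetical names, with toy_hash_hex standing in for the real sipHash128String (which returns the 32-character lowercase hex digest of a 128-bit SipHash):

#include <functional>
#include <sstream>
#include <string>

/// Stand-in for sipHash128String; the real function returns 32 lowercase hex
/// characters of the 128-bit SipHash of the input.
static std::string toy_hash_hex(const std::string & s)
{
    std::ostringstream out;
    out << std::hex << std::hash<std::string>{}(s);
    return out.str();
}

/// The naming policy sketched above: keep short stream names as-is and replace
/// overly long ones with a fixed-length hash, so file names produced for
/// deeply nested columns never exceed filesystem limits.
static std::string chooseStreamName(
    const std::string & full_stream_name,
    bool replace_long_file_name_to_hash,
    size_t max_file_name_length)
{
    if (replace_long_file_name_to_hash && full_stream_name.size() > max_file_name_length)
        return toy_hash_hex(full_stream_name);
    return full_stream_name;
}

With max_file_name_length left at 128 as introduced in this series, only pathologically long nested column names get hashed; everything else keeps its readable file name.
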
if (is_offsets && offset_columns.contains(stream_name)) return; - column_streams[stream_name]->compressed_hashing.nextIfAtEnd(); + column_streams.at(stream_name)->compressed_hashing.nextIfAtEnd(); }); } @@ -406,10 +420,7 @@ void MergeTreeDataPartWriterWide::writeColumn( { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) - { - String stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); - offset_columns.insert(stream_name); - } + offset_columns.insert(getStreamName(name_and_type, substream_path)); }); } @@ -656,10 +667,7 @@ void MergeTreeDataPartWriterWide::writeFinalMark( { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) - { - String stream_name = ISerialization::getFileNameForStream(column, substream_path); - offset_columns.insert(stream_name); - } + offset_columns.insert(getStreamName(column, substream_path)); }); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 633b5119474..de7419fedb2 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -101,6 +101,7 @@ private: void adjustLastMarkIfNeedAndFlushToDisk(size_t new_rows_in_last_mark); ISerialization::OutputStreamGetter createStreamGetter(const NameAndTypePair & column, WrittenOffsetColumns & offset_columns) const; + const String & getStreamName(const NameAndTypePair & column, const ISerialization::SubstreamPath & substream_path) const; using SerializationState = ISerialization::SerializeBinaryBulkStatePtr; using SerializationStates = std::unordered_map; @@ -110,6 +111,9 @@ private: using ColumnStreams = std::map; ColumnStreams column_streams; + /// Mapping from the full stream name to the stream name actually used on disk (the full name, or its hash if the full name is too long). + std::unordered_map full_name_to_stream_name; + /// Non written marks to disk (for each column). Waiting until all rows for /// this marks will be written to disk. using MarksForColumns = std::unordered_map; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index baacfa55c94..cd641a5cd2a 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -198,13 +198,21 @@ size_t MergeTreeReaderWide::readRows( catch (...) { data_part_info_for_read->reportBroken(); - throw; } return read_rows; } +std::optional getStreamName( + const NameAndTypePair & column, + const ISerialization::SubstreamPath & substream_path, + const MergeTreeDataPartChecksums & checksums) +{ + auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); + return checksums.getFileNameOrHash(full_stream_name); +} + void MergeTreeReaderWide::addStreams( const NameAndTypePair & name_and_type, const SerializationPtr & serialization, @@ -216,35 +224,33 @@ void MergeTreeReaderWide::addStreams( ISerialization::StreamCallback callback = [&] (const ISerialization::SubstreamPath & substream_path) { - String stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); - - if (streams.contains(stream_name)) - { - has_any_stream = true; - return; - } - - bool data_file_exists = data_part_info_for_read->getChecksums().files.contains(stream_name + DATA_FILE_EXTENSION); + auto stream_name = getStreamName(name_and_type, substream_path, data_part_info_for_read->getChecksums()); /** If data file is missing then we will not try to open it.
* It is necessary since it allows to add new column to structure of the table without creating new files for old parts. */ - if (!data_file_exists) + if (!stream_name) { has_all_streams = false; return; } + if (streams.contains(*stream_name)) + { + has_any_stream = true; + return; + } + has_any_stream = true; bool is_lc_dict = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; auto context = data_part_info_for_read->getContext(); auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? &context->getLoadMarksThreadpool() : nullptr; - streams.emplace(stream_name, std::make_unique( - data_part_info_for_read, stream_name, DATA_FILE_EXTENSION, + streams.emplace(*stream_name, std::make_unique( + data_part_info_for_read, *stream_name, DATA_FILE_EXTENSION, data_part_info_for_read->getMarksCount(), all_mark_ranges, settings, mark_cache, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION), + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), &data_part_info_for_read->getIndexGranularityInfo(), profile_callback, clock_type, is_lc_dict, load_marks_threadpool)); }; @@ -255,13 +261,14 @@ void MergeTreeReaderWide::addStreams( partially_read_columns.insert(name_and_type.name); } - static ReadBuffer * getStream( bool seek_to_start, const ISerialization::SubstreamPath & substream_path, + const MergeTreeDataPartChecksums & checksums, MergeTreeReaderWide::FileStreams & streams, const NameAndTypePair & name_and_type, - size_t from_mark, bool seek_to_mark, + size_t from_mark, + bool seek_to_mark, size_t current_task_last_mark, ISerialization::SubstreamsCache & cache) { @@ -269,9 +276,12 @@ static ReadBuffer * getStream( if (cache.contains(ISerialization::getSubcolumnNameForStream(substream_path))) return nullptr; - String stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); + auto stream_name = getStreamName(name_and_type, substream_path, checksums); - auto it = streams.find(stream_name); + if (!stream_name) + return nullptr; + + auto it = streams.find(*stream_name); if (it == streams.end()) return nullptr; @@ -298,7 +308,7 @@ void MergeTreeReaderWide::deserializePrefix( ISerialization::DeserializeBinaryBulkSettings deserialize_settings; deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - return getStream(/* seek_to_start = */true, substream_path, streams, name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); + return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); }; serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name]); } @@ -317,15 +327,15 @@ void MergeTreeReaderWide::prefetchForColumn( serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - String stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); + auto stream_name = getStreamName(name_and_type, substream_path, data_part_info_for_read->getChecksums()); - if (!prefetched_streams.contains(stream_name)) + if (stream_name && !prefetched_streams.contains(*stream_name)) { bool seek_to_mark = !continue_reading; - if (ReadBuffer * buf = getStream(false, substream_path, streams, name_and_type, from_mark, seek_to_mark, 
current_task_last_mark, cache)) + if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { buf->prefetch(priority); - prefetched_streams.insert(stream_name); + prefetched_streams.insert(*stream_name); } } }); @@ -348,8 +358,9 @@ void MergeTreeReaderWide::readData( bool seek_to_mark = !was_prefetched && !continue_reading; return getStream( - /* seek_to_start = */false, substream_path, streams, name_and_type, from_mark, - seek_to_mark, current_task_last_mark, cache); + /* seek_to_start = */false, substream_path, + data_part_info_for_read->getChecksums(), streams, + name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache); }; deserialize_settings.continuous_reading = continue_reading; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 5ea99009756..ae4d585e5fe 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -34,6 +34,8 @@ struct Settings; M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \ M(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ M(Float, ratio_of_defaults_for_sparse_serialization, 1.0, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ + M(Bool, replace_long_file_name_to_hash, false, "", 0) \ + M(UInt64, max_file_name_length, 128, "", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. 
By default has the same value as `index_granularity`.", 0) \ From b30544a6ab6b773ed3dc7bd6c3cffcebbb6ae1b8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Jun 2023 01:39:45 +0000 Subject: [PATCH 0059/1687] allow to replace long file names to hashes --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 +++- .../MergeTree/IMergedBlockOutputStream.cpp | 8 +++-- .../MergeTree/MergeTreeDataPartChecksum.cpp | 4 +-- .../MergeTree/MergeTreeDataPartChecksum.h | 2 +- .../MergeTree/MergeTreeDataPartWide.cpp | 24 +++++++++----- .../MergeTree/MergeTreeReaderWide.cpp | 33 +++++++++---------- src/Storages/MergeTree/MergeTreeSettings.h | 4 +-- src/Storages/MergeTree/MutateTask.cpp | 15 ++++++--- src/Storages/MergeTree/checkDataPart.cpp | 13 +++++++- .../System/StorageSystemPartsColumns.cpp | 7 ++-- 10 files changed, 75 insertions(+), 41 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index d27b03fff44..dfc1fe0c262 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,4 +1,5 @@ #include "IMergeTreeDataPart.h" +#include "Common/SipHash.h" #include "Storages/MergeTree/IDataPartStorage.h" #include @@ -1015,7 +1016,10 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const { if (path_to_data_file.empty()) { - String candidate_path = /*fs::path(getRelativePath()) */ (ISerialization::getFileNameForStream(part_column, substream_path) + ".bin"); + auto candidate_path = ISerialization::getFileNameForStream(part_column, substream_path) + ".bin"; + + if (!getDataPartStorage().exists(candidate_path)) + candidate_path = sipHash128String(candidate_path) + ".bin"; /// We can have existing, but empty .bin files. Example: LowCardinality(Nullable(...)) columns and column_name.dict.null.bin file. if (getDataPartStorage().exists(candidate_path) && getDataPartStorage().getFileSize(candidate_path) != 0) diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index 21bead2864a..2df3b6d15a6 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -51,7 +51,9 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( data_part->getSerialization(column.name)->enumerateStreams( [&](const ISerialization::SubstreamPath & substream_path) { - ++stream_counts[ISerialization::getFileNameForStream(column.name, substream_path)]; + auto full_stream_name = ISerialization::getFileNameForStream(column.name, substream_path); + auto stream_name = checksums.getFileNameOrHash(full_stream_name); + ++stream_counts[stream_name]; }); } @@ -65,7 +67,9 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( ISerialization::StreamCallback callback = [&](const ISerialization::SubstreamPath & substream_path) { - String stream_name = ISerialization::getFileNameForStream(column_name, substream_path); + auto full_stream_name = ISerialization::getFileNameForStream(column_name, substream_path); + auto stream_name = checksums.getFileNameOrHash(full_stream_name); + /// Delete files if they are no longer shared with another column. 
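
The guard that follows implements simple reference counting: a substream file, such as the shared offsets stream of a Nested column, can be used by several columns and must be deleted only when its last user is removed. A hedged standalone sketch of that bookkeeping, where streamsToRemove and its inputs are hypothetical stand-ins for what ISerialization::enumerateStreams provides:

#include <map>
#include <set>
#include <string>
#include <vector>

/// Count how many columns refer to each stream, then collect only the streams
/// of the dropped column whose usage count falls to zero.
static std::set<std::string> streamsToRemove(
    const std::map<std::string, std::vector<std::string>> & streams_per_column,
    const std::string & dropped_column)
{
    std::map<std::string, size_t> stream_counts;
    for (const auto & [owner, streams] : streams_per_column)
        for (const auto & stream : streams)
            ++stream_counts[stream];

    std::set<std::string> to_remove;
    for (const auto & stream : streams_per_column.at(dropped_column))
        if (--stream_counts[stream] == 0)
            to_remove.insert(stream);

    return to_remove;
}
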
if (--stream_counts[stream_name] == 0) { diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 2f97edd1a9c..7d39ea0707f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -341,7 +341,7 @@ MergeTreeDataPartChecksums::Checksum::uint128 MergeTreeDataPartChecksums::getTot return ret; } -std::optional MergeTreeDataPartChecksums::getFileNameOrHash(const String & name) const +String MergeTreeDataPartChecksums::getFileNameOrHash(const String & name) const { if (files.contains(name + ".bin")) return name; @@ -350,7 +350,7 @@ std::optional MergeTreeDataPartChecksums::getFileNameOrHash(const String if (files.contains(hash + ".bin")) return hash; - return std::nullopt; + return name; } void MinimalisticDataPartChecksums::serialize(WriteBuffer & to) const diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 626b0a90839..2a38b52c72a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -89,7 +89,7 @@ struct MergeTreeDataPartChecksums UInt64 getTotalSizeOnDisk() const; - std::optional getFileNameOrHash(const String & name) const; + String getFileNameOrHash(const String & name) const; }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index f44cbdd8628..645e16eed38 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -73,19 +73,20 @@ ColumnSize MergeTreeDataPartWide::getColumnSizeImpl( getSerialization(column.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - String file_name = ISerialization::getFileNameForStream(column, substream_path); + auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); + auto stream_name = checksums.getFileNameOrHash(full_stream_name); - if (processed_substreams && !processed_substreams->insert(file_name).second) + if (processed_substreams && !processed_substreams->insert(stream_name).second) return; - auto bin_checksum = checksums.files.find(file_name + ".bin"); + auto bin_checksum = checksums.files.find(stream_name + ".bin"); if (bin_checksum != checksums.files.end()) { size.data_compressed += bin_checksum->second.file_size; size.data_uncompressed += bin_checksum->second.uncompressed_size; } - auto mrk_checksum = checksums.files.find(file_name + getMarksFileExtension()); + auto mrk_checksum = checksums.files.find(stream_name + getMarksFileExtension()); if (mrk_checksum != checksums.files.end()) size.marks += mrk_checksum->second.file_size; }); @@ -185,9 +186,11 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const { getSerialization(name_type.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - String file_name = ISerialization::getFileNameForStream(name_type, substream_path); - String mrk_file_name = file_name + marks_file_extension; - String bin_file_name = file_name + DATA_FILE_EXTENSION; + String full_stream_name = ISerialization::getFileNameForStream(name_type, substream_path); + String stream_name = checksums.getFileNameOrHash(full_stream_name); + + String mrk_file_name = stream_name + marks_file_extension; + String bin_file_name = stream_name + DATA_FILE_EXTENSION; if (!checksums.files.contains(mrk_file_name)) throw Exception( @@ 
-213,6 +216,8 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const getSerialization(name_type.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { auto file_path = ISerialization::getFileNameForStream(name_type, substream_path) + marks_file_extension; + if (!getDataPartStorage().exists(file_path)) + file_path = sipHash128String(file_path) + marks_file_extension; /// Missing file is Ok for case when new column was added. if (getDataPartStorage().exists(file_path)) @@ -266,7 +271,10 @@ String MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & colum getSerialization(column.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { if (filename.empty()) - filename = ISerialization::getFileNameForStream(column, substream_path); + { + auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); + auto filname = checksums.getFileNameOrHash(full_stream_name); + } }); return filename; } diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index cd641a5cd2a..0ce20dc02f0 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -204,7 +204,7 @@ size_t MergeTreeReaderWide::readRows( return read_rows; } -std::optional getStreamName( +String getStreamName( const NameAndTypePair & column, const ISerialization::SubstreamPath & substream_path, const MergeTreeDataPartChecksums & checksums) @@ -226,18 +226,20 @@ void MergeTreeReaderWide::addStreams( { auto stream_name = getStreamName(name_and_type, substream_path, data_part_info_for_read->getChecksums()); - /** If data file is missing then we will not try to open it. - * It is necessary since it allows to add new column to structure of the table without creating new files for old parts. - */ - if (!stream_name) + if (streams.contains(stream_name)) { - has_all_streams = false; + has_any_stream = true; return; } - if (streams.contains(*stream_name)) + bool data_file_exists = data_part_info_for_read->getChecksums().files.contains(stream_name + DATA_FILE_EXTENSION); + + /** If data file is missing then we will not try to open it. + * It is necessary since it allows to add new column to structure of the table without creating new files for old parts. + */ + if (!data_file_exists) { - has_any_stream = true; + has_all_streams = false; return; } @@ -247,10 +249,10 @@ void MergeTreeReaderWide::addStreams( auto context = data_part_info_for_read->getContext(); auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; - streams.emplace(*stream_name, std::make_unique( - data_part_info_for_read, *stream_name, DATA_FILE_EXTENSION, + streams.emplace(stream_name, std::make_unique( + data_part_info_for_read, stream_name, DATA_FILE_EXTENSION, data_part_info_for_read->getMarksCount(), all_mark_ranges, settings, mark_cache, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION), &data_part_info_for_read->getIndexGranularityInfo(), profile_callback, clock_type, is_lc_dict, load_marks_threadpool)); }; @@ -278,10 +280,7 @@ static ReadBuffer * getStream( auto stream_name = getStreamName(name_and_type, substream_path, checksums); - if (!stream_name) - return nullptr; - - auto it = streams.find(*stream_name); + auto it = streams.find(stream_name); if (it == streams.end()) return nullptr; @@ -329,13 +328,13 @@ void MergeTreeReaderWide::prefetchForColumn( { auto stream_name = getStreamName(name_and_type, substream_path, data_part_info_for_read->getChecksums()); - if (stream_name && !prefetched_streams.contains(*stream_name)) + if (!prefetched_streams.contains(stream_name)) { bool seek_to_mark = !continue_reading; if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { buf->prefetch(priority); - prefetched_streams.insert(*stream_name); + prefetched_streams.insert(stream_name); } } }); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index ae4d585e5fe..0d32567d2fa 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -34,8 +34,8 @@ struct Settings; M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \ M(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ M(Float, ratio_of_defaults_for_sparse_serialization, 1.0, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ - M(Bool, replace_long_file_name_to_hash, false, "", 0) \ - M(UInt64, max_file_name_length, 128, "", 0) \ + M(Bool, replace_long_file_name_to_hash, false, "If the file name for column is too long (more than 'max_file_name_length' bytes) replace it to SipHash128", 0) \ + M(UInt64, max_file_name_length, 128, "The maximal length of the file name to keep it as is without hashing", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. 
By default has the same value as `index_granularity`.", 0) \ diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 76096d00641..4bcaea53337 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1,3 +1,4 @@ +#include "Common/SipHash.h" #include #include @@ -591,7 +592,8 @@ static std::unordered_map getStreamCounts( { auto callback = [&](const ISerialization::SubstreamPath & substream_path) { - auto stream_name = ISerialization::getFileNameForStream(column_name, substream_path); + auto full_stream_name = ISerialization::getFileNameForStream(column_name, substream_path); + auto stream_name = data_part->checksums.getFileNameOrHash(full_stream_name); ++stream_counts[stream_name]; }; @@ -705,7 +707,9 @@ static NameToNameVector collectFilesForRenames( { ISerialization::StreamCallback callback = [&](const ISerialization::SubstreamPath & substream_path) { - String stream_name = ISerialization::getFileNameForStream({command.column_name, command.data_type}, substream_path); + auto full_stream_name = ISerialization::getFileNameForStream({command.column_name, command.data_type}, substream_path); + auto stream_name = source_part->checksums.getFileNameOrHash(full_stream_name); + /// Delete files if they are no longer shared with another column. if (--stream_counts[stream_name] == 0) { @@ -724,8 +728,11 @@ static NameToNameVector collectFilesForRenames( ISerialization::StreamCallback callback = [&](const ISerialization::SubstreamPath & substream_path) { - String stream_from = ISerialization::getFileNameForStream(command.column_name, substream_path); - String stream_to = boost::replace_first_copy(stream_from, escaped_name_from, escaped_name_to); + String full_stream_from = ISerialization::getFileNameForStream(command.column_name, substream_path); + String full_stream_to = boost::replace_first_copy(full_stream_from, escaped_name_from, escaped_name_to); + + String stream_from = source_part->checksums.getFileNameOrHash(full_stream_from); + String stream_to = stream_from == full_stream_from ? 
full_stream_to : sipHash128String(full_stream_to); if (stream_from != stream_to) { diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 00710ed3ed6..561f76d8b5f 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace CurrentMetrics @@ -30,6 +31,7 @@ namespace ErrorCodes extern const int CANNOT_MUNMAP; extern const int CANNOT_MREMAP; extern const int UNEXPECTED_FILE_IN_DATA_PART; + extern const int NO_FILE_IN_DATA_PART; } @@ -137,7 +139,16 @@ IMergeTreeDataPart::Checksums checkDataPart( { get_serialization(column)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - String file_name = ISerialization::getFileNameForStream(column, substream_path) + ".bin"; + auto file_name = ISerialization::getFileNameForStream(column, substream_path) + ".bin"; + + if (!data_part_storage.exists(file_name)) + file_name = sipHash128String(file_name); + + if (!data_part_storage.exists(file_name)) + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, + "There is no file for column '{}' in data part '{}'", + column.name, data_part->name); + checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); }); } diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 00b958b015f..de874b22e7e 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -261,16 +261,17 @@ void StorageSystemPartsColumns::processNextStorage( ColumnSize size; NameAndTypePair subcolumn(column.name, name, column.type, data.type); - String file_name = ISerialization::getFileNameForStream(subcolumn, subpath); + String full_stream_name = ISerialization::getFileNameForStream(subcolumn, subpath); + String stream_name = part->checksums.getFileNameOrHash(full_stream_name); - auto bin_checksum = part->checksums.files.find(file_name + ".bin"); + auto bin_checksum = part->checksums.files.find(stream_name + ".bin"); if (bin_checksum != part->checksums.files.end()) { size.data_compressed += bin_checksum->second.file_size; size.data_uncompressed += bin_checksum->second.uncompressed_size; } - auto mrk_checksum = part->checksums.files.find(file_name + part->index_granularity_info.mark_type.getFileExtension()); + auto mrk_checksum = part->checksums.files.find(stream_name + part->index_granularity_info.mark_type.getFileExtension()); if (mrk_checksum != part->checksums.files.end()) size.marks += mrk_checksum->second.file_size; From 0b06f247829d15fcf493d3d1804592ef0b9bd9c2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Jun 2023 01:40:34 +0000 Subject: [PATCH 0060/1687] temporarily enable hashing of names --- src/Storages/MergeTree/MergeTreeSettings.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 0d32567d2fa..d63e33e2477 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -34,8 +34,8 @@ struct Settings; M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \ M(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ M(Float, ratio_of_defaults_for_sparse_serialization, 1.0, "Minimal ratio of number of default values
to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ - M(Bool, replace_long_file_name_to_hash, false, "If the file name for column is too long (more than 'max_file_name_length' bytes) replace it to SipHash128", 0) \ - M(UInt64, max_file_name_length, 128, "The maximal length of the file name to keep it as is without hashing", 0) \ + M(Bool, replace_long_file_name_to_hash, true, "If the file name for column is too long (more than 'max_file_name_length' bytes) replace it to SipHash128", 0) \ + M(UInt64, max_file_name_length, 0, "The maximal length of the file name to keep it as is without hashing", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ From a8a561b28cd8f1f5835c0bce288755fbe0819928 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Jun 2023 01:54:01 +0000 Subject: [PATCH 0061/1687] fix typo --- src/Storages/MergeTree/MergeTreeDataPartWide.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 645e16eed38..04e672933a5 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -273,7 +273,7 @@ String MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & colum if (filename.empty()) { auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); - auto filname = checksums.getFileNameOrHash(full_stream_name); + filename = checksums.getFileNameOrHash(full_stream_name); } }); return filename; From 562ad9536669b9932cc196852354bfdb8f484402 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Jun 2023 18:01:11 +0000 Subject: [PATCH 0062/1687] fix getting the size of column --- src/Storages/MergeTree/MergeTreeDataPartWide.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 04e672933a5..f8627ec8073 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -257,8 +257,10 @@ bool MergeTreeDataPartWide::hasColumnFiles(const NameAndTypePair & column) const bool res = true; getSerialization(column.name)->enumerateStreams([&](const auto & substream_path) { - String file_name = ISerialization::getFileNameForStream(column, substream_path); - if (!check_stream_exists(file_name)) + auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); + auto stream_name = checksums.getFileNameOrHash(full_stream_name); + + if (!check_stream_exists(stream_name)) res = false; }); From 7e3b0c5f86046a03f30b484cc99481269d4dfebd Mon Sep 17 00:00:00 2001 From: Boris Kuschel Date: Thu, 8 Jun 2023 08:43:22 -0700 Subject: [PATCH 0063/1687] Add new exceptions to 4xx error --- src/Server/HTTPHandler.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 5a2bf0bad6c..09063d93a59 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -70,12 +70,15 @@ namespace ErrorCodes extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; extern const int CANNOT_OPEN_FILE; extern const int CANNOT_COMPILE_REGEXP; - + extern const int DUPLICATE_COLUMN; + extern 
const int ILLEGAL_COLUMN; + extern const int THERE_IS_NO_COLUMN; extern const int UNKNOWN_ELEMENT_IN_AST; extern const int UNKNOWN_TYPE_OF_AST_NODE; extern const int TOO_DEEP_AST; extern const int TOO_BIG_AST; extern const int UNEXPECTED_AST_STRUCTURE; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; extern const int SYNTAX_ERROR; @@ -186,7 +189,9 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti { return HTTPResponse::HTTP_FORBIDDEN; } - else if (exception_code == ErrorCodes::CANNOT_PARSE_TEXT || + else if (exception_code == ErrorCodes::BAD_ARGUMENTS || + exception_code == ErrorCodes::CANNOT_COMPILE_REGEXP || + exception_code == ErrorCodes::CANNOT_PARSE_TEXT || exception_code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE || exception_code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING || exception_code == ErrorCodes::CANNOT_PARSE_DATE || @@ -196,14 +201,19 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti exception_code == ErrorCodes::CANNOT_PARSE_IPV4 || exception_code == ErrorCodes::CANNOT_PARSE_IPV6 || exception_code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || + exception_code == ErrorCodes::CANNOT_PARSE_UUID || + exception_code == ErrorCodes::DUPLICATE_COLUMN || + exception_code == ErrorCodes::ILLEGAL_COLUMN || exception_code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST || exception_code == ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE || + exception_code == ErrorCodes::THERE_IS_NO_COLUMN || exception_code == ErrorCodes::TOO_DEEP_AST || exception_code == ErrorCodes::TOO_BIG_AST || exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE || exception_code == ErrorCodes::SYNTAX_ERROR || exception_code == ErrorCodes::INCORRECT_DATA || - exception_code == ErrorCodes::TYPE_MISMATCH) + exception_code == ErrorCodes::TYPE_MISMATCH || + exception_code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE) { return HTTPResponse::HTTP_BAD_REQUEST; } From ae28c549a76f500a72426211a75ef2cfbb7eb7cd Mon Sep 17 00:00:00 2001 From: Boris Kuschel Date: Fri, 9 Jun 2023 09:10:06 -0700 Subject: [PATCH 0064/1687] Fix style --- src/Server/HTTPHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 09063d93a59..c99134c86d2 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -212,7 +212,7 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE || exception_code == ErrorCodes::SYNTAX_ERROR || exception_code == ErrorCodes::INCORRECT_DATA || - exception_code == ErrorCodes::TYPE_MISMATCH || + exception_code == ErrorCodes::TYPE_MISMATCH || exception_code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE) { return HTTPResponse::HTTP_BAD_REQUEST; From 003565d08e1a4bf0d29711edbf8fe32a2d7ca36a Mon Sep 17 00:00:00 2001 From: Boris Kuschel Date: Mon, 12 Jun 2023 07:28:21 -0700 Subject: [PATCH 0065/1687] Add undefs --- src/Server/HTTPHandler.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index c99134c86d2..9906438f995 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -57,6 +57,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CANNOT_PARSE_TEXT; extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; @@ -67,6 +68,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; extern const int CANNOT_PARSE_IPV4; 
extern const int CANNOT_PARSE_IPV6; + extern const int CANNOT_PARSE_UUID; extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; extern const int CANNOT_OPEN_FILE; extern const int CANNOT_COMPILE_REGEXP; From e8f7f1df6aab2f52b684dafd9b87dde168438e85 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 13 Jun 2023 15:23:12 +0000 Subject: [PATCH 0066/1687] Fixing test. --- .../02785_left_anti_join_bug.reference | 0 .../0_stateless/02785_left_anti_join_bug.sql | 15 +++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/02785_left_anti_join_bug.reference create mode 100644 tests/queries/0_stateless/02785_left_anti_join_bug.sql diff --git a/tests/queries/0_stateless/02785_left_anti_join_bug.reference b/tests/queries/0_stateless/02785_left_anti_join_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02785_left_anti_join_bug.sql b/tests/queries/0_stateless/02785_left_anti_join_bug.sql new file mode 100644 index 00000000000..7c23e0aa814 --- /dev/null +++ b/tests/queries/0_stateless/02785_left_anti_join_bug.sql @@ -0,0 +1,15 @@ + + +SET allow_suspicious_low_cardinality_types=1; + +CREATE TABLE test_table (`id` Float32, `value` Float32) ENGINE = MergeTree ORDER BY id; +INSERT INTO test_table VALUES (-10.75, 95.57); + +CREATE TABLE test_table__fuzz_3 (`id` LowCardinality(Nullable(Float32)), `value` Float32) ENGINE = MergeTree ORDER BY id SETTINGS allow_nullable_key=1; + +insert into test_table__fuzz_3 select * from generateRandom() limit 10; +SELECT * FROM (SELECT CAST('104857.5', 'Float32'), corr(NULL, id, id) AS corr_value FROM test_table__fuzz_3 GROUP BY value) AS subquery ANTI LEFT JOIN test_table ON subquery.corr_value = test_table.id format Null; + +DROP TABLE IF EXISTS test_table; +DROP TABLE IF EXISTS test_table__fuzz_3; + From 0d2b9fd0e5bb5ba50317260e7c3bcc2fea6f420c Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 22 Jun 2023 01:35:11 +0000 Subject: [PATCH 0067/1687] Add VALID UNTIL clause for users --- .../sql-reference/statements/create/user.md | 11 +++ src/Access/IAccessStorage.cpp | 8 +++ src/Access/User.cpp | 3 +- src/Access/User.h | 1 + .../Access/InterpreterCreateUserQuery.cpp | 38 ++++++++++- ...InterpreterShowCreateAccessEntityQuery.cpp | 8 +++ src/Parsers/Access/ASTCreateUserQuery.cpp | 8 +++ src/Parsers/Access/ASTCreateUserQuery.h | 2 + src/Parsers/Access/ParserCreateUserQuery.cpp | 23 +++++++ .../test_user_valid_until/__init__.py | 0 .../integration/test_user_valid_until/test.py | 68 +++++++++++++++++++ 11 files changed, 166 insertions(+), 4 deletions(-) create mode 100644 tests/integration/test_user_valid_until/__init__.py create mode 100644 tests/integration/test_user_valid_until/test.py diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index d168be63c36..b32fa8dbeb0 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -14,6 +14,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] 
| ANY | NONE] + [VALID UNTIL datetime] [DEFAULT ROLE role [,...]] [DEFAULT DATABASE database | NONE] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] @@ -135,6 +136,16 @@ Another way of specifying host is to use `@` syntax following the username. Exam ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so. ::: +## VALID UNTIL Clause + +Allows you to specify the expiration date and, optionally, the time for a user. It accepts a string as a parameter. It is recommended to use the `YYYY-MM-DD [hh:mm:ss] [timezone]` format for datetime. + +Examples: + +- `CREATE USER name1 VALID UNTIL '2025-01-01'` +- `CREATE USER name1 VALID UNTIL '2025-01-01 12:00:00 UTC'` +- `CREATE USER name1 VALID UNTIL 'infinity'` + ## GRANTEES Clause Specifies users or roles which are allowed to receive [privileges](../../../sql-reference/statements/grant.md#grant-privileges) from this user on the condition this user has also all required access granted with [GRANT OPTION](../../../sql-reference/statements/grant.md#grant-privigele-syntax). Options of the `GRANTEES` clause: diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 9468e8d220a..cb628c3e559 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -514,6 +514,14 @@ bool IAccessStorage::areCredentialsValid( if (credentials.getUserName() != user.getName()) return false; + if (user.valid_until) + { + const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + + if (now > user.valid_until) + return false; + } + return Authentication::areCredentialsValid(credentials, user.auth_data, external_authenticators); } diff --git a/src/Access/User.cpp b/src/Access/User.cpp index c5750cdcd6c..3b4055b6b1d 100644 --- a/src/Access/User.cpp +++ b/src/Access/User.cpp @@ -17,7 +17,8 @@ bool User::equal(const IAccessEntity & other) const const auto & other_user = typeid_cast(other); return (auth_data == other_user.auth_data) && (allowed_client_hosts == other_user.allowed_client_hosts) && (access == other_user.access) && (granted_roles == other_user.granted_roles) && (default_roles == other_user.default_roles) - && (settings == other_user.settings) && (grantees == other_user.grantees) && (default_database == other_user.default_database); + && (settings == other_user.settings) && (grantees == other_user.grantees) && (default_database == other_user.default_database) + && (valid_until == other_user.valid_until); } void User::setName(const String & name_) diff --git a/src/Access/User.h b/src/Access/User.h index 4b4bf90137f..e4ab654dafd 100644 --- a/src/Access/User.h +++ b/src/Access/User.h @@ -23,6 +23,7 @@ struct User : public IAccessEntity SettingsProfileElements settings; RolesOrUsersSet grantees = RolesOrUsersSet::AllTag{}; String default_database; + time_t valid_until = 0; bool equal(const IAccessEntity & other) const override; std::shared_ptr clone() const override { return cloneImpl(); } diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 165937560cc..fa68b1adc1a 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -10,6 +10,10 @@ #include #include #include +#include +#include +#include +#include namespace DB @@ -28,6 +32,7 @@ namespace const std::optional & 
override_default_roles, const std::optional & override_settings, const std::optional & override_grantees, + const std::optional & valid_until, bool allow_implicit_no_password, bool allow_no_password, bool allow_plaintext_password) @@ -61,6 +66,9 @@ namespace } } + if (valid_until) + user.valid_until = *valid_until; + if (override_name && !override_name->host_pattern.empty()) { user.allowed_client_hosts = AllowedClientHosts{}; @@ -116,6 +124,26 @@ BlockIO InterpreterCreateUserQuery::execute() if (query.auth_data) auth_data = AuthenticationData::fromAST(*query.auth_data, getContext(), !query.attach); + std::optional valid_until; + if (query.valid_until) + { + const ASTPtr valid_until_literal = evaluateConstantExpressionAsLiteral(query.valid_until, getContext()); + const String valid_until_str = checkAndGetLiteralArgument(valid_until_literal, "valid_until"); + + time_t time = 0; + + if (valid_until_str != "infinity") + { + const auto & time_zone = DateLUT::instance(""); + const auto & utc_time_zone = DateLUT::instance("UTC"); + + ReadBufferFromString in(valid_until_str); + parseDateTimeBestEffort(time, in, time_zone, utc_time_zone); + } + + valid_until = time; + } + std::optional default_roles_from_query; if (query.default_roles) { @@ -148,7 +176,9 @@ BlockIO InterpreterCreateUserQuery::execute() auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto updated_user = typeid_cast>(entity->clone()); - updateUserFromQueryImpl(*updated_user, query, auth_data, {}, default_roles_from_query, settings_from_query, grantees_from_query, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); + updateUserFromQueryImpl( + *updated_user, query, auth_data, {}, default_roles_from_query, settings_from_query, grantees_from_query, + valid_until, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); return updated_user; }; @@ -167,7 +197,9 @@ BlockIO InterpreterCreateUserQuery::execute() for (const auto & name : *query.names) { auto new_user = std::make_shared(); - updateUserFromQueryImpl(*new_user, query, auth_data, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); + updateUserFromQueryImpl( + *new_user, query, auth_data, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}, + valid_until, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); new_users.emplace_back(std::move(new_user)); } @@ -201,7 +233,7 @@ void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreat if (query.auth_data) auth_data = AuthenticationData::fromAST(*query.auth_data, {}, !query.attach); - updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, allow_no_password, allow_plaintext_password, true); + updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, {}, allow_no_password, allow_plaintext_password, true); } } diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index 7292892d3c1..ec2e60b2ef7 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -65,6 +66,13 @@ namespace if (user.auth_data.getType() != AuthenticationType::NO_PASSWORD) query->auth_data = user.auth_data.toAST(); + if 
(user.valid_until) + { + WriteBufferFromOwnString out; + writeDateTimeText(user.valid_until, out); + query->valid_until = std::make_shared(out.str()); + } + if (!user.settings.empty()) { if (attach_mode) diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 0611545adf0..d73d6243b8f 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -24,6 +24,11 @@ namespace auth_data.format(settings); } + void formatValidUntil(const IAST & valid_until, const IAST::FormatSettings & settings) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " VALID UNTIL " << (settings.hilite ? IAST::hilite_none : ""); + valid_until.format(settings); + } void formatHosts(const char * prefix, const AllowedClientHosts & hosts, const IAST::FormatSettings & settings) { @@ -216,6 +221,9 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & if (auth_data) formatAuthenticationData(*auth_data, format); + if (valid_until) + formatValidUntil(*valid_until, format); + if (hosts) formatHosts(nullptr, *hosts, format); if (add_hosts) diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h index 62ddbfd0040..f75d9b03de6 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -58,6 +58,8 @@ public: std::shared_ptr default_database; + ASTPtr valid_until; + String getID(char) const override; ASTPtr clone() const override; void formatImpl(const FormatSettings & format, FormatState &, FormatStateStacked) const override; diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 0344fb99c04..550d9756aec 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -363,6 +363,19 @@ namespace return true; }); } + + bool parseValidUntil(IParserBase::Pos & pos, Expected & expected, ASTPtr & valid_until) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (!ParserKeyword{"VALID UNTIL"}.ignore(pos, expected)) + return false; + + ParserStringAndSubstitution until_p; + + return until_p.parse(pos, valid_until, expected); + }); + } } @@ -413,6 +426,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec std::shared_ptr settings; std::shared_ptr grantees; std::shared_ptr default_database; + ASTPtr valid_until; String cluster; while (true) @@ -427,6 +441,11 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } } + if (!valid_until) + { + parseValidUntil(pos, expected, valid_until); + } + AllowedClientHosts new_hosts; if (parseHosts(pos, expected, "", new_hosts)) { @@ -514,10 +533,14 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->settings = std::move(settings); query->grantees = std::move(grantees); query->default_database = std::move(default_database); + query->valid_until = std::move(valid_until); if (query->auth_data) query->children.push_back(query->auth_data); + if (query->valid_until) + query->children.push_back(query->valid_until); + return true; } } diff --git a/tests/integration/test_user_valid_until/__init__.py b/tests/integration/test_user_valid_until/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_user_valid_until/test.py b/tests/integration/test_user_valid_until/test.py new file mode 100644 index 00000000000..787250e6005 --- /dev/null +++ 
b/tests/integration/test_user_valid_until/test.py @@ -0,0 +1,68 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node") + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_basic(started_cluster): + # 1. Without VALID UNTIL + node.query("CREATE USER user_basic") + + assert node.query("SHOW CREATE USER user_basic") == "CREATE USER user_basic\n" + assert node.query("SELECT 1", user="user_basic") == "1\n" + + # 2. With valid VALID UNTIL + node.query("ALTER USER user_basic VALID UNTIL '06/11/2040 08:03:20 Z+3'") + + assert ( + node.query("SHOW CREATE USER user_basic") + == "CREATE USER user_basic VALID UNTIL \\'2040-11-06 05:03:20\\'\n" + ) + assert node.query("SELECT 1", user="user_basic") == "1\n" + + # 3. With invalid VALID UNTIL + node.query("ALTER USER user_basic VALID UNTIL '06/11/2010 08:03:20 Z+3'") + + assert ( + node.query("SHOW CREATE USER user_basic") + == "CREATE USER user_basic VALID UNTIL \\'2010-11-06 05:03:20\\'\n" + ) + + error = "Authentication failed" + assert error in node.query_and_get_error("SELECT 1", user="user_basic") + + # 4. Reset VALID UNTIL + node.query("ALTER USER user_basic VALID UNTIL 'infinity'") + + assert node.query("SHOW CREATE USER user_basic") == "CREATE USER user_basic\n" + assert node.query("SELECT 1", user="user_basic") == "1\n" + + +def test_details(started_cluster): + # 1. Does not do anything + node.query("CREATE USER user_details_infinity VALID UNTIL 'infinity'") + + assert ( + node.query("SHOW CREATE USER user_details_infinity") + == "CREATE USER user_details_infinity\n" + ) + + # 2. Time only is not supported + node.query("CREATE USER user_details_time_only VALID UNTIL '22:03:40'") + + assert ( + node.query("SHOW CREATE USER user_details_time_only") + == "CREATE USER user_details_time_only VALID UNTIL \\'2000-01-01 22:03:40\\'\n" + ) From 8864e30d2eee950d5d4d6eaaa910754cb66b9343 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 22 Jun 2023 15:17:13 +0000 Subject: [PATCH 0068/1687] fix replacing column names after mutation --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 1 - .../MergeTree/IMergeTreeDataPartWriter.h | 2 +- .../MergeTree/MergeTreeDataPartChecksum.cpp | 37 ++++++------------- .../MergeTreeDataPartWriterCompact.cpp | 2 +- .../MergeTreeDataPartWriterCompact.h | 2 +- .../MergeTreeDataPartWriterInMemory.cpp | 2 +- .../MergeTreeDataPartWriterInMemory.h | 2 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 22 ++++++++--- .../MergeTree/MergeTreeDataPartWriterWide.h | 9 +++-- .../MergeTree/MergedBlockOutputStream.cpp | 6 ++- .../MergedColumnOnlyOutputStream.cpp | 10 +++-- .../configs/wide_parts_only.xml | 1 + .../configs/wide_parts_only.xml | 1 + .../test_filesystem_layout/test.py | 2 +- .../configs/wide_parts_only.xml | 1 + tests/integration/test_partition/test.py | 2 +- 16 files changed, 54 insertions(+), 48 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index fa33bef1582..289c41e5d10 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,5 +1,4 @@ #include "IMergeTreeDataPart.h" -#include "Common/SipHash.h" #include "Storages/MergeTree/IDataPartStorage.h" #include diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index fa3c675f7da..3f359904ddd 100644 
--- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -32,7 +32,7 @@ public: virtual void write(const Block & block, const IColumn::Permutation * permutation) = 0; - virtual void fillChecksums(IMergeTreeDataPart::Checksums & checksums) = 0; + virtual void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) = 0; virtual void finish(bool sync) = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 7d39ea0707f..5dc71147246 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -68,44 +68,35 @@ void MergeTreeDataPartChecksum::checkSize(const IDataPartStorage & storage, cons void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed) const { - for (const auto & it : rhs.files) - { - const String & name = it.first; - + for (const auto & [name, _] : rhs.files) if (!files.contains(name)) throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART, "Unexpected file {} in data part", name); - } - for (const auto & it : files) + for (const auto & [name, checksum] : files) { - const String & name = it.first; - /// Exclude files written by inverted index from check. No correct checksums are available for them currently. if (name.ends_with(".gin_dict") || name.ends_with(".gin_post") || name.ends_with(".gin_seg") || name.ends_with(".gin_sid")) continue; - auto jt = rhs.files.find(name); - if (jt == rhs.files.end()) + auto it = rhs.files.find(name); + if (it == rhs.files.end()) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name); - it.second.checkEqual(jt->second, have_uncompressed, name); + checksum.checkEqual(it->second, have_uncompressed, name); } } void MergeTreeDataPartChecksums::checkSizes(const IDataPartStorage & storage) const { - for (const auto & it : files) - { - const String & name = it.first; - it.second.checkSize(storage, name); - } + for (const auto & [name, checksum] : files) + checksum.checkSize(storage, name); } UInt64 MergeTreeDataPartChecksums::getTotalSizeOnDisk() const { UInt64 res = 0; - for (const auto & it : files) - res += it.second.file_size; + for (const auto & [_, checksum] : files) + res += checksum.file_size; return res; } @@ -219,11 +210,8 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const writeVarUInt(files.size(), out); - for (const auto & it : files) + for (const auto & [name, sum] : files) { - const String & name = it.first; - const Checksum & sum = it.second; - writeBinary(name, out); writeVarUInt(sum.file_size, out); writePODBinary(sum.file_hash, out); @@ -256,11 +244,8 @@ void MergeTreeDataPartChecksums::add(MergeTreeDataPartChecksums && rhs_checksums void MergeTreeDataPartChecksums::computeTotalChecksumDataOnly(SipHash & hash) const { /// We use fact that iteration is in deterministic (lexicographical) order. 
- for (const auto & it : files) + for (const auto & [name, sum] : files) { - const String & name = it.first; - const Checksum & sum = it.second; - if (!endsWith(name, ".bin")) continue; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 0b650eb9f16..9b8f1155912 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -403,7 +403,7 @@ size_t MergeTreeDataPartWriterCompact::ColumnsBuffer::size() const return accumulated_columns.at(0)->size(); } -void MergeTreeDataPartWriterCompact::fillChecksums(IMergeTreeDataPart::Checksums & checksums) +void MergeTreeDataPartWriterCompact::fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & /*checksums_to_remove*/) { // If we don't have anything to write, skip finalization. if (!columns_list.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 06f8122393f..b1cfefd2d8f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -22,7 +22,7 @@ public: void write(const Block & block, const IColumn::Permutation * permutation) override; - void fillChecksums(IMergeTreeDataPart::Checksums & checksums) override; + void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) override; void finish(bool sync) override; private: diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp index 9afa7a1e80d..048339b58c9 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp @@ -76,7 +76,7 @@ void MergeTreeDataPartWriterInMemory::calculateAndSerializePrimaryIndex(const Bl } } -void MergeTreeDataPartWriterInMemory::fillChecksums(IMergeTreeDataPart::Checksums & checksums) +void MergeTreeDataPartWriterInMemory::fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & /*checksums_to_remove*/) { /// If part is empty we still need to initialize block by empty columns. if (!part_in_memory->block) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h index 9e1e868beac..2d333822652 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h @@ -18,7 +18,7 @@ public: /// You can write only one block. In-memory part can be written only at INSERT. 
void write(const Block & block, const IColumn::Permutation * permutation) override;
- void fillChecksums(IMergeTreeDataPart::Checksums & checksums) override;
+ void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) override;
void finish(bool /*sync*/) override {}
private:
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
index 60bb1119770..c9dae9a1f2c 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
@@ -136,6 +136,7 @@ void MergeTreeDataPartWriterWide::addStreams(
settings.query_write_settings);
full_name_to_stream_name.emplace(full_stream_name, stream_name);
+ stream_name_to_full_name.emplace(stream_name, full_stream_name);
};
ISerialization::SubstreamPath path;
@@ -562,7 +563,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai
}
-void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums)
+void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove)
{
const auto & global_settings = storage.getContext()->getSettingsRef();
ISerialization::SerializeBinaryBulkSettings serialize_settings;
@@ -598,10 +599,19 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum
}
}
- for (auto & stream : column_streams)
+ for (auto & [stream_name, stream] : column_streams)
{
- stream.second->preFinalize();
- stream.second->addToChecksums(checksums);
+ /// Remove checksums for the old stream name if the file was
+ /// renamed due to replacing the name with its hash.
+ const auto & full_stream_name = stream_name_to_full_name.at(stream_name);
+ if (stream_name != full_stream_name)
+ {
+ checksums_to_remove.insert(full_stream_name + stream->data_file_extension);
+ checksums_to_remove.insert(full_stream_name + stream->marks_file_extension);
+ }
+
+ stream->preFinalize();
+ stream->addToChecksums(checksums);
}
}
@@ -633,11 +643,11 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(bool sync)
}
-void MergeTreeDataPartWriterWide::fillChecksums(IMergeTreeDataPart::Checksums & checksums)
+void MergeTreeDataPartWriterWide::fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove)
{
// If we don't have anything to write, skip finalization.
if (!columns_list.empty())
- fillDataChecksums(checksums);
+ fillDataChecksums(checksums, checksums_to_remove);
if (settings.rewrite_primary_key)
fillPrimaryIndexChecksums(checksums);
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h
index de7419fedb2..c274fc9807c 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h
@@ -29,14 +29,14 @@ public:
void write(const Block & block, const IColumn::Permutation * permutation) override;
- void fillChecksums(IMergeTreeDataPart::Checksums & checksums) final;
+ void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) final;
void finish(bool sync) final;
private:
/// Finish serialization of data: write final mark if required and compute checksums
/// Also validate written data in debug mode
- void fillDataChecksums(IMergeTreeDataPart::Checksums & checksums);
+ void fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove);
void finishDataSerialization(bool sync);
/// Write data of one column.
@@ -111,8 +111,11 @@ private:
using ColumnStreams = std::map;
ColumnStreams column_streams;
- /// TODO:
+ /// Some long column names may be replaced with hashes.
+ /// Below are mappings from the original stream name to the actual
+ /// stream name (possibly the hash of the name) and vice versa.
std::unordered_map full_name_to_stream_name;
+ std::unordered_map stream_name_to_full_name;
/// Non written marks to disk (for each column). Waiting until all rows for
/// this marks will be written to disk.
diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
index c93ad135835..1ebb1d87aae 100644
--- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp
+++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
@@ -142,12 +142,16 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync(
{
/// Finish write and get checksums.
MergeTreeData::DataPart::Checksums checksums;
+ NameSet checksums_to_remove;
if (additional_column_checksums)
checksums = std::move(*additional_column_checksums);
/// Finish columns serialization.
- writer->fillChecksums(checksums);
+ writer->fillChecksums(checksums, checksums_to_remove);
+
+ for (const auto & name : checksums_to_remove)
+ checksums.files.erase(name);
LOG_TRACE(&Poco::Logger::get("MergedBlockOutputStream"), "filled checksums {}", new_part->getNameWithState());
diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp
index 3b2eb96f2d4..108f364fc2d 100644
--- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp
+++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp
@@ -63,7 +63,11 @@ MergedColumnOnlyOutputStream::fillChecksums(
{
/// Finish columns serialization.
MergeTreeData::DataPart::Checksums checksums; - writer->fillChecksums(checksums); + NameSet checksums_to_remove; + writer->fillChecksums(checksums, checksums_to_remove); + + for (const auto & filename : checksums_to_remove) + all_checksums.files.erase(filename); for (const auto & [projection_name, projection_part] : new_part->getProjectionParts()) checksums.addFile( @@ -80,9 +84,7 @@ MergedColumnOnlyOutputStream::fillChecksums( for (const String & removed_file : removed_files) { new_part->getDataPartStorage().removeFileIfExists(removed_file); - - if (all_checksums.files.contains(removed_file)) - all_checksums.files.erase(removed_file); + all_checksums.files.erase(removed_file); } new_part->setColumns(columns, serialization_infos, metadata_snapshot->getMetadataVersion()); diff --git a/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml b/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml index e9cf053f1c5..674ffff6c93 100644 --- a/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml +++ b/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml @@ -1,5 +1,6 @@ 0 + 0 diff --git a/tests/integration/test_default_compression_codec/configs/wide_parts_only.xml b/tests/integration/test_default_compression_codec/configs/wide_parts_only.xml index 10b9edef36d..4d1a3357799 100644 --- a/tests/integration/test_default_compression_codec/configs/wide_parts_only.xml +++ b/tests/integration/test_default_compression_codec/configs/wide_parts_only.xml @@ -2,5 +2,6 @@ 0 0 + 0 diff --git a/tests/integration/test_filesystem_layout/test.py b/tests/integration/test_filesystem_layout/test.py index 2be478f95d0..81f3b67cb75 100644 --- a/tests/integration/test_filesystem_layout/test.py +++ b/tests/integration/test_filesystem_layout/test.py @@ -23,7 +23,7 @@ def test_file_path_escaping(started_cluster): node.query( """ CREATE TABLE test.`T.a_b,l-e!` (`~Id` UInt32) - ENGINE = MergeTree() PARTITION BY `~Id` ORDER BY `~Id` SETTINGS min_bytes_for_wide_part = 0; + ENGINE = MergeTree() PARTITION BY `~Id` ORDER BY `~Id` SETTINGS min_bytes_for_wide_part = 0, replace_long_file_name_to_hash = 0; """ ) node.query("""INSERT INTO test.`T.a_b,l-e!` VALUES (1);""") diff --git a/tests/integration/test_mutations_hardlinks/configs/wide_parts_only.xml b/tests/integration/test_mutations_hardlinks/configs/wide_parts_only.xml index 10b9edef36d..4d1a3357799 100644 --- a/tests/integration/test_mutations_hardlinks/configs/wide_parts_only.xml +++ b/tests/integration/test_mutations_hardlinks/configs/wide_parts_only.xml @@ -2,5 +2,6 @@ 0 0 + 0 diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index 93f03f4420e..7634c81f807 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -150,7 +150,7 @@ def partition_table_complex(started_cluster): q("DROP TABLE IF EXISTS test.partition_complex") q( "CREATE TABLE test.partition_complex (p Date, k Int8, v1 Int8 MATERIALIZED k + 1) " - "ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false" + "ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false, replace_long_file_name_to_hash = false" ) q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(31), 1)") q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(1), 2)") From 
1745535cb1dc87f4065e4536d0ff71f43205b439 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Jun 2023 03:17:31 +0200 Subject: [PATCH 0069/1687] Add support for ALTER TABLE MODIFY COMMENT --- src/Parsers/ParserCreateQuery.cpp | 1 + .../MergeTree/registerStorageMergeTree.cpp | 1 + src/Storages/StorageMergeTree.cpp | 5 ++ src/Storages/StorageReplicatedMergeTree.cpp | 34 +++++++-- ...02792_alter_table_modify_comment.reference | 20 ++++++ .../02792_alter_table_modify_comment.sql | 69 +++++++++++++++++++ 6 files changed, 126 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02792_alter_table_modify_comment.reference create mode 100644 tests/queries/0_stateless/02792_alter_table_modify_comment.sql diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index adf3513ba40..11fc3ca4e29 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -449,6 +449,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) break; } + // If any part of storage definition is found create storage node if (!storage_like) return false; diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 027cd1af7c9..d4bf6374c02 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -79,6 +79,7 @@ ORDER BY expr [SAMPLE BY expr] [TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] [SETTINGS name=value, ...] +[COMMENT 'comment'] See details in documentation: https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/. Other engines of the family support different syntax, see details in the corresponding documentation topics. 
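To make the new clause concrete: a minimal sketch of the `[COMMENT 'comment']` syntax documented in the help text above, using the same statement shape as the tests added later in this patch (the table name `t` is a placeholder):

```sql
CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY () COMMENT 'Hello';
SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
```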
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index c02c96f62be..8e029214b06 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -326,6 +326,11 @@ void StorageMergeTree::alter( changeSettings(new_metadata.settings_changes, table_lock_holder); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata); } + else if (commands.isCommentAlter()) + { + setInMemoryMetadata(new_metadata); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata); + } else { if (!maybe_mutation_commands.empty() && maybe_mutation_commands.containBarrierCommand()) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index bb99e21e4ab..d4b85c72ccc 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5375,6 +5375,17 @@ void StorageReplicatedMergeTree::alter( return; } + if (commands.isCommentAlter()) + { + StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); + commands.apply(future_metadata, query_context); + + setInMemoryMetadata(future_metadata); + + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, future_metadata); + return; + } + auto ast_to_str = [](ASTPtr query) -> String { if (!query) @@ -5444,12 +5455,27 @@ void StorageReplicatedMergeTree::alter( String new_columns_str = future_metadata.columns.toString(); ops.emplace_back(zkutil::makeSetRequest(fs::path(zookeeper_path) / "columns", new_columns_str, -1)); - if (ast_to_str(current_metadata->settings_changes) != ast_to_str(future_metadata.settings_changes)) + bool settings_are_changed = (ast_to_str(current_metadata->settings_changes) != ast_to_str(future_metadata.settings_changes)); + bool comment_is_changed = (current_metadata->comment != future_metadata.comment); + + if (settings_are_changed || comment_is_changed) { - /// Just change settings StorageInMemoryMetadata metadata_copy = *current_metadata; - metadata_copy.settings_changes = future_metadata.settings_changes; - changeSettings(metadata_copy.settings_changes, table_lock_holder); + + if (settings_are_changed) + { + /// Just change settings + metadata_copy.settings_changes = future_metadata.settings_changes; + changeSettings(metadata_copy.settings_changes, table_lock_holder); + } + + /// The comment is not replicated as of today, but we can implement it later. 
+ if (comment_is_changed) + { + metadata_copy.setComment(future_metadata.comment); + setInMemoryMetadata(metadata_copy); + } + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, metadata_copy); } diff --git a/tests/queries/0_stateless/02792_alter_table_modify_comment.reference b/tests/queries/0_stateless/02792_alter_table_modify_comment.reference new file mode 100644 index 00000000000..b148993285c --- /dev/null +++ b/tests/queries/0_stateless/02792_alter_table_modify_comment.reference @@ -0,0 +1,20 @@ +Hello +World +Hello +World +Hello +World +Hello +World +Hello +World + +World + +World + +World + +World + +World diff --git a/tests/queries/0_stateless/02792_alter_table_modify_comment.sql b/tests/queries/0_stateless/02792_alter_table_modify_comment.sql new file mode 100644 index 00000000000..70c3d266bfe --- /dev/null +++ b/tests/queries/0_stateless/02792_alter_table_modify_comment.sql @@ -0,0 +1,69 @@ +DROP TABLE IF EXISTS t; + +# Memory, MergeTree, and ReplicatedMergeTree + +CREATE TABLE t (x UInt8) ENGINE = Memory COMMENT 'Hello'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t; + +CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY () COMMENT 'Hello'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t; + +# The case when there are many operations in one ALTER + +CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY () COMMENT 'Hello'; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +ALTER TABLE t MODIFY COMMENT 'World', MODIFY COLUMN x UInt16; +SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; +DROP TABLE t; + +# Note that the table comment is not replicated. We can implement it later. 
+
+CREATE TABLE t (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_comment_table1/t', '1') ORDER BY () COMMENT 'Hello';
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+ALTER TABLE t MODIFY COMMENT 'World';
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+DROP TABLE t SYNC;
+
+CREATE TABLE t (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_comment_table2/t', '1') ORDER BY () COMMENT 'Hello';
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+ALTER TABLE t MODIFY COMMENT 'World', MODIFY COLUMN x UInt16;
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+DROP TABLE t SYNC;
+
+# The cases when there is no comment on creation
+
+CREATE TABLE t (x UInt8) ENGINE = Memory;
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+ALTER TABLE t MODIFY COMMENT 'World';
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+DROP TABLE t;
+
+CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY ();
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+ALTER TABLE t MODIFY COMMENT 'World';
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+DROP TABLE t;
+
+CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY ();
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+ALTER TABLE t MODIFY COMMENT 'World', MODIFY COLUMN x UInt16;
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+DROP TABLE t;
+
+CREATE TABLE t (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_comment_table3/t', '1') ORDER BY ();
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+ALTER TABLE t MODIFY COMMENT 'World';
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+DROP TABLE t SYNC;
+
+CREATE TABLE t (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_comment_table4/t', '1') ORDER BY ();
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+ALTER TABLE t MODIFY COMMENT 'World', MODIFY COLUMN x UInt16;
+SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't';
+DROP TABLE t SYNC;
From 630e2e790787c60a818357d517abba8f95b8c90a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 23 Jun 2023 03:22:21 +0200
Subject: [PATCH 0070/1687] Add documentation
---
docs/en/sql-reference/statements/alter/comment.md | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/docs/en/sql-reference/statements/alter/comment.md b/docs/en/sql-reference/statements/alter/comment.md
index cc49c6abf80..8c374edd55a 100644
--- a/docs/en/sql-reference/statements/alter/comment.md
+++ b/docs/en/sql-reference/statements/alter/comment.md
@@ -57,3 +57,9 @@ Output of a removed comment:
│ │
└─────────┘
```
+
+**Caveats**
+
+For Replicated tables, the comment can be different on different replicas. Modifying the comment applies to a single replica.
+
+The feature is available since version 23.6. It does not work in previous ClickHouse versions.
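A minimal usage sketch of the statement covered by this documentation, mirroring the test queries added earlier in this patch series (the table name `t` is a placeholder):

```sql
CREATE TABLE t (x UInt8) ENGINE = Memory COMMENT 'Hello';
ALTER TABLE t MODIFY COMMENT 'World';
SELECT comment FROM system.tables WHERE database = currentDatabase() AND table = 't'; -- returns 'World'
```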
From 4aa73d341c5ff5d19f50f758f4c462d56187e739 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Jun 2023 05:14:09 +0200 Subject: [PATCH 0071/1687] Update test --- .../02020_alter_table_modify_comment.reference | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02020_alter_table_modify_comment.reference b/tests/queries/0_stateless/02020_alter_table_modify_comment.reference index a9c37eb2ba5..d2e74fd07f0 100644 --- a/tests/queries/0_stateless/02020_alter_table_modify_comment.reference +++ b/tests/queries/0_stateless/02020_alter_table_modify_comment.reference @@ -124,21 +124,21 @@ CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\n comment= Test table with comment change a comment -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'new comment on a table\' +comment= new comment on a table remove a comment -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192 +comment= add a comment back -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' +comment= another comment on a table detach table -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on a table\' re-attach table -CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'Test table with comment\' -comment= Test table with comment +CREATE TABLE default.comment_test_table\n(\n `k` UInt64,\n `s` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/02020_alter_table_modify_comment_default\', \'1\')\nORDER BY k\nSETTINGS index_granularity = 8192\nCOMMENT \'another comment on 
a table\'
+comment= another comment on a table
From 25053323d9f88e3794bfb0dbafbc72370548aea3 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 26 Jun 2023 15:43:55 +0000
Subject: [PATCH 0072/1687] Test libunwind changes.
---
contrib/libunwind | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/contrib/libunwind b/contrib/libunwind
index e48aa13f67d..09f93176072 160000
--- a/contrib/libunwind
+++ b/contrib/libunwind
@@ -1 +1 @@
-Subproject commit e48aa13f67dc722511b5af33a32ba9b7748176b5
+Subproject commit 09f931760727a200c24e11f06ff0ad254502a304
From 014aa81e0bcf3713a84fd36e9ac485c45f5eafc6 Mon Sep 17 00:00:00 2001
From: frinkr
Date: Tue, 27 Jun 2023 11:56:40 +0800
Subject: [PATCH 0073/1687] improve performance for huge ast by direct-writing the column name for ASTLiteral
---
src/Parsers/ASTLiteral.cpp | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/Parsers/ASTLiteral.cpp b/src/Parsers/ASTLiteral.cpp
index 5c76f6f33bf..14dc80ec8bc 100644
--- a/src/Parsers/ASTLiteral.cpp
+++ b/src/Parsers/ASTLiteral.cpp
@@ -86,8 +86,17 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
}
else
{
- String column_name = applyVisitor(FieldVisitorToString(), value);
- writeString(column_name, ostr);
+ /// Special case for huge AST. The `FieldVisitorToString` becomes expensive
+ /// for tons of literals.
+ if (value.getType() == Field::Types::String)
+ {
+ writeQuoted(value.get(), ostr);
+ }
+ else
+ {
+ String column_name = applyVisitor(FieldVisitorToString(), value);
+ writeString(column_name, ostr);
+ }
}
}
From f7e20d884f7fb317b822d1f62edfe5fa6ddf80ba Mon Sep 17 00:00:00 2001
From: frinkr
Date: Tue, 27 Jun 2023 16:32:19 +0800
Subject: [PATCH 0074/1687] update comments
---
src/Parsers/ASTLiteral.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/Parsers/ASTLiteral.cpp b/src/Parsers/ASTLiteral.cpp
index 14dc80ec8bc..6481fc6eb00 100644
--- a/src/Parsers/ASTLiteral.cpp
+++ b/src/Parsers/ASTLiteral.cpp
@@ -86,8 +86,8 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
}
else
{
- /// Special case for huge AST. The `FieldVisitorToString` becomes expensive
- /// for tons of literals.
+ /// Shortcut for huge AST. The `FieldVisitorToString` becomes expensive
+ /// for tons of literals as it creates a temporary String.
if (value.getType() == Field::Types::String) { writeQuoted(value.get(), ostr); From a9cb4d1b3d7a5bdf7571c79927d0aa269c06ecc8 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Tue, 27 Jun 2023 12:32:32 +0200 Subject: [PATCH 0075/1687] Better tar --- docker/packager/binary/build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 08a9b07f3ce..d962f385f1b 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -17,7 +17,8 @@ if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 if [ "$EXPORT_SOURCES_WITH_SUBMODULES" = "1" ]; then - tar -c /build --exclude-vcs-ignores --exclude-vcs --exclude '/build/build' --exclude '/build/build_docker' --exclude '/build/debian' --exclude '/build/.cache' --exclude '/build/docs' --exclude '/build/tests/integration' | pigz -9 > /output/source_sub.tar.gz + cd /build + tar --exclude-vcs-ignores --exclude-vcs --exclude build --exclude build_docker --exclude debian --exclude .git --exclude .github --exclude .cache --exclude docs --exclude tests/integration -c . | pigz -9 > /output/source_sub.tar.gz fi fi From 16ef88c346f4c79e74a978c4cfb95d5879600068 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 27 Jun 2023 15:11:18 +0000 Subject: [PATCH 0076/1687] Update libunwind --- contrib/libunwind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libunwind b/contrib/libunwind index 09f93176072..ad41616855c 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 09f931760727a200c24e11f06ff0ad254502a304 +Subproject commit ad41616855ce75b768a5ebb6975003a6ac959250 From 62feaf6e54cc00e5efc33e04e90432d46705b2e2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 27 Jun 2023 18:48:19 +0000 Subject: [PATCH 0077/1687] Update contrib. --- contrib/libunwind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libunwind b/contrib/libunwind index ad41616855c..2a82f19272f 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit ad41616855ce75b768a5ebb6975003a6ac959250 +Subproject commit 2a82f19272f37d345001f6568ee32655aeb2e54d From 24e56259062c1f859da394ab4b7466e11461e4ca Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 6 Jul 2023 12:57:14 +0200 Subject: [PATCH 0078/1687] Use sparse submodules in darwin builds to reduce artifacts size --- .github/workflows/backport_branches.yml | 14 ++++++++++++++ .github/workflows/master.yml | 14 ++++++++++++++ .github/workflows/pull_request.yml | 14 ++++++++++++++ .github/workflows/release_branches.yml | 14 ++++++++++++++ 4 files changed, 56 insertions(+) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index d69168b01ee..5ba1c1ddb8d 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -398,6 +398,13 @@ jobs: clear-repository: true submodules: true fetch-depth: 0 # otherwise we will have no info about contributors + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . 
&& echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -440,6 +447,13 @@ jobs: clear-repository: true submodules: true fetch-depth: 0 # otherwise we will have no info about contributors + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index f0741b5465f..bf596450c80 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -580,6 +580,13 @@ jobs: clear-repository: true submodules: true fetch-depth: 0 # otherwise we will have no info about contributors + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -706,6 +713,13 @@ jobs: clear-repository: true submodules: true fetch-depth: 0 # otherwise we will have no info about contributors + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index afc08f3e637..e01a41ed2ab 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -647,6 +647,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -770,6 +777,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . 
&& echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 21284815583..fe22f4dde8b 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -455,6 +455,13 @@ jobs: clear-repository: true submodules: true fetch-depth: 0 # otherwise we will have no info about contributors + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -497,6 +504,13 @@ jobs: clear-repository: true submodules: true fetch-depth: 0 # otherwise we will have no info about contributors + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" From 48c3c5a347236f741a449cf1414aac3850bc877e Mon Sep 17 00:00:00 2001 From: CuiShuoGuo <129303239+bakam412@users.noreply.github.com> Date: Thu, 20 Jul 2023 11:25:18 +0800 Subject: [PATCH 0079/1687] Update drop.md --- docs/en/sql-reference/statements/drop.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index b6208c2fd52..a8658dd49fc 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -32,6 +32,14 @@ Syntax: DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] [SYNC] ``` +You can also drop multiple tables at the same time. + +Syntax: + +```sql +DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name_1[,[db.]name_2][,[db.]name_3]... [ON CLUSTER cluster] [SYNC] +``` + ## DROP DICTIONARY Deletes the dictionary. 
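A hypothetical invocation of the multiple-table form documented above (the names `t1`, `t2`, and `t3` are placeholders):

```sql
DROP TABLE IF EXISTS t1, t2, t3;
```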
From 0b4167cf102e0a2d3d7babb8a76f71123307bcc2 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 21 Jun 2023 10:33:11 +0800 Subject: [PATCH 0080/1687] rewrite uniq to count (cherry picked from commit b2522d81c42cd19e2b6c76e18ee36ba7a0624fe7) --- .../RewriteUinqToCountVisitor.cpp | 88 +++++++++++++++++++ src/Interpreters/RewriteUinqToCountVisitor.h | 29 ++++++ 2 files changed, 117 insertions(+) create mode 100644 src/Interpreters/RewriteUinqToCountVisitor.cpp create mode 100644 src/Interpreters/RewriteUinqToCountVisitor.h diff --git a/src/Interpreters/RewriteUinqToCountVisitor.cpp b/src/Interpreters/RewriteUinqToCountVisitor.cpp new file mode 100644 index 00000000000..f491bb08c88 --- /dev/null +++ b/src/Interpreters/RewriteUinqToCountVisitor.cpp @@ -0,0 +1,88 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace +{ + +static bool matchFnUniq(String func_name) +{ + auto name = Poco::toLower(func_name); + return name == "uniq" || name == "" +} + +} + +/// 'SELECT uniq(x) FROM (SELECT DISTINCT x ...)' to 'SELECT count() FROM (SELECT DISTINCT x ...)' +void RewriteUinqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) +{ + auto * selectq = ast->as(); + if (!selectq || !selectq->tables() || selectq->tables()->children.size() != 1) + return; + auto expr_list = selectq->select(); + if (!expr_list || expr_list->children.size() != 1) + return; + auto * func = expr_list->children[0]->as(); + if (!func || (Poco::toLower(func->name) != "uniq" && Poco::toLower(func->name) != "uniqexact")) + return; + auto arg = func->arguments->children; + if (arg.size() != 1) + return; + if (!arg[0]->as()) + return; + if (selectq->tables()->as()->children[0]->as()->children.size() != 1) + return; + auto * table_expr = selectq->tables()->as()->children[0]->as()->children[0]->as(); + if (!table_expr || table_expr->size() != 1 || !table_expr->database_and_table_name) + return; + // Check done, we now rewrite the AST + auto cloned_select_query = selectq->clone(); + expr_list->children[0] = makeASTFunction("count"); + + auto table_name = table_expr->database_and_table_name->as()->name(); + table_expr->children.clear(); + table_expr->children.emplace_back(std::make_shared()); + table_expr->database_and_table_name = nullptr; + table_expr->table_function = nullptr; + table_expr->subquery = table_expr->children[0]; + + auto column_name = arg[0]->as()->name(); + // Form AST for subquery + { + auto * select_ptr = cloned_select_query->as(); + select_ptr->refSelect()->children.clear(); + select_ptr->refSelect()->children.emplace_back(std::make_shared(column_name)); + auto exprlist = std::make_shared(); + exprlist->children.emplace_back(std::make_shared(column_name)); + cloned_select_query->as()->setExpression(ASTSelectQuery::Expression::GROUP_BY, exprlist); + + auto expr = std::make_shared(); + expr->children.emplace_back(cloned_select_query); + auto select_with_union = std::make_shared(); + select_with_union->union_mode = SelectUnionMode::UNION_DEFAULT; + select_with_union->is_normalized = false; + select_with_union->list_of_modes.clear(); + select_with_union->set_of_modes.clear(); + select_with_union->children.emplace_back(expr); + select_with_union->list_of_selects = expr; + table_expr->children[0]->as()->children.emplace_back(select_with_union); + } +} + +} diff --git a/src/Interpreters/RewriteUinqToCountVisitor.h b/src/Interpreters/RewriteUinqToCountVisitor.h new file mode 100644 index 
00000000000..d7aa745352b --- /dev/null +++ b/src/Interpreters/RewriteUinqToCountVisitor.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include "Interpreters/TreeRewriter.h" + +namespace DB +{ + +class ASTFunction; + +/// Simple rewrite: +/// 'SELECT uniq(x) FROM (SELECT DISTINCT x ...)' to +/// 'SELECT count() FROM (SELECT DISTINCT x ...)' +/// +/// 'SELECT uniq() FROM (SELECT x ... GROUP BY x)' to +/// 'SELECT count() FROM (SELECT x ... GROUP BY x)' +/// +/// Note we can rewrite all uniq variants except uniqUpTo. +class RewriteUinqToCountMatcher +{ +public: + struct Data {}; + static void visit(ASTPtr & ast, Data &); + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } +}; + +using RewriteUinqToCountVisitor = InDepthNodeVisitor; +} From 1729a7437a0eb3eb47d7b642a26ec1240ddb38e2 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Sun, 25 Jun 2023 14:43:39 +0800 Subject: [PATCH 0081/1687] add settings --- src/Interpreters/InterpreterSelectQuery.cpp | 7 ++ .../RewriteUinqToCountVisitor.cpp | 88 ----------------- .../RewriteUniqToCountVisitor.cpp | 99 +++++++++++++++++++ ...tVisitor.h => RewriteUniqToCountVisitor.h} | 4 +- 4 files changed, 108 insertions(+), 90 deletions(-) delete mode 100644 src/Interpreters/RewriteUinqToCountVisitor.cpp create mode 100644 src/Interpreters/RewriteUniqToCountVisitor.cpp rename src/Interpreters/{RewriteUinqToCountVisitor.h => RewriteUniqToCountVisitor.h} (87%) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index fc3ea3a13ca..8402165b62b 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -426,6 +427,12 @@ InterpreterSelectQuery::InterpreterSelectQuery( RewriteCountDistinctFunctionVisitor(data_rewrite_countdistinct).visit(query_ptr); } + if (settings.optimize_uniq_to_count) + { + RewriteUniqToCountMatcher::Data data_rewrite_uniq_count; + RewriteUniqToCountVisitor(data_rewrite_uniq_count).visit(query_ptr); + } + JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery(), options.with_all_cols, options_.is_create_parameterized_view); bool got_storage_from_query = false; diff --git a/src/Interpreters/RewriteUinqToCountVisitor.cpp b/src/Interpreters/RewriteUinqToCountVisitor.cpp deleted file mode 100644 index f491bb08c88..00000000000 --- a/src/Interpreters/RewriteUinqToCountVisitor.cpp +++ /dev/null @@ -1,88 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace -{ - -static bool matchFnUniq(String func_name) -{ - auto name = Poco::toLower(func_name); - return name == "uniq" || name == "" -} - -} - -/// 'SELECT uniq(x) FROM (SELECT DISTINCT x ...)' to 'SELECT count() FROM (SELECT DISTINCT x ...)' -void RewriteUinqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) -{ - auto * selectq = ast->as(); - if (!selectq || !selectq->tables() || selectq->tables()->children.size() != 1) - return; - auto expr_list = selectq->select(); - if (!expr_list || expr_list->children.size() != 1) - return; - auto * func = expr_list->children[0]->as(); - if (!func || (Poco::toLower(func->name) != "uniq" && Poco::toLower(func->name) != "uniqexact")) - return; - auto arg = func->arguments->children; - if (arg.size() != 1) - return; - if (!arg[0]->as()) - return; - if (selectq->tables()->as()->children[0]->as()->children.size() != 1) - 
return; - auto * table_expr = selectq->tables()->as()->children[0]->as()->children[0]->as(); - if (!table_expr || table_expr->size() != 1 || !table_expr->database_and_table_name) - return; - // Check done, we now rewrite the AST - auto cloned_select_query = selectq->clone(); - expr_list->children[0] = makeASTFunction("count"); - - auto table_name = table_expr->database_and_table_name->as()->name(); - table_expr->children.clear(); - table_expr->children.emplace_back(std::make_shared()); - table_expr->database_and_table_name = nullptr; - table_expr->table_function = nullptr; - table_expr->subquery = table_expr->children[0]; - - auto column_name = arg[0]->as()->name(); - // Form AST for subquery - { - auto * select_ptr = cloned_select_query->as(); - select_ptr->refSelect()->children.clear(); - select_ptr->refSelect()->children.emplace_back(std::make_shared(column_name)); - auto exprlist = std::make_shared(); - exprlist->children.emplace_back(std::make_shared(column_name)); - cloned_select_query->as()->setExpression(ASTSelectQuery::Expression::GROUP_BY, exprlist); - - auto expr = std::make_shared(); - expr->children.emplace_back(cloned_select_query); - auto select_with_union = std::make_shared(); - select_with_union->union_mode = SelectUnionMode::UNION_DEFAULT; - select_with_union->is_normalized = false; - select_with_union->list_of_modes.clear(); - select_with_union->set_of_modes.clear(); - select_with_union->children.emplace_back(expr); - select_with_union->list_of_selects = expr; - table_expr->children[0]->as()->children.emplace_back(select_with_union); - } -} - -} diff --git a/src/Interpreters/RewriteUniqToCountVisitor.cpp b/src/Interpreters/RewriteUniqToCountVisitor.cpp new file mode 100644 index 00000000000..c3a9fb7547c --- /dev/null +++ b/src/Interpreters/RewriteUniqToCountVisitor.cpp @@ -0,0 +1,99 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace +{ + +static bool matchFnUniq(String func_name) +{ + auto name = Poco::toLower(func_name); + return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta"; +} + +class PrintTreeMatcher +{ +public: + struct Data {String prefix;}; + static void visit(ASTPtr & ast, Data &) + { + ast->getID(); + } + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } +}; + +using PrintTreeVisitor = InDepthNodeVisitor; + +} + +/// 'SELECT uniq(x) FROM (SELECT DISTINCT x ...)' to 'SELECT count() FROM (SELECT DISTINCT x ...)' +void RewriteUniqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) +{ + auto * selectq = ast->as(); + if (!selectq || !selectq->tables() || selectq->tables()->children.size() != 1) + return; + auto expr_list = selectq->select(); + if (!expr_list || expr_list->children.size() != 1) + return; + auto * func = expr_list->children[0]->as(); + if (!func || !matchFnUniq(func->name)) + return; + if (selectq->tables()->as()->children[0]->as()->children.size() != 1) + return; + auto * table_expr = selectq->tables()->as()->children[0]->as()->children[0]->as(); + if (!table_expr || table_expr->children.size() != 1 || !table_expr->subquery) + return; + auto * subquery = table_expr->subquery->as(); + subquery->formatForLogging(0); + + // Check done, we now rewrite the AST + auto cloned_select_query = selectq->clone(); + expr_list->children[0] = makeASTFunction("count"); + +// auto table_name = table_expr->database_and_table_name->as()->name(); + table_expr->children.clear(); + 
table_expr->children.emplace_back(std::make_shared()); + table_expr->database_and_table_name = nullptr; + table_expr->table_function = nullptr; + table_expr->subquery = table_expr->children[0]; + +// auto column_name = arg[0]->as()->name(); +// // Form AST for subquery +// { +// auto * select_ptr = cloned_select_query->as(); +// select_ptr->refSelect()->children.clear(); +// select_ptr->refSelect()->children.emplace_back(std::make_shared(column_name)); +// auto exprlist = std::make_shared(); +// exprlist->children.emplace_back(std::make_shared(column_name)); +// cloned_select_query->as()->setExpression(ASTSelectQuery::Expression::GROUP_BY, exprlist); +// +// auto expr = std::make_shared(); +// expr->children.emplace_back(cloned_select_query); +// auto select_with_union = std::make_shared(); +// select_with_union->union_mode = SelectUnionMode::UNION_DEFAULT; +// select_with_union->is_normalized = false; +// select_with_union->list_of_modes.clear(); +// select_with_union->set_of_modes.clear(); +// select_with_union->children.emplace_back(expr); +// select_with_union->list_of_selects = expr; +// table_expr->children[0]->as()->children.emplace_back(select_with_union); +// } +} + +} diff --git a/src/Interpreters/RewriteUinqToCountVisitor.h b/src/Interpreters/RewriteUniqToCountVisitor.h similarity index 87% rename from src/Interpreters/RewriteUinqToCountVisitor.h rename to src/Interpreters/RewriteUniqToCountVisitor.h index d7aa745352b..f59206101d4 100644 --- a/src/Interpreters/RewriteUinqToCountVisitor.h +++ b/src/Interpreters/RewriteUniqToCountVisitor.h @@ -17,7 +17,7 @@ class ASTFunction; /// 'SELECT count() FROM (SELECT x ... GROUP BY x)' /// /// Note we can rewrite all uniq variants except uniqUpTo. -class RewriteUinqToCountMatcher +class RewriteUniqToCountMatcher { public: struct Data {}; @@ -25,5 +25,5 @@ public: static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } }; -using RewriteUinqToCountVisitor = InDepthNodeVisitor; +using RewriteUniqToCountVisitor = InDepthNodeVisitor; } From 1d022fe593e706e66540bb14d270938ddcda0490 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 28 Jun 2023 14:12:24 +0800 Subject: [PATCH 0082/1687] add some comments (cherry picked from commit 0dc1fdd67f8609564cda4ca61d17913d05d318c2) --- .../RewriteUniqToCountVisitor.cpp | 111 ++++++++++-------- src/Interpreters/RewriteUniqToCountVisitor.h | 6 +- 2 files changed, 68 insertions(+), 49 deletions(-) diff --git a/src/Interpreters/RewriteUniqToCountVisitor.cpp b/src/Interpreters/RewriteUniqToCountVisitor.cpp index c3a9fb7547c..d608b6dfae4 100644 --- a/src/Interpreters/RewriteUniqToCountVisitor.cpp +++ b/src/Interpreters/RewriteUniqToCountVisitor.cpp @@ -4,15 +4,11 @@ #include #include #include -#include #include #include #include #include -#include -#include #include -#include namespace DB @@ -21,28 +17,48 @@ namespace DB namespace { -static bool matchFnUniq(String func_name) +bool matchFnUniq(String func_name) { auto name = Poco::toLower(func_name); - return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta"; + return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined" || name == "uniqCombined64"; } -class PrintTreeMatcher +bool expressionListEquals(ASTExpressionList * lhs, ASTExpressionList * rhs) { -public: - struct Data {String prefix;}; - static void visit(ASTPtr & ast, Data &) + if (!lhs || !rhs) + return false; + if (lhs->children.size() != rhs->children.size()) + return false; + for (size_t i = 0; 
i < lhs->children.size(); i++) { - ast->getID(); + if (lhs->children[i]->formatForLogging() != rhs->children[i]->formatForLogging()) // TODO not a elegant way + return false; } - static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } -}; + return true; +} -using PrintTreeVisitor = InDepthNodeVisitor; +/// Test whether lhs contains all expr in rhs. +bool expressionListContainsAll(ASTExpressionList * lhs, ASTExpressionList * rhs) +{ + if (!lhs || !rhs) + return false; + if (lhs->children.size() < rhs->children.size()) + return false; + std::vector lhs_strs; + for (const auto & le : lhs->children) + { + lhs_strs.emplace_back(le->formatForLogging()); + } + for (const auto & re : rhs->children) + { + if (std::find(lhs_strs.begin(), lhs_strs.end(), re->formatForLogging()) != lhs_strs.end()) + return false; + } + return true; +} } -/// 'SELECT uniq(x) FROM (SELECT DISTINCT x ...)' to 'SELECT count() FROM (SELECT DISTINCT x ...)' void RewriteUniqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) { auto * selectq = ast->as(); @@ -60,40 +76,43 @@ void RewriteUniqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) if (!table_expr || table_expr->children.size() != 1 || !table_expr->subquery) return; auto * subquery = table_expr->subquery->as(); - subquery->formatForLogging(0); + if (!subquery) + return; + auto * sub_selectq = subquery->children[0]->as()->children[0]->as()->children[0]->as(); + if (!sub_selectq) + return; - // Check done, we now rewrite the AST - auto cloned_select_query = selectq->clone(); - expr_list->children[0] = makeASTFunction("count"); + auto match_distinct = [&]() -> bool + { + if (!sub_selectq->distinct) + return false; + auto sub_expr_list = sub_selectq->select(); + if (!sub_expr_list) + return false; + /// uniq expression list == subquery group by expression list + if (!expressionListEquals(func->children[0]->as(), sub_expr_list->as())) + return false; + return true; + }; -// auto table_name = table_expr->database_and_table_name->as()->name(); - table_expr->children.clear(); - table_expr->children.emplace_back(std::make_shared()); - table_expr->database_and_table_name = nullptr; - table_expr->table_function = nullptr; - table_expr->subquery = table_expr->children[0]; + auto match_group_by = [&]() -> bool + { + auto group_by = sub_selectq->groupBy(); + if (!group_by) + return false; + auto sub_expr_list = sub_selectq->select(); + if (!sub_expr_list) + return false; + /// uniq expression list == subquery group by expression list + if (!expressionListEquals(func->children[0]->as(), group_by->as())) + return false; + /// subquery select expression list must contain all columns in uniq expression list + expressionListContainsAll(sub_expr_list->as(), func->children[0]->as()); + return true; + }; -// auto column_name = arg[0]->as()->name(); -// // Form AST for subquery -// { -// auto * select_ptr = cloned_select_query->as(); -// select_ptr->refSelect()->children.clear(); -// select_ptr->refSelect()->children.emplace_back(std::make_shared(column_name)); -// auto exprlist = std::make_shared(); -// exprlist->children.emplace_back(std::make_shared(column_name)); -// cloned_select_query->as()->setExpression(ASTSelectQuery::Expression::GROUP_BY, exprlist); -// -// auto expr = std::make_shared(); -// expr->children.emplace_back(cloned_select_query); -// auto select_with_union = std::make_shared(); -// select_with_union->union_mode = SelectUnionMode::UNION_DEFAULT; -// select_with_union->is_normalized = false; -// select_with_union->list_of_modes.clear(); -// 
select_with_union->set_of_modes.clear(); -// select_with_union->children.emplace_back(expr); -// select_with_union->list_of_selects = expr; -// table_expr->children[0]->as()->children.emplace_back(select_with_union); -// } + if (match_distinct() || match_group_by()) + expr_list->children[0] = makeASTFunction("count"); } } diff --git a/src/Interpreters/RewriteUniqToCountVisitor.h b/src/Interpreters/RewriteUniqToCountVisitor.h index f59206101d4..42a86049bb9 100644 --- a/src/Interpreters/RewriteUniqToCountVisitor.h +++ b/src/Interpreters/RewriteUniqToCountVisitor.h @@ -10,11 +10,11 @@ namespace DB class ASTFunction; /// Simple rewrite: -/// 'SELECT uniq(x) FROM (SELECT DISTINCT x ...)' to +/// 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' to /// 'SELECT count() FROM (SELECT DISTINCT x ...)' /// -/// 'SELECT uniq() FROM (SELECT x ... GROUP BY x)' to -/// 'SELECT count() FROM (SELECT x ... GROUP BY x)' +/// 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to +/// 'SELECT count() FROM (SELECT x ... GROUP BY x ...)' /// /// Note we can rewrite all uniq variants except uniqUpTo. class RewriteUniqToCountMatcher From a33a6dafacbc6b9df1197a1f7d0b2e715b87361e Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 7 Jul 2023 17:03:37 +0800 Subject: [PATCH 0083/1687] rewrite support alias --- src/Core/Settings.h | 34 +++---- .../RewriteUniqToCountVisitor.cpp | 97 +++++++++++++------ 2 files changed, 81 insertions(+), 50 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c69d132ea25..29ea7fe5cb9 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -127,9 +127,8 @@ class IColumn; \ M(Bool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \ M(Bool, optimize_move_to_prewhere_if_final, false, "If query has `FINAL`, the optimization `move_to_prewhere` is not always correct and it is enabled only if both settings `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are turned on", 0) \ - M(Bool, move_all_conditions_to_prewhere, true, "Move all viable conditions from WHERE to PREWHERE", 0) \ - M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ - M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ + M(Bool, move_all_conditions_to_prewhere, false, "Move all viable conditions from WHERE to PREWHERE", 0) \ + M(Bool, enable_multiple_prewhere_read_steps, false, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. 
Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ @@ -276,7 +275,7 @@ class IColumn; \ M(UInt64, http_headers_progress_interval_ms, 100, "Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified interval.", 0) \ M(Bool, http_wait_end_of_query, false, "Enable HTTP response buffering on the server-side.", 0) \ - M(UInt64, http_response_buffer_size, 0, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \ + M(UInt64, http_response_buffer_size, false, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \ \ M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ \ @@ -534,6 +533,7 @@ class IColumn; M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \ M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \ M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \ + M(Bool, optimize_duplicate_order_by_and_distinct, false, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \ M(Bool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \ M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. 
Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ @@ -577,7 +577,6 @@ class IColumn; M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) ALIAS(allow_experimental_projection_optimization) \ - M(Bool, optimize_use_implicit_projections, true, "Automatically choose implicit projections to perform SELECT query", 0) \ M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \ @@ -622,7 +621,6 @@ class IColumn; M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \ M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \ M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \ - M(Bool, disable_url_encoding, false, " Allows to disable decoding/encoding path in uri in URL table engine", 0) \ M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \ M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \ @@ -631,7 +629,7 @@ class IColumn; M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \ - M(UInt64, distributed_ddl_entry_format_version, 5, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \ + M(UInt64, distributed_ddl_entry_format_version, 3, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \ \ M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \ M(UInt64, external_storage_max_read_bytes, 0, "Limit maximum number of bytes when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. 
If equal to 0, this setting is disabled", 0) \ @@ -659,8 +657,7 @@ class IColumn; M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \ M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \ \ - M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ - M(UInt64, function_sleep_max_microseconds_per_block, 3000000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \ + M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \ @@ -674,8 +671,8 @@ class IColumn; M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \ M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \ M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \ - M(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, "Whether to use only prewhere columns size to determine reading task size.", 0) \ \ + M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \ M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \ @@ -739,7 +736,7 @@ class IColumn; M(String, workload, "default", "Name of workload to be used to access resources", 0) \ M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. 
This setting is used for testing purposes and not meant to be changed by users.", 0) \ \ - M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \ + M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \ \ M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \ M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \ @@ -762,7 +759,7 @@ class IColumn; /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ - M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ + M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ @@ -776,8 +773,8 @@ class IColumn; M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \ - M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ - M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\ + M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \ + M(Bool, optimize_uniq_to_count, false, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause, it is a RBO based optimization.", 0) // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. 
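
A note on the hunk above: the new optimize_uniq_to_count setting gates a purely syntactic rewrite of 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' (or '... GROUP BY x ...') into 'SELECT count() FROM (...)', which is safe because the subquery already deduplicates x. The following minimal, self-contained sketch shows the shape of that matching rule; it mirrors the helpers in RewriteUniqToCountVisitor.cpp but operates on plain std::string column names instead of ClickHouse ASTs, and the names canRewriteDistinct/canRewriteGroupBy are illustrative assumptions rather than symbols from the patch.

#include <algorithm>
#include <cassert>
#include <cctype>
#include <string>
#include <vector>

// Mirrors matchFnUniq: every uniq variant except uniqUpTo is eligible.
// The literals must be lowercase because the input is lowercased first.
static bool matchFnUniq(std::string name)
{
    for (auto & c : name)
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    return name == "uniq" || name == "uniqhll12" || name == "uniqexact"
        || name == "uniqtheta" || name == "uniqcombined" || name == "uniqcombined64";
}

// 'SELECT uniq(args) FROM (SELECT DISTINCT cols ...)': the rewrite is safe
// only when the uniq argument list matches the DISTINCT projection exactly.
static bool canRewriteDistinct(const std::vector<std::string> & uniq_args,
                               const std::vector<std::string> & distinct_cols)
{
    return uniq_args == distinct_cols;
}

// 'SELECT uniq(args) FROM (SELECT sel ... GROUP BY keys)': the rewrite is
// safe when args match the GROUP BY keys and every arg is also selected.
static bool canRewriteGroupBy(const std::vector<std::string> & uniq_args,
                              const std::vector<std::string> & group_by_keys,
                              const std::vector<std::string> & subquery_select)
{
    if (uniq_args != group_by_keys)
        return false;
    for (const auto & arg : uniq_args)
        if (std::find(subquery_select.begin(), subquery_select.end(), arg) == subquery_select.end())
            return false;
    return true;
}

int main()
{
    assert(matchFnUniq("uniqExact") && !matchFnUniq("uniqUpTo"));
    assert(canRewriteDistinct({"a"}, {"a"}));
    assert(canRewriteGroupBy({"a"}, {"a"}, {"a", "sum(b)"}));
    assert(!canRewriteGroupBy({"a"}, {"a", "b"}, {"a"}));
    return 0;
}
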
@@ -821,7 +818,6 @@ class IColumn; MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_distributed_schedule_pool_size, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0) \ - MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, async_insert_threads, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0) \ /* ---- */ \ @@ -833,7 +829,6 @@ class IColumn; MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \ MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \ MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \ - MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \ /** The section above is for obsolete settings. Do not add anything there. */ @@ -878,7 +873,6 @@ class IColumn; M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \ M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \ M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \ - M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \ M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \ M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ @@ -913,7 +907,6 @@ class IColumn; \ M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \ M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \ - M(IntervalOutputFormat, interval_output_format, FormatSettings::IntervalOutputFormat::Numeric, "Textual representation of Interval. Possible values: 'kusto', 'numeric'.", 0) \ \ M(Bool, input_format_ipv4_default_on_conversion_error, false, "Deserialization of IPv4 will use default values instead of throwing exception on conversion error.", 0) \ M(Bool, input_format_ipv6_default_on_conversion_error, false, "Deserialization of IPV6 will use default values instead of throwing exception on conversion error.", 0) \ @@ -954,10 +947,6 @@ class IColumn; M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \ M(ParquetCompression, output_format_parquet_compression_method, "lz4", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \ M(Bool, output_format_parquet_compliant_nested_types, true, "In parquet file schema, use name 'element' instead of 'item' for list elements. This is a historical artifact of Arrow library implementation. 
Generally increases compatibility, except perhaps with some old versions of Arrow.", 0) \ - M(Bool, output_format_parquet_use_custom_encoder, true, "Use experimental faster Parquet encoder implementation.", 0) \ - M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder.", 0) \ - M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, "Target page size in bytes, before compression.", 0) \ - M(UInt64, output_format_parquet_batch_size, 1024, "Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ @@ -1023,7 +1012,6 @@ class IColumn; M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \ \ M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \ - M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. diff --git a/src/Interpreters/RewriteUniqToCountVisitor.cpp b/src/Interpreters/RewriteUniqToCountVisitor.cpp index d608b6dfae4..587a905e4c5 100644 --- a/src/Interpreters/RewriteUniqToCountVisitor.cpp +++ b/src/Interpreters/RewriteUniqToCountVisitor.cpp @@ -14,16 +14,49 @@ namespace DB { +using Aliases = std::unordered_map; + namespace { bool matchFnUniq(String func_name) { auto name = Poco::toLower(func_name); - return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined" || name == "uniqCombined64"; + return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined" + || name == "uniqCombined64"; } -bool expressionListEquals(ASTExpressionList * lhs, ASTExpressionList * rhs) +bool expressionEquals(const ASTPtr & lhs, const ASTPtr & rhs, Aliases & alias) +{ + if (lhs->getTreeHash() == rhs->getTreeHash()) + { + return true; + } + else + { + auto * lhs_idf = lhs->as(); + auto * rhs_idf = rhs->as(); + if (lhs_idf && rhs_idf) + { + /// compound identifiers, such as: + if (lhs_idf->shortName() == rhs_idf->shortName()) + return true; + + /// translate alias + if (alias.find(lhs_idf->shortName()) != alias.end()) + lhs_idf = alias.find(lhs_idf->shortName())->second->as(); + + if (alias.find(rhs_idf->shortName()) != alias.end()) + rhs_idf = alias.find(rhs_idf->shortName())->second->as(); + + if (lhs_idf->shortName() == rhs_idf->shortName()) + return true; + } + } + return false; +} + +bool expressionListEquals(ASTExpressionList * lhs, ASTExpressionList * rhs, Aliases & alias) { if (!lhs || !rhs) return false; @@ -31,27 +64,23 @@ bool expressionListEquals(ASTExpressionList * lhs, ASTExpressionList * rhs) return false; for (size_t i = 0; i < lhs->children.size(); i++) { - if (lhs->children[i]->formatForLogging() != rhs->children[i]->formatForLogging()) // TODO 
not an elegant way + if (!expressionEquals(lhs->children[i], rhs->children[i], alias)) return false; } return true; } -/// Test whether lhs contains all expr in rhs. -bool expressionListContainsAll(ASTExpressionList * lhs, ASTExpressionList * rhs) +/// Test whether lhs contains all expressions in rhs. +bool expressionListContainsAll(ASTExpressionList * lhs, ASTExpressionList * rhs, Aliases alias) { if (!lhs || !rhs) return false; if (lhs->children.size() < rhs->children.size()) return false; - std::vector<String> lhs_strs; - for (const auto & le : lhs->children) - { - lhs_strs.emplace_back(le->formatForLogging()); - } for (const auto & re : rhs->children) { - if (std::find(lhs_strs.begin(), lhs_strs.end(), re->formatForLogging()) != lhs_strs.end()) + auto predicate = [&re, &alias](ASTPtr & le) { return expressionEquals(le, re, alias); }; + if (std::find_if(lhs->children.begin(), lhs->children.end(), predicate) == lhs->children.end()) return false; } return true; @@ -72,46 +101,60 @@ void RewriteUniqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) return; if (selectq->tables()->as<ASTTablesInSelectQuery>()->children[0]->as<ASTTablesInSelectQueryElement>()->children.size() != 1) return; - auto * table_expr = selectq->tables()->as<ASTTablesInSelectQuery>()->children[0]->as<ASTTablesInSelectQueryElement>()->children[0]->as<ASTTableExpression>(); + auto * table_expr = selectq->tables() + ->as<ASTTablesInSelectQuery>() + ->children[0] + ->as<ASTTablesInSelectQueryElement>() + ->children[0] + ->as<ASTTableExpression>(); if (!table_expr || table_expr->children.size() != 1 || !table_expr->subquery) return; auto * subquery = table_expr->subquery->as<ASTSubquery>(); if (!subquery) return; - auto * sub_selectq = subquery->children[0]->as<ASTSelectWithUnionQuery>()->children[0]->as<ASTExpressionList>()->children[0]->as<ASTSelectQuery>(); + auto * sub_selectq = subquery->children[0] + ->as<ASTSelectWithUnionQuery>()->children[0] + ->as<ASTExpressionList>()->children[0] + ->as<ASTSelectQuery>(); if (!sub_selectq) return; + auto sub_expr_list = sub_selectq->select(); + if (!sub_expr_list) + return; - auto match_distinct = [&]() -> bool + /// collect subquery select expressions alias + std::unordered_map<String, ASTPtr> alias; + for (auto expr : sub_expr_list->children) + { + if (!expr->tryGetAlias().empty()) + alias.insert({expr->tryGetAlias(), expr}); + } + + auto match_subquery_with_distinct = [&]() -> bool { if (!sub_selectq->distinct) return false; - auto sub_expr_list = sub_selectq->select(); - if (!sub_expr_list) - return false; /// uniq expression list == subquery select expression list - if (!expressionListEquals(func->children[0]->as<ASTExpressionList>(), sub_expr_list->as<ASTExpressionList>())) + if (!expressionListEquals(func->children[0]->as<ASTExpressionList>(), sub_expr_list->as<ASTExpressionList>(), alias)) return false; return true; }; - auto match_group_by = [&]() -> bool + auto match_subquery_with_group_by = [&]() -> bool { - auto group_by = sub_selectq->groupBy(); + auto group_by = sub_selectq->groupBy(); // TODO group by type if (!group_by) return false; - auto sub_expr_list = sub_selectq->select(); - if (!sub_expr_list) - return false; - /// uniq expression list == subquery group by expression list + /// uniq expression list == subquery group by expression list if (!expressionListEquals(func->children[0]->as<ASTExpressionList>(), group_by->as<ASTExpressionList>(), alias)) return false; /// subquery select expression list must contain all columns in uniq expression list - expressionListContainsAll(sub_expr_list->as<ASTExpressionList>(), func->children[0]->as<ASTExpressionList>()); + if (!expressionListContainsAll(sub_expr_list->as<ASTExpressionList>(), func->children[0]->as<ASTExpressionList>(), alias)) + return false; return true; }; - if (match_distinct() || match_group_by()) + if (match_subquery_with_distinct() || match_subquery_with_group_by()) expr_list->children[0] = makeASTFunction("count"); }
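
The alias handling introduced above is the crux of this commit: the uniq arguments and the subquery expressions may refer to the same column under different names, so both sides are translated through the collected alias map before being compared. Below is a compact, self-contained sketch of that idea, again on plain std::string names rather than ASTs; resolve and columnEquals are assumed helper names for illustration, not symbols from the patch.

#include <cassert>
#include <string>
#include <unordered_map>

// Maps an alias to the column it stands for, e.g. {"alias_of_a" -> "a"},
// as collected from 'SELECT a AS alias_of_a ...'.
using Aliases = std::unordered_map<std::string, std::string>;

// Resolve a name through the alias map, if it is an alias at all.
static std::string resolve(const std::string & name, const Aliases & aliases)
{
    auto it = aliases.find(name);
    return it == aliases.end() ? name : it->second;
}

// Two column references are equal when they resolve to the same underlying
// column; this is what lets the rewrite fire for
// 'SELECT uniq(alias_of_a) FROM (SELECT a AS alias_of_a ...)'.
static bool columnEquals(const std::string & lhs, const std::string & rhs, const Aliases & aliases)
{
    return resolve(lhs, aliases) == resolve(rhs, aliases);
}

int main()
{
    const Aliases aliases{{"alias_of_a", "a"}};
    assert(columnEquals("alias_of_a", "a", aliases));
    assert(columnEquals("a", "a", aliases));
    assert(!columnEquals("b", "a", aliases));
    return 0;
}

From aa513c8575ec44b10008bebfcd06315f535f8280 Mon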
Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 7 Jul 2023 18:04:15 +0800 Subject: [PATCH 0084/1687] add tests for uniq to count rewrite (cherry picked from commit e0b223aa7eac0e780a4048a2e302f67406ace0aa) --- .../test_rewrite_uniq_to_count/__init__.py | 0 .../test_rewrite_uniq_to_count/test.py | 81 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 tests/integration/test_rewrite_uniq_to_count/__init__.py create mode 100644 tests/integration/test_rewrite_uniq_to_count/test.py diff --git a/tests/integration/test_rewrite_uniq_to_count/__init__.py b/tests/integration/test_rewrite_uniq_to_count/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_rewrite_uniq_to_count/test.py b/tests/integration/test_rewrite_uniq_to_count/test.py new file mode 100644 index 00000000000..2e21a8a50a5 --- /dev/null +++ b/tests/integration/test_rewrite_uniq_to_count/test.py @@ -0,0 +1,81 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node") + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + prepare() + yield cluster + finally: + shutdown() + cluster.shutdown() + + +def prepare(): + node.query( + """ + CREATE TABLE IF NOT EXISTS test_rewrite_uniq_to_count + ( + `a` UInt8, + `b` UInt8, + `c` UInt8 + ) + ENGINE = MergeTree + ORDER BY `a` + """ + ) + node.query("INSERT INTO test_rewrite_uniq_to_count values ('1', '1', '1'), ('1', '1', '1')") + node.query("INSERT INTO test_rewrite_uniq_to_count values ('2', '2', '2'), ('2', '2', '2')") + node.query("INSERT INTO test_rewrite_uniq_to_count values ('3', '3', '3'), ('3', '3', '3')") + + +def shutdown(): + node.query("DROP TABLE IF EXISTS test_rewrite_uniq_to_count SYNC") + + +def check(query, result): + # old analyzer + query = query + " settings optimize_uniq_to_count = 1" + assert node.query(query) == f"{result}\n" + assert "count" in node.query("EXPLAIN SYNTAX " + query) + + # # new analyzer + # query = query + ", allow_experimental_analyzer = 1" + # assert node.query(query) == f"{result}\n" + # assert "count" in node.query("EXPLAIN QUERY_TREE " + query) + + +def test_rewrite_distinct(started_cluster): + check("SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count)", + 3) + + check("SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t", + 3) + + check("SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t", + 3) + + check("SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as n FROM test_rewrite_uniq_to_count) t", + 3) + + +def test_rewrite_group_by(started_cluster): + check("SELECT uniq(a) FROM (SELECT a, min(b) FROM test_rewrite_uniq_to_count GROUP BY a)", + 3) + + check("SELECT uniq(t.a) FROM (SELECT a, min(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", + 3) + + check("SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", + 3) + + check("SELECT uniq(t.a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + 3) + + check("SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + 3) From fccd6e2abff0613e909608c328648f28315dfeab Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Mon, 10 Jul 2023 17:09:05 +0800 Subject: [PATCH 0085/1687] rewrite uniq to count in new analyzer (cherry picked from commit 
8004f37e972c9efc7cfc9d66a1157b020e7b6d89) --- src/Analyzer/Passes/QueryAnalysisPass.h | 1 + src/Analyzer/Passes/UniqToCountPass.cpp | 166 ++++++++++++++++++ src/Analyzer/Passes/UniqToCountPass.h | 30 ++++ src/Analyzer/QueryTreePassManager.cpp | 2 + src/Core/Settings.h | 2 +- .../RewriteUniqToCountVisitor.cpp | 12 +- src/Interpreters/RewriteUniqToCountVisitor.h | 17 +- .../test_rewrite_uniq_to_count/test.py | 39 +++- 8 files changed, 246 insertions(+), 23 deletions(-) create mode 100644 src/Analyzer/Passes/UniqToCountPass.cpp create mode 100644 src/Analyzer/Passes/UniqToCountPass.h diff --git a/src/Analyzer/Passes/QueryAnalysisPass.h b/src/Analyzer/Passes/QueryAnalysisPass.h index fa8778ebf76..ea845f26bd9 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.h +++ b/src/Analyzer/Passes/QueryAnalysisPass.h @@ -51,6 +51,7 @@ namespace DB * Function `arrayJoin` is handled properly. * For functions `dictGet` and its variations and for function `joinGet` identifier as first argument is handled properly. * Replace `countDistinct` and `countIfDistinct` aggregate functions using setting count_distinct_implementation. + * Replace `uniq` and `uniq` and its variants(except uniqUpTo) into `count` aggregate functions using setting optimize_uniq_to_count. * Add -OrNull suffix to aggregate functions if setting aggregate_functions_null_for_empty is true. * Function `exists` is converted into `in`. * Functions `in`, `notIn`, `globalIn`, `globalNotIn` converted into `nullIn`, `notNullIn`, `globalNullIn`, `globalNotNullIn` if setting transform_null_in is true. diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp new file mode 100644 index 00000000000..1ffb83a6e36 --- /dev/null +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -0,0 +1,166 @@ +#include "UniqToCountPass.h" + +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +using Aliases = std::unordered_map; + +namespace +{ + + bool matchFnUniq(String func_name) + { + auto name = Poco::toLower(func_name); + return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined" + || name == "uniqCombined64"; + } + + bool nodeEquals(const QueryTreeNodePtr & lhs, const QueryTreeNodePtr & rhs, const Aliases & alias) + { + auto * lhs_node = lhs->as(); + auto * rhs_node = rhs->as(); + + if (lhs_node && rhs_node) + { + if (lhs_node->getColumn() == rhs_node->getColumn()) + return true; + + /// translate alias + if (lhs->hasAlias() && alias.find(lhs->getAlias()) != alias.end()) + lhs_node = alias.find(lhs->getAlias())->second->as(); + + if (rhs->hasAlias() && alias.find(rhs->getAlias()) != alias.end()) + rhs_node = alias.find(rhs->getAlias())->second->as(); + + if (lhs_node && rhs_node && lhs_node == rhs_node) + return true; + } + return false; + } + + bool nodeListEquals(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs, const Aliases & alias) + { + if (lhs.size() != rhs.size()) + return false; + for (size_t i = 0; i < lhs.size(); i++) + { + if (!nodeEquals(lhs[i], rhs[i], alias)) + return false; + } + return true; + } + + bool nodeListContainsAll(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs, const Aliases & alias) + { + if (lhs.size() < rhs.size()) + return false; + for (const auto & re : rhs) + { + auto predicate = [&](const QueryTreeNodePtr & le) { return nodeEquals(le, re, alias); }; + if (std::find_if(lhs.begin(), lhs.end(), predicate) == lhs.end()) + return false; + } + return true; + } + +} + +class UniqToCountVisitor : public 
InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + void visitImpl(QueryTreeNodePtr & node) + { + if (!getSettings().optimize_uniq_to_count) + return; + + auto * query_node = node->as(); + if (!query_node) + return; + + /// Check that query has only single table expression which is subquery + auto * subquery_node = query_node->getJoinTree()->as(); + if (!subquery_node) + return; + + /// Check that query has only single node in projection + auto & projection_nodes = query_node->getProjection().getNodes(); + if (projection_nodes.size() != 1) + return; + + /// Check that projection_node is a function + auto & projection_node = projection_nodes[0]; + auto * function_node = projection_node->as(); + if (!function_node) + return; + + /// Check that query single projection node is `uniq` or its variants + if (!matchFnUniq(function_node->getFunctionName())) + return; + + /// collect subquery select expressions alias. + /// TODO new analyzer will lose alias info, so we will collect nothing and we can not rewrite SQL with alias. + Aliases alias; + for (auto & subquery_projection_node : subquery_node->getProjection().getNodes()) + { + if (subquery_projection_node->hasAlias()) + alias.insert({subquery_projection_node->getAlias(), subquery_projection_node}); + } + + auto & uniq_arguments_nodes = function_node->getArguments().getNodes(); + + /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' + auto match_subquery_with_distinct = [&]() -> bool + { + if (!subquery_node->isDistinct()) + return false; + /// uniq expression list == subquery group by expression list + if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjection().getNodes(), alias)) + return false; + return true; + }; + + /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' + auto match_subquery_with_group_by = [&]() -> bool + { + if (!subquery_node->hasGroupBy()) + return false; + /// uniq argument node list == subquery group by node list + if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getGroupByNode()->getChildren(), alias)) + return false; + /// subquery select node list must contain all columns in uniq argument node list + if (!nodeListContainsAll(subquery_node->getProjection().getNodes(), uniq_arguments_nodes, alias)) + return false; + return true; + }; + + /// Replace uniq of initial query to count + if (match_subquery_with_distinct() || match_subquery_with_group_by()) + { + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); + function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + function_node->getArguments().getNodes().clear(); + query_node->resolveProjectionColumns({{"count()", function_node->getResultType()}}); + } + } +}; + + +void UniqToCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +{ + UniqToCountVisitor visitor(std::move(context)); + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/UniqToCountPass.h b/src/Analyzer/Passes/UniqToCountPass.h new file mode 100644 index 00000000000..4992d524e5e --- /dev/null +++ b/src/Analyzer/Passes/UniqToCountPass.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +namespace DB +{ + +/** Optimize `uniq` and its variants(except uniqUpTo) into `count` over subquery. + * Example: 'SELECT uniq(x ...) 
FROM (SELECT DISTINCT x ...)' to + * Result: 'SELECT count() FROM (SELECT DISTINCT x ...)' + * + * Example: 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to + * Result: 'SELECT count() FROM (SELECT x ... GROUP BY x ...)' + * + * Note that we can rewrite all uniq variants except uniqUpTo. + */ +class UniqToCountPass final : public IQueryTreePass +{ +public: + String getName() override { return "UniqToCount"; } + + String getDescription() override + { + return "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause."; + } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; +}; + +} diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index a6da2a66615..dd75b0f586d 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -246,6 +247,7 @@ void addQueryTreePasses(QueryTreePassManager & manager) manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); + manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 29ea7fe5cb9..f71346be7b8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -774,7 +774,7 @@ class IColumn; M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \ M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \ - M(Bool, optimize_uniq_to_count, false, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause, it is a RBO based optimization.", 0) + M(Bool, optimize_uniq_to_count, false, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. diff --git a/src/Interpreters/RewriteUniqToCountVisitor.cpp b/src/Interpreters/RewriteUniqToCountVisitor.cpp index 587a905e4c5..ac42a8a82da 100644 --- a/src/Interpreters/RewriteUniqToCountVisitor.cpp +++ b/src/Interpreters/RewriteUniqToCountVisitor.cpp @@ -26,7 +26,7 @@ bool matchFnUniq(String func_name) || name == "uniqCombined64"; } -bool expressionEquals(const ASTPtr & lhs, const ASTPtr & rhs, Aliases & alias) +bool expressionEquals(const ASTPtr & lhs, const ASTPtr & rhs, const Aliases & alias) { if (lhs->getTreeHash() == rhs->getTreeHash()) { @@ -56,7 +56,7 @@ bool expressionEquals(const ASTPtr & lhs, const ASTPtr & rhs, Aliases & alias) return false; } -bool expressionListEquals(ASTExpressionList * lhs, ASTExpressionList * rhs, Aliases & alias) +bool expressionListEquals(ASTExpressionList * lhs, ASTExpressionList * rhs, const Aliases & alias) { if (!lhs || !rhs) return false; @@ -71,7 +71,7 @@ bool expressionListEquals(ASTExpressionList * lhs, ASTExpressionList * rhs, Alia } /// Test whether lhs contains all expressions in rhs. 
-bool expressionListContainsAll(ASTExpressionList * lhs, ASTExpressionList * rhs, Aliases alias) +bool expressionListContainsAll(ASTExpressionList * lhs, ASTExpressionList * rhs, const Aliases & alias) { if (!lhs || !rhs) return false; @@ -123,13 +123,14 @@ void RewriteUniqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) return; /// collect subquery select expressions alias - std::unordered_map alias; + Aliases alias; for (auto expr : sub_expr_list->children) { if (!expr->tryGetAlias().empty()) alias.insert({expr->tryGetAlias(), expr}); } + /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' auto match_subquery_with_distinct = [&]() -> bool { if (!sub_selectq->distinct) @@ -140,9 +141,10 @@ void RewriteUniqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) return true; }; + /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' auto match_subquery_with_group_by = [&]() -> bool { - auto group_by = sub_selectq->groupBy(); // TODO group by type + auto group_by = sub_selectq->groupBy(); if (!group_by) return false; /// uniq expression list == subquery group by expression list diff --git a/src/Interpreters/RewriteUniqToCountVisitor.h b/src/Interpreters/RewriteUniqToCountVisitor.h index 42a86049bb9..94528ccf2ee 100644 --- a/src/Interpreters/RewriteUniqToCountVisitor.h +++ b/src/Interpreters/RewriteUniqToCountVisitor.h @@ -9,14 +9,15 @@ namespace DB class ASTFunction; -/// Simple rewrite: -/// 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' to -/// 'SELECT count() FROM (SELECT DISTINCT x ...)' -/// -/// 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to -/// 'SELECT count() FROM (SELECT x ... GROUP BY x ...)' -/// -/// Note we can rewrite all uniq variants except uniqUpTo. +/** Optimize `uniq` into `count` over subquery. + * Example: 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' to + * Result: 'SELECT count() FROM (SELECT DISTINCT x ...)' + * + * Example: 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to + * Result: 'SELECT count() FROM (SELECT x ... GROUP BY x ...)' + * + * Note that we can rewrite all uniq variants except uniqUpTo. + */ class RewriteUniqToCountMatcher { public: diff --git a/tests/integration/test_rewrite_uniq_to_count/test.py b/tests/integration/test_rewrite_uniq_to_count/test.py index 2e21a8a50a5..af0c4f09117 100644 --- a/tests/integration/test_rewrite_uniq_to_count/test.py +++ b/tests/integration/test_rewrite_uniq_to_count/test.py @@ -42,40 +42,61 @@ def check(query, result): # old analyzer query = query + " settings optimize_uniq_to_count = 1" assert node.query(query) == f"{result}\n" - assert "count" in node.query("EXPLAIN SYNTAX " + query) + assert "count()" in node.query("EXPLAIN SYNTAX " + query) - # # new analyzer - # query = query + ", allow_experimental_analyzer = 1" - # assert node.query(query) == f"{result}\n" - # assert "count" in node.query("EXPLAIN QUERY_TREE " + query) + # new analyzer + query = query + ", allow_experimental_analyzer = 1" + assert node.query(query) == f"{result}\n" + assert "count()" in node.query("EXPLAIN QUERY TREE " + query) + + +# For new analyzer loses alias info, we can not rewrite SQL with alias. 
+def check_by_old_analyzer(query, result): + # only old analyzer + query = query + " settings optimize_uniq_to_count = 1" + assert node.query(query) == f"{result}\n" + assert "count()" in node.query("EXPLAIN SYNTAX " + query) def test_rewrite_distinct(started_cluster): + # simple test check("SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count)", 3) + # test subquery alias check("SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t", 3) + # test table.column check("SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t", 3) - check("SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as n FROM test_rewrite_uniq_to_count) t", + # test select expression alias + check_by_old_analyzer("SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", + 3) + + # test select expression alias + check_by_old_analyzer("SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t", 3) def test_rewrite_group_by(started_cluster): + # simple test check("SELECT uniq(a) FROM (SELECT a, min(b) FROM test_rewrite_uniq_to_count GROUP BY a)", 3) + # test subquery alias check("SELECT uniq(t.a) FROM (SELECT a, min(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", 3) - check("SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", + # test select expression alias + check_by_old_analyzer("SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", 3) - check("SELECT uniq(t.a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + # test select expression alias + check_by_old_analyzer("SELECT uniq(t.a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", 3) - check("SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + # test select expression alias + check_by_old_analyzer("SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", 3) From 3f3c15e11dba2d653c963e8ac8b3adaab27e9cc8 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 11 Jul 2023 09:51:52 +0800 Subject: [PATCH 0086/1687] fix style (cherry picked from commit a01a6834482cb8af82dd528fa557f3157b60eea9) --- .../test_rewrite_uniq_to_count/test.py | 79 +++++++++++++------ 1 file changed, 53 insertions(+), 26 deletions(-) diff --git a/tests/integration/test_rewrite_uniq_to_count/test.py b/tests/integration/test_rewrite_uniq_to_count/test.py index af0c4f09117..ec9dc6d9b7f 100644 --- a/tests/integration/test_rewrite_uniq_to_count/test.py +++ b/tests/integration/test_rewrite_uniq_to_count/test.py @@ -29,13 +29,21 @@ def prepare(): ORDER BY `a` """ ) - node.query("INSERT INTO test_rewrite_uniq_to_count values ('1', '1', '1'), ('1', '1', '1')") - node.query("INSERT INTO test_rewrite_uniq_to_count values ('2', '2', '2'), ('2', '2', '2')") - node.query("INSERT INTO test_rewrite_uniq_to_count values ('3', '3', '3'), ('3', '3', '3')") + node.query( + "INSERT INTO test_rewrite_uniq_to_count values ('1', '1', '1'), ('1', '1', '1')" + ) + node.query( + "INSERT INTO test_rewrite_uniq_to_count values ('2', '2', '2'), ('2', '2', '2')" + ) + node.query( + "INSERT INTO test_rewrite_uniq_to_count values ('3', '3', '3'), ('3', '3', '3')" + ) def shutdown(): - node.query("DROP TABLE 
IF EXISTS test_rewrite_uniq_to_count SYNC") + node.query( + "DROP TABLE IF EXISTS test_rewrite_uniq_to_count SYNC" + ) def check(query, result): @@ -50,7 +58,6 @@ def check(query, result): assert "count()" in node.query("EXPLAIN QUERY TREE " + query) -# For new analyzer loses alias info, we can not rewrite SQL with alias. def check_by_old_analyzer(query, result): # only old analyzer query = query + " settings optimize_uniq_to_count = 1" @@ -60,43 +67,63 @@ def check_by_old_analyzer(query, result): def test_rewrite_distinct(started_cluster): # simple test - check("SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count)", - 3) + check( + "SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count)", + 3, + ) # test subquery alias - check("SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t", - 3) + check( + "SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t", + 3, + ) - # test table.column - check("SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t", - 3) + # test compound column name + check( + "SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t", + 3, + ) # test select expression alias - check_by_old_analyzer("SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", - 3) + check_by_old_analyzer( + "SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", + 3, + ) # test select expression alias - check_by_old_analyzer("SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t", - 3) + check_by_old_analyzer( + "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t", + 3, + ) def test_rewrite_group_by(started_cluster): # simple test - check("SELECT uniq(a) FROM (SELECT a, min(b) FROM test_rewrite_uniq_to_count GROUP BY a)", - 3) + check( + "SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a)", + 3, + ) # test subquery alias - check("SELECT uniq(t.a) FROM (SELECT a, min(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3) + check( + "SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", + 3, + ) # test select expression alias - check_by_old_analyzer("SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3) + check_by_old_analyzer( + "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", + 3, + ) # test select expression alias - check_by_old_analyzer("SELECT uniq(t.a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", - 3) + check_by_old_analyzer( + "SELECT uniq(t.a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + 3, + ) # test select expression alias - check_by_old_analyzer("SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, min(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", - 3) + check_by_old_analyzer( + "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + 3, + ) From 0c11a9b2a20f0dbe974119ef880ecb51fc8a2770 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 11 Jul 2023 10:11:21 +0800 Subject: [PATCH 0087/1687] ignore alias for UniqToCountPass (cherry picked from 
commit cd1111b17b07cdaade0e909a4139205763701d24) --- src/Analyzer/Passes/QueryAnalysisPass.h | 1 - src/Analyzer/Passes/UniqToCountPass.cpp | 90 +++++++------------ .../test_rewrite_uniq_to_count/test.py | 6 +- 3 files changed, 35 insertions(+), 62 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.h b/src/Analyzer/Passes/QueryAnalysisPass.h index ea845f26bd9..fa8778ebf76 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.h +++ b/src/Analyzer/Passes/QueryAnalysisPass.h @@ -51,7 +51,6 @@ namespace DB * Function `arrayJoin` is handled properly. * For functions `dictGet` and its variations and for function `joinGet` identifier as first argument is handled properly. * Replace `countDistinct` and `countIfDistinct` aggregate functions using setting count_distinct_implementation. - * Replace `uniq` and `uniq` and its variants(except uniqUpTo) into `count` aggregate functions using setting optimize_uniq_to_count. * Add -OrNull suffix to aggregate functions if setting aggregate_functions_null_for_empty is true. * Function `exists` is converted into `in`. * Functions `in`, `notIn`, `globalIn`, `globalNotIn` converted into `nullIn`, `notNullIn`, `globalNullIn`, `globalNotNullIn` if setting transform_null_in is true. diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index 1ffb83a6e36..ae7952051e7 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -11,65 +11,50 @@ namespace DB { -using Aliases = std::unordered_map; - namespace { - bool matchFnUniq(String func_name) - { - auto name = Poco::toLower(func_name); - return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined" - || name == "uniqCombined64"; - } +bool matchFnUniq(String func_name) +{ + auto name = Poco::toLower(func_name); + return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined" + || name == "uniqCombined64"; +} - bool nodeEquals(const QueryTreeNodePtr & lhs, const QueryTreeNodePtr & rhs, const Aliases & alias) - { - auto * lhs_node = lhs->as(); - auto * rhs_node = rhs->as(); +bool nodeEquals(const QueryTreeNodePtr & lhs, const QueryTreeNodePtr & rhs) +{ + auto * lhs_node = lhs->as(); + auto * rhs_node = rhs->as(); - if (lhs_node && rhs_node) - { - if (lhs_node->getColumn() == rhs_node->getColumn()) - return true; + if (lhs_node && rhs_node && lhs_node->getColumn() == rhs_node->getColumn()) + return true; + return false; +} - /// translate alias - if (lhs->hasAlias() && alias.find(lhs->getAlias()) != alias.end()) - lhs_node = alias.find(lhs->getAlias())->second->as(); - - if (rhs->hasAlias() && alias.find(rhs->getAlias()) != alias.end()) - rhs_node = alias.find(rhs->getAlias())->second->as(); - - if (lhs_node && rhs_node && lhs_node == rhs_node) - return true; - } +bool nodeListEquals(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs) +{ + if (lhs.size() != rhs.size()) return false; - } - - bool nodeListEquals(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs, const Aliases & alias) + for (size_t i = 0; i < lhs.size(); i++) { - if (lhs.size() != rhs.size()) + if (!nodeEquals(lhs[i], rhs[i])) return false; - for (size_t i = 0; i < lhs.size(); i++) - { - if (!nodeEquals(lhs[i], rhs[i], alias)) - return false; - } - return true; } + return true; +} - bool nodeListContainsAll(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs, const Aliases & alias) +bool nodeListContainsAll(const QueryTreeNodes & lhs, const 
QueryTreeNodes & rhs) +{ + if (lhs.size() < rhs.size()) + return false; + for (const auto & re : rhs) { - if (lhs.size() < rhs.size()) + auto predicate = [&](const QueryTreeNodePtr & le) { return nodeEquals(le, re); }; + if (std::find_if(lhs.begin(), lhs.end(), predicate) == lhs.end()) return false; - for (const auto & re : rhs) - { - auto predicate = [&](const QueryTreeNodePtr & le) { return nodeEquals(le, re, alias); }; - if (std::find_if(lhs.begin(), lhs.end(), predicate) == lhs.end()) - return false; - } - return true; } + return true; +} } @@ -108,15 +93,6 @@ public: if (!matchFnUniq(function_node->getFunctionName())) return; - /// collect subquery select expressions alias. - /// TODO new analyzer will lose alias info, so we will collect nothing and we can not rewrite SQL with alias. - Aliases alias; - for (auto & subquery_projection_node : subquery_node->getProjection().getNodes()) - { - if (subquery_projection_node->hasAlias()) - alias.insert({subquery_projection_node->getAlias(), subquery_projection_node}); - } - auto & uniq_arguments_nodes = function_node->getArguments().getNodes(); /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' @@ -125,7 +101,7 @@ public: if (!subquery_node->isDistinct()) return false; /// uniq expression list == subquery group by expression list - if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjection().getNodes(), alias)) + if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjection().getNodes())) return false; return true; }; @@ -136,10 +112,10 @@ public: if (!subquery_node->hasGroupBy()) return false; /// uniq argument node list == subquery group by node list - if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getGroupByNode()->getChildren(), alias)) + if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getGroupByNode()->getChildren())) return false; /// subquery select node list must contain all columns in uniq argument node list - if (!nodeListContainsAll(subquery_node->getProjection().getNodes(), uniq_arguments_nodes, alias)) + if (!nodeListContainsAll(subquery_node->getProjection().getNodes(), uniq_arguments_nodes)) return false; return true; }; diff --git a/tests/integration/test_rewrite_uniq_to_count/test.py b/tests/integration/test_rewrite_uniq_to_count/test.py index ec9dc6d9b7f..d7fa9f39441 100644 --- a/tests/integration/test_rewrite_uniq_to_count/test.py +++ b/tests/integration/test_rewrite_uniq_to_count/test.py @@ -41,9 +41,7 @@ def prepare(): def shutdown(): - node.query( - "DROP TABLE IF EXISTS test_rewrite_uniq_to_count SYNC" - ) + node.query("DROP TABLE IF EXISTS test_rewrite_uniq_to_count SYNC") def check(query, result): @@ -107,7 +105,7 @@ def test_rewrite_group_by(started_cluster): # test subquery alias check( "SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3, + 3, ) # test select expression alias From 7ade6169c48a98deb7212ea7f01363807852b13c Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 11 Jul 2023 12:59:34 +0800 Subject: [PATCH 0088/1687] fix clang-tidy checking (cherry picked from commit 063eebc16b8250b42f3f39b7cf00d8dcb578a702) --- src/Interpreters/RewriteUniqToCountVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/RewriteUniqToCountVisitor.cpp b/src/Interpreters/RewriteUniqToCountVisitor.cpp index ac42a8a82da..7445068207a 100644 --- a/src/Interpreters/RewriteUniqToCountVisitor.cpp +++ b/src/Interpreters/RewriteUniqToCountVisitor.cpp @@ -124,7 +124,7 @@ void 
RewriteUniqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) /// collect subquery select expressions alias Aliases alias; - for (auto expr : sub_expr_list->children) + for (const auto & expr : sub_expr_list->children) { if (!expr->tryGetAlias().empty()) alias.insert({expr->tryGetAlias(), expr}); From 8faecdb7aca613e75e6c3d018684168bf69164fc Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 19 Jul 2023 18:46:20 +0800 Subject: [PATCH 0089/1687] support alias for new analyzer (cherry picked from commit 08409059cc198873ffbf11060bfdabaa0c74f07f) --- src/Analyzer/Passes/UniqToCountPass.cpp | 108 +++++++++++++----- .../test_rewrite_uniq_to_count/test.py | 16 +-- 2 files changed, 90 insertions(+), 34 deletions(-) diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index ae7952051e7..7533a99107b 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -21,36 +21,82 @@ bool matchFnUniq(String func_name) || name == "uniqCombined64"; } -bool nodeEquals(const QueryTreeNodePtr & lhs, const QueryTreeNodePtr & rhs) +/// Extract the corresponding projection columns for group by node list. +/// For example: +/// SELECT a as aa, any(b) FROM table group by a; -> aa(ColumnNode) +NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node) { - auto * lhs_node = lhs->as(); - auto * rhs_node = rhs->as(); + if (!query_node->hasGroupBy()) + return {}; - if (lhs_node && rhs_node && lhs_node->getColumn() == rhs_node->getColumn()) - return true; - return false; + NamesAndTypes result; + for (const auto & group_by_ele : query_node->getGroupByNode()->getChildren()) + { + const auto & projection_columns = query_node->getProjectionColumns(); + const auto & projection_nodes = query_node->getProjection().getNodes(); + + assert(projection_columns.size() == projection_nodes.size()); + + for (size_t i = 0; i < projection_columns.size(); i++) + { + if (projection_nodes[i]->isEqual(*group_by_ele)) + result.push_back(projection_columns[i]); + } + } + return result; } -bool nodeListEquals(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs) +/// Whether query_columns equals subquery_columns. +/// query_columns: query columns from query +/// subquery_columns: projection columns from subquery +bool nodeListEquals(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns) { - if (lhs.size() != rhs.size()) + if (query_columns.size() != subquery_columns.size()) return false; - for (size_t i = 0; i < lhs.size(); i++) + + for (const auto & query_column : query_columns) { - if (!nodeEquals(lhs[i], rhs[i])) + auto find = std::find_if( + subquery_columns.begin(), + subquery_columns.end(), + [&](const auto & subquery_column) -> bool + { + if (auto * column_node = query_column->as()) + { + return subquery_column == column_node->getColumn(); + } + return false; + }); + + if (find == subquery_columns.end()) return false; } return true; } -bool nodeListContainsAll(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs) +/// Whether subquery_columns contains all columns in subquery_columns. 
+/// query_columns: query columns from query +/// subquery_columns: projection columns from subquery +bool nodeListContainsAll(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns) { - if (lhs.size() < rhs.size()) + if (query_columns.size() > subquery_columns.size()) return false; - for (const auto & re : rhs) + + for (const auto & query_column : query_columns) { - auto predicate = [&](const QueryTreeNodePtr & le) { return nodeEquals(le, re); }; - if (std::find_if(lhs.begin(), lhs.end(), predicate) == lhs.end()) + auto find = std::find_if( + subquery_columns.begin(), + subquery_columns.end(), + [&](const auto & subquery_column) -> bool + { + if (auto * column_node = query_column->as()) + { + return subquery_column == column_node->getColumn(); + } + return false; + }); + + if (find == subquery_columns.end()) return false; } return true; @@ -58,17 +104,14 @@ bool nodeListContainsAll(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs) } -class UniqToCountVisitor : public InDepthQueryTreeVisitorWithContext +class UniqToCountVisitor : public InDepthQueryTreeVisitor { public: - using Base = InDepthQueryTreeVisitorWithContext; + using Base = InDepthQueryTreeVisitor; using Base::Base; void visitImpl(QueryTreeNodePtr & node) { - if (!getSettings().optimize_uniq_to_count) - return; - auto * query_node = node->as(); if (!query_node) return; @@ -100,9 +143,11 @@ public: { if (!subquery_node->isDistinct()) return false; - /// uniq expression list == subquery group by expression list - if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjection().getNodes())) + + /// uniq expression list == subquery projection columns + if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjectionColumns())) return false; + return true; }; @@ -111,12 +156,17 @@ public: { if (!subquery_node->hasGroupBy()) return false; + /// uniq argument node list == subquery group by node list - if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getGroupByNode()->getChildren())) + auto group_by_columns = extractProjectionColumnsForGroupBy(subquery_node); + + if (!nodeListEquals(uniq_arguments_nodes, group_by_columns)) return false; - /// subquery select node list must contain all columns in uniq argument node list - if (!nodeListContainsAll(subquery_node->getProjection().getNodes(), uniq_arguments_nodes)) + + /// subquery projection columns must contain all columns in uniq argument node list + if (!nodeListContainsAll(uniq_arguments_nodes, subquery_node->getProjectionColumns())) return false; + return true; }; @@ -125,8 +175,11 @@ public: { AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); + function_node->resolveAsAggregateFunction(std::move(aggregate_function)); function_node->getArguments().getNodes().clear(); + + /// Update projection columns query_node->resolveProjectionColumns({{"count()", function_node->getResultType()}}); } } @@ -135,7 +188,10 @@ public: void UniqToCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - UniqToCountVisitor visitor(std::move(context)); + if (!context->getSettings().optimize_uniq_to_count) + return; + + UniqToCountVisitor visitor; visitor.visit(query_tree_node); } diff --git a/tests/integration/test_rewrite_uniq_to_count/test.py b/tests/integration/test_rewrite_uniq_to_count/test.py index d7fa9f39441..e38e57f5cee 100644 --- a/tests/integration/test_rewrite_uniq_to_count/test.py +++ b/tests/integration/test_rewrite_uniq_to_count/test.py @@ 
-83,13 +83,13 @@ def test_rewrite_distinct(started_cluster): ) # test select expression alias - check_by_old_analyzer( - "SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", + check( + "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", 3, ) # test select expression alias - check_by_old_analyzer( + check( "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t", 3, ) @@ -109,19 +109,19 @@ def test_rewrite_group_by(started_cluster): ) # test select expression alias - check_by_old_analyzer( + check( "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", 3, ) # test select expression alias - check_by_old_analyzer( - "SELECT uniq(t.a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + check( + "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", 3, ) # test select expression alias - check_by_old_analyzer( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + check( + "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", 3, ) From 95c41f49e03645a24cd2421672eeef4d161bc291 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 27 Jul 2023 15:14:55 +0800 Subject: [PATCH 0090/1687] not change projection columns --- src/Analyzer/Passes/UniqToCountPass.cpp | 17 +++++++---------- tests/performance/uniq_to_count.xml | 9 +++++++++ 2 files changed, 16 insertions(+), 10 deletions(-) create mode 100644 tests/performance/uniq_to_count.xml diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index 7533a99107b..200c8cef343 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -104,14 +104,17 @@ bool nodeListContainsAll(const QueryTreeNodes & query_columns, const NamesAndTyp } -class UniqToCountVisitor : public InDepthQueryTreeVisitor +class UniqToCountVisitor : public InDepthQueryTreeVisitorWithContext { public: - using Base = InDepthQueryTreeVisitor; + using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; void visitImpl(QueryTreeNodePtr & node) { + if (!getSettings().optimize_uniq_to_count) + return; + auto * query_node = node->as(); if (!query_node) return; @@ -176,11 +179,8 @@ public: AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); function_node->getArguments().getNodes().clear(); - - /// Update projection columns - query_node->resolveProjectionColumns({{"count()", function_node->getResultType()}}); + function_node->resolveAsAggregateFunction(std::move(aggregate_function)); } } }; @@ -188,10 +188,7 @@ public: void UniqToCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - if (!context->getSettings().optimize_uniq_to_count) - return; - - UniqToCountVisitor visitor; + UniqToCountVisitor visitor(context); visitor.visit(query_tree_node); } diff --git a/tests/performance/uniq_to_count.xml b/tests/performance/uniq_to_count.xml new file mode 100644 index 00000000000..7e51d7d0763 --- /dev/null +++ b/tests/performance/uniq_to_count.xml @@ -0,0 +1,9 @@ + + CREATE 
VIEW numbers_view AS SELECT number from numbers_mt(100000000) order by number desc + + + select number from (select number from numbers(500000000) order by -number) limit 10 + select number from (select number from numbers_mt(1500000000) order by -number) limit 10 + + select number from numbers_view limit 100 + From 5f47aacef2ee1d96e3d6d9d96d46468bdad24f96 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 27 Jul 2023 15:15:13 +0800 Subject: [PATCH 0091/1687] add performance tests --- tests/performance/uniq_to_count.xml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/performance/uniq_to_count.xml b/tests/performance/uniq_to_count.xml index 7e51d7d0763..580013f2e81 100644 --- a/tests/performance/uniq_to_count.xml +++ b/tests/performance/uniq_to_count.xml @@ -1,9 +1,10 @@ - CREATE VIEW numbers_view AS SELECT number from numbers_mt(100000000) order by number desc + 1 - - select number from (select number from numbers(500000000) order by -number) limit 10 - select number from (select number from numbers_mt(1500000000) order by -number) limit 10 + select uniq(number) from (select DISTINCT number from numbers(100000000)) + select uniq(number) from (select number from numbers(100000000) group by number) - select number from numbers_view limit 100 + + select uniq(number) from (select DISTINCT number from numbers(100000000)) settings allow_experimental_analyzer = 1 + select uniq(number) from (select number from numbers(100000000) group by number) settings allow_experimental_analyzer = 1 From 6da386073831ecd9f01ae0c41ecbbad8fccb6ef4 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 27 Jul 2023 15:19:08 +0800 Subject: [PATCH 0092/1687] fix tests --- tests/integration/test_rewrite_uniq_to_count/test.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tests/integration/test_rewrite_uniq_to_count/test.py b/tests/integration/test_rewrite_uniq_to_count/test.py index e38e57f5cee..82a979c6e63 100644 --- a/tests/integration/test_rewrite_uniq_to_count/test.py +++ b/tests/integration/test_rewrite_uniq_to_count/test.py @@ -53,14 +53,7 @@ def check(query, result): # new analyzer query = query + ", allow_experimental_analyzer = 1" assert node.query(query) == f"{result}\n" - assert "count()" in node.query("EXPLAIN QUERY TREE " + query) - - -def check_by_old_analyzer(query, result): - # only old analyzer - query = query + " settings optimize_uniq_to_count = 1" - assert node.query(query) == f"{result}\n" - assert "count()" in node.query("EXPLAIN SYNTAX " + query) + assert "function_name: count" in node.query("EXPLAIN QUERY TREE " + query) def test_rewrite_distinct(started_cluster): From a161ebdf0ee452ec0db933b247e03b27b513e129 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 27 Jul 2023 15:54:09 +0800 Subject: [PATCH 0093/1687] update settings.h --- src/Core/Settings.h | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f71346be7b8..d66305d65fd 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -127,8 +127,9 @@ class IColumn; \ M(Bool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \ M(Bool, optimize_move_to_prewhere_if_final, false, "If query has `FINAL`, the optimization `move_to_prewhere` is not always correct and it is enabled only if both settings `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are turned on", 0) \ - M(Bool, 
move_all_conditions_to_prewhere, false, "Move all viable conditions from WHERE to PREWHERE", 0) \ - M(Bool, enable_multiple_prewhere_read_steps, false, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ + M(Bool, move_all_conditions_to_prewhere, true, "Move all viable conditions from WHERE to PREWHERE", 0) \ + M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ + M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ @@ -275,7 +276,7 @@ class IColumn; \ M(UInt64, http_headers_progress_interval_ms, 100, "Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified interval.", 0) \ M(Bool, http_wait_end_of_query, false, "Enable HTTP response buffering on the server-side.", 0) \ - M(UInt64, http_response_buffer_size, false, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \ + M(UInt64, http_response_buffer_size, 0, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \ \ M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ \ @@ -533,7 +534,6 @@ class IColumn; M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \ M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \ M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \ - M(Bool, optimize_duplicate_order_by_and_distinct, false, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \ M(Bool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \ M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. 
Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ @@ -577,6 +577,7 @@ class IColumn; M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) ALIAS(allow_experimental_projection_optimization) \ + M(Bool, optimize_use_implicit_projections, true, "Automatically choose implicit projections to perform SELECT query", 0) \ M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \ @@ -621,6 +622,7 @@ class IColumn; M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \ M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \ M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \ + M(Bool, disable_url_encoding, false, " Allows to disable decoding/encoding path in uri in URL table engine", 0) \ M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \ M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \ @@ -629,7 +631,7 @@ class IColumn; M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \ - M(UInt64, distributed_ddl_entry_format_version, 3, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \ + M(UInt64, distributed_ddl_entry_format_version, 5, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \ \ M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \ M(UInt64, external_storage_max_read_bytes, 0, "Limit maximum number of bytes when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. 
If equal to 0, this setting is disabled", 0) \ @@ -657,7 +659,8 @@ class IColumn; M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \ M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \ \ - M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ + M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ + M(UInt64, function_sleep_max_microseconds_per_block, 3000000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \ @@ -671,8 +674,8 @@ class IColumn; M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \ M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \ M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \ + M(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, "Whether to use only prewhere columns size to determine reading task size.", 0) \ \ - M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \ M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \ @@ -736,7 +739,7 @@ class IColumn; M(String, workload, "default", "Name of workload to be used to access resources", 0) \ M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. 
This setting is used for testing purposes and not meant to be changed by users.", 0) \ \ - M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \ + M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \ \ M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \ M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \ @@ -755,11 +758,12 @@ class IColumn; M(Bool, function_json_value_return_type_allow_nullable, false, "Allow function JSON_VALUE to return nullable type.", 0) \ M(Bool, function_json_value_return_type_allow_complex, false, "Allow function JSON_VALUE to return complex type, such as: struct, array, map.", 0) \ M(Bool, use_with_fill_by_sorting_prefix, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently", 0) \ + M(Bool, optimize_uniq_to_count, false, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ - M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \ + M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ @@ -773,8 +777,8 @@ class IColumn; M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \ - M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \ - M(Bool, optimize_uniq_to_count, false, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) + M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. 
The default timezone for current session or query. The server default timezone if empty.", 0) \ + M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. @@ -818,6 +822,7 @@ class IColumn; MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_distributed_schedule_pool_size, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0) \ + MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, async_insert_threads, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0) \ /* ---- */ \ @@ -829,6 +834,7 @@ class IColumn; MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \ MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \ MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \ + MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \ /** The section above is for obsolete settings. Do not add anything there. */ @@ -873,6 +879,7 @@ class IColumn; M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \ M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \ M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \ + M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \ M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \ M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ @@ -907,6 +914,7 @@ class IColumn; \ M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \ M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \ + M(IntervalOutputFormat, interval_output_format, FormatSettings::IntervalOutputFormat::Numeric, "Textual representation of Interval. Possible values: 'kusto', 'numeric'.", 0) \ \ M(Bool, input_format_ipv4_default_on_conversion_error, false, "Deserialization of IPv4 will use default values instead of throwing exception on conversion error.", 0) \ M(Bool, input_format_ipv6_default_on_conversion_error, false, "Deserialization of IPV6 will use default values instead of throwing exception on conversion error.", 0) \ @@ -947,6 +955,10 @@ class IColumn; M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. 
Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \ M(ParquetCompression, output_format_parquet_compression_method, "lz4", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \ M(Bool, output_format_parquet_compliant_nested_types, true, "In parquet file schema, use name 'element' instead of 'item' for list elements. This is a historical artifact of Arrow library implementation. Generally increases compatibility, except perhaps with some old versions of Arrow.", 0) \ + M(Bool, output_format_parquet_use_custom_encoder, true, "Use experimental faster Parquet encoder implementation.", 0) \ + M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder.", 0) \ + M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, "Target page size in bytes, before compression.", 0) \ + M(UInt64, output_format_parquet_batch_size, 1024, "Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ @@ -1012,6 +1024,7 @@ class IColumn; M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \ \ M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \ + M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. 
From 46eda82cdcadd9432c65489ccd9a008284dab3a1 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 13 Jul 2023 19:24:30 +0800 Subject: [PATCH 0094/1687] new analyzer: move functions out of any --- src/Analyzer/Passes/AnyFunctionPass.cpp | 90 +++++++++++++++++++++++++ src/Analyzer/Passes/AnyFunctionPass.h | 25 +++++++ src/Analyzer/QueryTreePassManager.cpp | 3 + 3 files changed, 118 insertions(+) create mode 100644 src/Analyzer/Passes/AnyFunctionPass.cpp create mode 100644 src/Analyzer/Passes/AnyFunctionPass.h diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp new file mode 100644 index 00000000000..bcec31eb851 --- /dev/null +++ b/src/Analyzer/Passes/AnyFunctionPass.cpp @@ -0,0 +1,90 @@ +#include "AnyFunctionPass.h" + +#include +#include + +#include +#include +#include + +namespace DB +{ + +namespace +{ + +class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + void visitImpl(QueryTreeNodePtr & node) + { + if (!getSettings().optimize_move_functions_out_of_any) + return; + + auto * function_node = node->as(); + if (!function_node) + return; + + auto is_any = [](const String & name) { return name == "any" || name == "anylast"; }; + + /// check function is any + auto lower_function_name = Poco::toLower(function_node->getFunctionName()); + if (!is_any(lower_function_name)) + return; + + auto & arguments = function_node->getArguments().getNodes(); + if (arguments.size() != 1) + return; + + auto * inside_function_node = arguments[0]->as(); + /// check argument is a function + if (!inside_function_node) + return; + + auto & inside_arguments = inside_function_node->getArguments().getNodes(); + + /// case any(f()) + if (inside_arguments.empty()) + return; + + /// checking done, rewrite function + bool pushed = false; + for (auto & inside_argument : inside_arguments) + { + if (inside_argument->as()) /// skip constant node + break; + + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get(lower_function_name, {inside_argument->getResultType()}, {}, properties); + + auto any_function = std::make_shared(lower_function_name); + any_function->resolveAsAggregateFunction(std::move(aggregate_function)); + any_function->setAlias(inside_argument->getAlias()); + + auto & any_function_arguments = any_function->getArguments().getNodes(); + any_function_arguments.push_back(std::move(inside_argument)); + inside_argument = std::move(any_function); + + pushed = true; + } + + if (pushed) + { + arguments[0]->setAlias(node->getAlias()); + node = arguments[0]; + } + } +}; + +} + +void AnyFunctionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +{ + AnyFunctionVisitor visitor(std::move(context)); + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/AnyFunctionPass.h b/src/Analyzer/Passes/AnyFunctionPass.h new file mode 100644 index 00000000000..0ed83125796 --- /dev/null +++ b/src/Analyzer/Passes/AnyFunctionPass.h @@ -0,0 +1,25 @@ +#include + +namespace DB +{ + +/** Rewrite 'any' and 'anyLast' functions pushing them inside original function. 
+ * + * Example: any(f(x, y, g(z))) + * Result: f(any(x), any(y), g(any(z))) + */ +class AnyFunctionPass final : public IQueryTreePass +{ +public: + String getName() override { return "AnyFunction"; } + + String getDescription() override + { + return "Rewrite 'any' and 'anyLast' functions pushing them inside original function."; + } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index a6da2a66615..0ccf56c96c0 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -42,6 +42,7 @@ #include #include #include +#include namespace DB { @@ -278,6 +279,8 @@ void addQueryTreePasses(QueryTreePassManager & manager) manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); + + manager.addPass(std::make_unique()); } } From f8b4bbcd23ae20b032008a4d25d2787ea4ea11f1 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 14 Jul 2023 09:50:26 +0800 Subject: [PATCH 0095/1687] fix style --- src/Analyzer/Passes/AnyFunctionPass.cpp | 10 ++++------ src/Analyzer/Passes/AnyFunctionPass.h | 2 ++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp index bcec31eb851..1fbf3479d3d 100644 --- a/src/Analyzer/Passes/AnyFunctionPass.cpp +++ b/src/Analyzer/Passes/AnyFunctionPass.cpp @@ -28,11 +28,9 @@ public: if (!function_node) return; - auto is_any = [](const String & name) { return name == "any" || name == "anylast"; }; - /// check function is any - auto lower_function_name = Poco::toLower(function_node->getFunctionName()); - if (!is_any(lower_function_name)) + const auto & function_name = function_node->getFunctionName(); + if (!(function_name == "any" || function_name == "anyLast")) return; auto & arguments = function_node->getArguments().getNodes(); @@ -58,9 +56,9 @@ public: break; AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get(lower_function_name, {inside_argument->getResultType()}, {}, properties); + auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, {inside_argument->getResultType()}, {}, properties); - auto any_function = std::make_shared(lower_function_name); + auto any_function = std::make_shared(function_name); any_function->resolveAsAggregateFunction(std::move(aggregate_function)); any_function->setAlias(inside_argument->getAlias()); diff --git a/src/Analyzer/Passes/AnyFunctionPass.h b/src/Analyzer/Passes/AnyFunctionPass.h index 0ed83125796..0cc65d238dd 100644 --- a/src/Analyzer/Passes/AnyFunctionPass.h +++ b/src/Analyzer/Passes/AnyFunctionPass.h @@ -1,3 +1,5 @@ +#pragma once + #include namespace DB From eb6c1cb549e53e0b181a024c943ea4c0ef8e593e Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 14 Jul 2023 16:01:17 +0800 Subject: [PATCH 0096/1687] add tests --- src/Analyzer/Passes/AnyFunctionPass.cpp | 23 +++- ...3_analyzer_push_any_to_functions.reference | 124 ++++++++++++++++++ .../02813_analyzer_push_any_to_functions.sql | 33 +++++ 3 files changed, 175 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference create mode 100644 tests/queries/0_stateless/02813_analyzer_push_any_to_functions.sql diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp index 1fbf3479d3d..aada2d3a4a7 100644 --- 
a/src/Analyzer/Passes/AnyFunctionPass.cpp +++ b/src/Analyzer/Passes/AnyFunctionPass.cpp @@ -38,8 +38,10 @@ public: return; auto * inside_function_node = arguments[0]->as(); - /// check argument is a function - if (!inside_function_node) + + /// check argument is a function and can not be arrayJoin or lambda + if (!inside_function_node || inside_function_node->getFunctionName() == "arrayJoin" + || inside_function_node->getFunctionName() == "lambda") return; auto & inside_arguments = inside_function_node->getArguments().getNodes(); @@ -48,6 +50,12 @@ public: if (inside_arguments.empty()) return; + if (rewritten.count(node.get())) + { + node = rewritten.at(node.get()); + return; + } + /// checking done, rewrite function bool pushed = false; for (auto & inside_argument : inside_arguments) @@ -60,21 +68,26 @@ public: auto any_function = std::make_shared(function_name); any_function->resolveAsAggregateFunction(std::move(aggregate_function)); - any_function->setAlias(inside_argument->getAlias()); auto & any_function_arguments = any_function->getArguments().getNodes(); any_function_arguments.push_back(std::move(inside_argument)); - inside_argument = std::move(any_function); + inside_argument = std::move(any_function); pushed = true; } if (pushed) { - arguments[0]->setAlias(node->getAlias()); + rewritten.insert({node.get(), arguments[0]}); node = arguments[0]; } } + +private: + /// After query analysis alias will be rewritten to QueryTreeNode + /// whose memory address is same with the original one. + /// So we can reuse the rewritten one. + std::unordered_map rewritten; }; } diff --git a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference new file mode 100644 index 00000000000..025c04af1da --- /dev/null +++ b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference @@ -0,0 +1,124 @@ +-- { echoOn } +SET optimize_move_functions_out_of_any = 1; +EXPLAIN QUERY TREE SELECT any(number + number * 2) FROM numbers(1, 2); +QUERY id: 0 + PROJECTION COLUMNS + any(plus(number, multiply(number, 2))) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: any, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + FUNCTION id: 10, function_name: any, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 7, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 2 + CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 15, constant_value: UInt64_2, constant_value_type: UInt8 +SELECT any(number + number * 2) FROM numbers(1, 2); +3 +EXPLAIN QUERY TREE SELECT anyLast(number + number * 2) FROM numbers(1, 2); +QUERY id: 0 + PROJECTION COLUMNS + anyLast(plus(number, multiply(number, 2))) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: anyLast, function_type: aggregate, 
result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + FUNCTION id: 10, function_name: anyLast, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 7, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 2 + CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 15, constant_value: UInt64_2, constant_value_type: UInt8 +SELECT anyLast(number + number * 2) FROM numbers(1, 2); +6 +EXPLAIN QUERY TREE WITH any(number * 3) AS x SELECT x FROM numbers(1, 2); +QUERY id: 0 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: any, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 7, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8 +WITH any(number * 3) AS x SELECT x FROM numbers(1, 2); +3 +EXPLAIN QUERY TREE SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); +QUERY id: 0 + PROJECTION COLUMNS + x UInt64 + x UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 7, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8 +SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); +6 6 +SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 } +SET optimize_move_functions_out_of_any = 0; +SELECT any(number + number * 2) FROM numbers(1, 2); +3 +SELECT anyLast(number + number * 2) FROM numbers(1, 2); +6 +WITH any(number * 3) AS x SELECT x FROM numbers(1, 2); +3 +SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); +6 6 +SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 } diff --git a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.sql b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.sql new file mode 100644 index 00000000000..c9707d10fde --- /dev/null +++ 
b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.sql @@ -0,0 +1,33 @@ +SET allow_experimental_analyzer = 1; + +-- { echoOn } +SET optimize_move_functions_out_of_any = 1; + +EXPLAIN QUERY TREE SELECT any(number + number * 2) FROM numbers(1, 2); +SELECT any(number + number * 2) FROM numbers(1, 2); + +EXPLAIN QUERY TREE SELECT anyLast(number + number * 2) FROM numbers(1, 2); +SELECT anyLast(number + number * 2) FROM numbers(1, 2); + +EXPLAIN QUERY TREE WITH any(number * 3) AS x SELECT x FROM numbers(1, 2); +WITH any(number * 3) AS x SELECT x FROM numbers(1, 2); + +EXPLAIN QUERY TREE SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); +SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); + +SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 } + + + +SET optimize_move_functions_out_of_any = 0; + +SELECT any(number + number * 2) FROM numbers(1, 2); + +SELECT anyLast(number + number * 2) FROM numbers(1, 2); + +WITH any(number * 3) AS x SELECT x FROM numbers(1, 2); + +SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); + +SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 } +-- { echoOff } From a2dce9663e488841b8407e7556a0eb55da758790 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 14 Jul 2023 17:27:32 +0800 Subject: [PATCH 0097/1687] skip rewriting for lambda and arrayJoin --- src/Analyzer/Passes/AnyFunctionPass.cpp | 54 +++++++++++++++++++++---- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp index aada2d3a4a7..b785df7fb05 100644 --- a/src/Analyzer/Passes/AnyFunctionPass.cpp +++ b/src/Analyzer/Passes/AnyFunctionPass.cpp @@ -5,7 +5,9 @@ #include #include +#include #include +#include namespace DB { @@ -15,6 +17,39 @@ namespace class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContext { +private: + bool canRewrite(const FunctionNode * function_node) + { + for (auto & argument : function_node->getArguments().getNodes()) + { + /// arrayJoin() is special and should not be optimized (think about + /// it as a an aggregate function), otherwise wrong result will be + /// produced: + /// SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number + /// ┌─number─┬─arrayJoin(array(array(), array()))─┐ + /// │ 0 │ [] │ + /// │ 0 │ [] │ + /// └────────┴────────────────────────────────────┘ + /// While should be: + /// ┌─number─┬─any(arrayJoin(array(array(), array())))─┐ + /// │ 0 │ [] │ + /// └────────┴─────────────────────────────────────────┘ + if (argument->as()) + return false; + + if (argument->as()) + return false; + + if (const auto * inside_function = argument->as()) + { + if (!canRewrite(inside_function)) + return false; + } + } + + return true; + } + public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; @@ -24,6 +59,12 @@ public: if (!getSettings().optimize_move_functions_out_of_any) return; + if (rewritten.count(node.get())) + { + node = rewritten.at(node.get()); + return; + } + auto * function_node = node->as(); if (!function_node) return; @@ -40,8 +81,11 @@ public: auto * inside_function_node = arguments[0]->as(); /// check argument is a function and can not be arrayJoin or lambda - if (!inside_function_node || inside_function_node->getFunctionName() == "arrayJoin" - || inside_function_node->getFunctionName() == "lambda") + if (!inside_function_node) + return; + + /// check arguments can not contain arrayJoin or lambda + if (!canRewrite(inside_function_node)) return; auto & inside_arguments = 
inside_function_node->getArguments().getNodes(); @@ -50,12 +94,6 @@ public: if (inside_arguments.empty()) return; - if (rewritten.count(node.get())) - { - node = rewritten.at(node.get()); - return; - } - /// checking done, rewrite function bool pushed = false; for (auto & inside_argument : inside_arguments) From cbd4358bac116cf7dd184b5f48978ae34c2d105e Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 14 Jul 2023 17:29:33 +0800 Subject: [PATCH 0098/1687] fix special build error --- src/Analyzer/Passes/AnyFunctionPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp index b785df7fb05..6aba5a6cfae 100644 --- a/src/Analyzer/Passes/AnyFunctionPass.cpp +++ b/src/Analyzer/Passes/AnyFunctionPass.cpp @@ -59,7 +59,7 @@ public: if (!getSettings().optimize_move_functions_out_of_any) return; - if (rewritten.count(node.get())) + if (rewritten.contains(node.get())) { node = rewritten.at(node.get()); return; From 7ae0c3425f712ea0ca7bc9e5fb2daa547132e149 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 14 Jul 2023 18:30:37 +0800 Subject: [PATCH 0099/1687] fix test error --- src/Analyzer/Passes/AnyFunctionPass.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp index 6aba5a6cfae..63221a4d197 100644 --- a/src/Analyzer/Passes/AnyFunctionPass.cpp +++ b/src/Analyzer/Passes/AnyFunctionPass.cpp @@ -59,12 +59,6 @@ public: if (!getSettings().optimize_move_functions_out_of_any) return; - if (rewritten.contains(node.get())) - { - node = rewritten.at(node.get()); - return; - } - auto * function_node = node->as(); if (!function_node) return; @@ -94,6 +88,12 @@ public: if (inside_arguments.empty()) return; + if (rewritten.contains(node.get())) + { + node = rewritten.at(node.get()); + return; + } + /// checking done, rewrite function bool pushed = false; for (auto & inside_argument : inside_arguments) From f1044386ddf0beb7e7f80668a53d815377078c32 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 14 Jul 2023 19:42:56 +0800 Subject: [PATCH 0100/1687] fix style --- src/Analyzer/Passes/AnyFunctionPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp index 63221a4d197..28de49cb9e9 100644 --- a/src/Analyzer/Passes/AnyFunctionPass.cpp +++ b/src/Analyzer/Passes/AnyFunctionPass.cpp @@ -20,7 +20,7 @@ class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContextgetArguments().getNodes()) + for (const auto & argument : function_node->getArguments().getNodes()) { /// arrayJoin() is special and should not be optimized (think about /// it as a an aggregate function), otherwise wrong result will be From 4cd6737cafb9a8c2a833fcf53905ae191c27e199 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 19 Jul 2023 15:42:47 +0800 Subject: [PATCH 0101/1687] little optimization --- src/Analyzer/Passes/AnyFunctionPass.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp index 28de49cb9e9..f361b89f022 100644 --- a/src/Analyzer/Passes/AnyFunctionPass.cpp +++ b/src/Analyzer/Passes/AnyFunctionPass.cpp @@ -15,7 +15,7 @@ namespace DB namespace { -class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContext +class AnyFunctionVisitor : public InDepthQueryTreeVisitor { private: bool canRewrite(const FunctionNode * 
function_node) @@ -51,14 +51,11 @@ private: } public: - using Base = InDepthQueryTreeVisitorWithContext; + using Base = InDepthQueryTreeVisitor; using Base::Base; void visitImpl(QueryTreeNodePtr & node) { - if (!getSettings().optimize_move_functions_out_of_any) - return; - auto * function_node = node->as(); if (!function_node) return; @@ -132,7 +129,10 @@ private: void AnyFunctionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - AnyFunctionVisitor visitor(std::move(context)); + if (!context->getSettings().optimize_move_functions_out_of_any) + return; + + AnyFunctionVisitor visitor; visitor.visit(query_tree_node); } From 98a30d635c456469ace74bb0b09db681bdd6c672 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 27 Jul 2023 17:46:01 +0800 Subject: [PATCH 0102/1687] remove rewritten --- src/Analyzer/Passes/AnyFunctionPass.cpp | 58 ++++++------------- ...3_analyzer_push_any_to_functions.reference | 34 +++++------ 2 files changed, 32 insertions(+), 60 deletions(-) diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp index f361b89f022..5fd6beec4d8 100644 --- a/src/Analyzer/Passes/AnyFunctionPass.cpp +++ b/src/Analyzer/Passes/AnyFunctionPass.cpp @@ -1,4 +1,4 @@ -#include "AnyFunctionPass.h" +#include #include #include @@ -7,7 +7,6 @@ #include #include #include -#include namespace DB { @@ -15,30 +14,27 @@ namespace DB namespace { -class AnyFunctionVisitor : public InDepthQueryTreeVisitor +class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContext { private: bool canRewrite(const FunctionNode * function_node) { for (const auto & argument : function_node->getArguments().getNodes()) { - /// arrayJoin() is special and should not be optimized (think about - /// it as a an aggregate function), otherwise wrong result will be - /// produced: + if (argument->as()) + return false; + + /// Function arrayJoin is special and should be skipped (think about it as a + /// an aggregate function), otherwise wrong result will be produced. 
+ /// For example: /// SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number /// ┌─number─┬─arrayJoin(array(array(), array()))─┐ /// │ 0 │ [] │ /// │ 0 │ [] │ /// └────────┴────────────────────────────────────┘ - /// While should be: - /// ┌─number─┬─any(arrayJoin(array(array(), array())))─┐ - /// │ 0 │ [] │ - /// └────────┴─────────────────────────────────────────┘ - if (argument->as()) - return false; - - if (argument->as()) - return false; + if (const auto * inside_function = argument->as()) + if (inside_function->getFunctionName() == "arrayJoin") + return false; if (const auto * inside_function = argument->as()) { @@ -51,11 +47,14 @@ private: } public: - using Base = InDepthQueryTreeVisitor; + using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; void visitImpl(QueryTreeNodePtr & node) { + if (!getSettings().optimize_move_functions_out_of_any) + return; + auto * function_node = node->as(); if (!function_node) return; @@ -71,7 +70,7 @@ public: auto * inside_function_node = arguments[0]->as(); - /// check argument is a function and can not be arrayJoin or lambda + /// check argument is a function if (!inside_function_node) return; @@ -85,14 +84,7 @@ public: if (inside_arguments.empty()) return; - if (rewritten.contains(node.get())) - { - node = rewritten.at(node.get()); - return; - } - /// checking done, rewrite function - bool pushed = false; for (auto & inside_argument : inside_arguments) { if (inside_argument->as()) /// skip constant node @@ -108,31 +100,17 @@ public: any_function_arguments.push_back(std::move(inside_argument)); inside_argument = std::move(any_function); - pushed = true; - } - - if (pushed) - { - rewritten.insert({node.get(), arguments[0]}); - node = arguments[0]; } + node = arguments[0]; } -private: - /// After query analysis alias will be rewritten to QueryTreeNode - /// whose memory address is same with the original one. - /// So we can reuse the rewritten one. 
- std::unordered_map rewritten; }; } void AnyFunctionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - if (!context->getSettings().optimize_move_functions_out_of_any) - return; - - AnyFunctionVisitor visitor; + AnyFunctionVisitor visitor(context); visitor.visit(query_tree_node); } diff --git a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference index 025c04af1da..3afb2cc353f 100644 --- a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference +++ b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference @@ -93,32 +93,26 @@ QUERY id: 0 FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64 ARGUMENTS LIST id: 5, nodes: 1 - COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 - CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 6, function_name: anyLast, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64 ARGUMENTS LIST id: 3, nodes: 2 FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64 ARGUMENTS LIST id: 5, nodes: 1 - COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 - CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 6, function_name: anyLast, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 JOIN TREE - TABLE_FUNCTION id: 7, table_function_name: numbers + TABLE_FUNCTION id: 9, table_function_name: numbers ARGUMENTS - LIST id: 9, nodes: 2 - CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 - CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8 + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8 SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); -6 6 -SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 } -SET optimize_move_functions_out_of_any = 0; -SELECT any(number + number * 2) FROM numbers(1, 2); -3 -SELECT anyLast(number + number * 2) FROM numbers(1, 2); -6 -WITH any(number * 3) AS x SELECT x FROM numbers(1, 2); -3 -SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); -6 6 -SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 } From 3a831adcfba0e26d0c5dd4cbb9d75e0699003216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 27 Jul 2023 17:37:04 +0200 Subject: [PATCH 0103/1687] Improve retries on keeper session expiration --- .../MergeTree/ReplicatedMergeTreeSink.cpp | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 0db3464a637..b85bc3eaa17 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -305,16 +305,6 @@ ReplicatedMergeTreeSinkImpl::ReplicatedMergeTreeSinkImpl( template ReplicatedMergeTreeSinkImpl::~ReplicatedMergeTreeSinkImpl() 
= default; -/// Allow to verify that the session in ZooKeeper is still alive. -static void assertSessionIsNotExpired(const zkutil::ZooKeeperPtr & zookeeper) -{ - if (!zookeeper) - throw Exception(ErrorCodes::NO_ZOOKEEPER, "No ZooKeeper session."); - - if (zookeeper->expired()) - throw Exception(ErrorCodes::NO_ZOOKEEPER, "ZooKeeper session has been expired."); -} - template size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const ZooKeeperWithFaultInjectionPtr & zookeeper) { @@ -638,10 +628,16 @@ void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData: { /// NOTE: No delay in this case. That's Ok. auto origin_zookeeper = storage.getZooKeeper(); - assertSessionIsNotExpired(origin_zookeeper); auto zookeeper = std::make_shared(origin_zookeeper); - size_t replicas_num = checkQuorumPrecondition(zookeeper); + size_t replicas_num = 0; + ZooKeeperRetriesControl quorum_retries_ctl("checkQuorumPrecondition", zookeeper_retries_info, context->getProcessListElement()); + quorum_retries_ctl.retryLoop( + [&]() + { + zookeeper->setKeeper(storage.getZooKeeper()); + replicas_num = checkQuorumPrecondition(zookeeper); + }); Stopwatch watch; ProfileEventsScope profile_events_scope; @@ -1185,7 +1181,6 @@ template void ReplicatedMergeTreeSinkImpl::onFinish() { auto zookeeper = storage.getZooKeeper(); - assertSessionIsNotExpired(zookeeper); finishDelayedChunk(std::make_shared(zookeeper)); } From 5c217467e86e159666e6179ed38760ff6e136fd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 27 Jul 2023 17:45:15 +0200 Subject: [PATCH 0104/1687] Unify retries under checkQuorumPrecondition --- .../MergeTree/ReplicatedMergeTreeSink.cpp | 125 +++++++++--------- 1 file changed, 63 insertions(+), 62 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b85bc3eaa17..336c5d692e5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -311,65 +311,80 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const if (!isQuorumEnabled()) return 0; - quorum_info.status_path = storage.zookeeper_path + "/quorum/status"; + size_t replicas_number = 0; - Strings replicas = zookeeper->getChildren(fs::path(storage.zookeeper_path) / "replicas"); + ZooKeeperRetriesControl quorum_retries_ctl("checkQuorumPrecondition", zookeeper_retries_info, context->getProcessListElement()); + quorum_retries_ctl.retryLoop( + [&]() + { + zookeeper->setKeeper(storage.getZooKeeper()); - Strings exists_paths; - exists_paths.reserve(replicas.size()); - for (const auto & replica : replicas) - if (replica != storage.replica_name) - exists_paths.emplace_back(fs::path(storage.zookeeper_path) / "replicas" / replica / "is_active"); + quorum_info.status_path = storage.zookeeper_path + "/quorum/status"; - auto exists_result = zookeeper->exists(exists_paths); - auto get_results = zookeeper->get(Strings{storage.replica_path + "/is_active", storage.replica_path + "/host"}); + Strings replicas = zookeeper->getChildren(fs::path(storage.zookeeper_path) / "replicas"); - Coordination::Error keeper_error = Coordination::Error::ZOK; - size_t active_replicas = 1; /// Assume current replica is active (will check below) - for (size_t i = 0; i < exists_paths.size(); ++i) - { - auto error = exists_result[i].error; - if (error == Coordination::Error::ZOK) - ++active_replicas; - else if (Coordination::isHardwareError(error)) - keeper_error = error; - } + Strings exists_paths; + 
exists_paths.reserve(replicas.size()); + for (const auto & replica : replicas) + if (replica != storage.replica_name) + exists_paths.emplace_back(fs::path(storage.zookeeper_path) / "replicas" / replica / "is_active"); - size_t replicas_number = replicas.size(); - size_t quorum_size = getQuorumSize(replicas_number); + auto exists_result = zookeeper->exists(exists_paths); + auto get_results = zookeeper->get(Strings{storage.replica_path + "/is_active", storage.replica_path + "/host"}); - if (active_replicas < quorum_size) - { - if (Coordination::isHardwareError(keeper_error)) - throw Coordination::Exception("Failed to check number of alive replicas", keeper_error); + Coordination::Error keeper_error = Coordination::Error::ZOK; + size_t active_replicas = 1; /// Assume current replica is active (will check below) + for (size_t i = 0; i < exists_paths.size(); ++i) + { + auto error = exists_result[i].error; + if (error == Coordination::Error::ZOK) + ++active_replicas; + else if (Coordination::isHardwareError(error)) + keeper_error = error; + } - throw Exception(ErrorCodes::TOO_FEW_LIVE_REPLICAS, "Number of alive replicas ({}) is less than requested quorum ({}/{}).", - active_replicas, quorum_size, replicas_number); - } + replicas_number = replicas.size(); + size_t quorum_size = getQuorumSize(replicas_number); - /** Is there a quorum for the last part for which a quorum is needed? - * Write of all the parts with the included quorum is linearly ordered. - * This means that at any time there can be only one part, - * for which you need, but not yet reach the quorum. - * Information about this part will be located in `/quorum/status` node. - * If the quorum is reached, then the node is deleted. - */ + if (active_replicas < quorum_size) + { + if (Coordination::isHardwareError(keeper_error)) + throw Coordination::Exception("Failed to check number of alive replicas", keeper_error); - String quorum_status; - if (!quorum_parallel && zookeeper->tryGet(quorum_info.status_path, quorum_status)) - throw Exception(ErrorCodes::UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE, - "Quorum for previous write has not been satisfied yet. Status: {}", quorum_status); + throw Exception( + ErrorCodes::TOO_FEW_LIVE_REPLICAS, + "Number of alive replicas ({}) is less than requested quorum ({}/{}).", + active_replicas, + quorum_size, + replicas_number); + } - /// Both checks are implicitly made also later (otherwise there would be a race condition). + /** Is there a quorum for the last part for which a quorum is needed? + * Write of all the parts with the included quorum is linearly ordered. + * This means that at any time there can be only one part, + * for which you need, but not yet reach the quorum. + * Information about this part will be located in `/quorum/status` node. + * If the quorum is reached, then the node is deleted. + */ - auto is_active = get_results[0]; - auto host = get_results[1]; + String quorum_status; + if (!quorum_parallel && zookeeper->tryGet(quorum_info.status_path, quorum_status)) + throw Exception( + ErrorCodes::UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE, + "Quorum for previous write has not been satisfied yet. Status: {}", + quorum_status); - if (is_active.error == Coordination::Error::ZNONODE || host.error == Coordination::Error::ZNONODE) - throw Exception(ErrorCodes::READONLY, "Replica is not active right now"); + /// Both checks are implicitly made also later (otherwise there would be a race condition). 
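The refactor above folds the whole quorum precondition into one retry scope. The shape of that wrapper deserves a note: it only works because every attempt re-acquires a live ZooKeeper session before touching any paths; otherwise a retry would reuse the very session whose expiration caused the retry. A minimal sketch of the pattern using the names from this patch (illustrative only; the real ZooKeeperRetriesControl also applies backoff and checks the query status):

```cpp
size_t replicas_num = 0;
ZooKeeperRetriesControl retries_ctl("checkQuorumPrecondition", zookeeper_retries_info, context->getProcessListElement());
retries_ctl.retryLoop([&]()
{
    /// First statement of every attempt: if the previous attempt died with a
    /// session-expired error, storage.getZooKeeper() hands back a freshly
    /// established session for this one.
    zookeeper->setKeeper(storage.getZooKeeper());

    /// The body is re-entered from scratch on each retry, so replica lists and
    /// node versions read in a previous attempt must not be reused here.
    replicas_num = checkQuorumPrecondition(zookeeper);
});
```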
- quorum_info.is_active_node_version = is_active.stat.version; - quorum_info.host_node_version = host.stat.version; + auto is_active = get_results[0]; + auto host = get_results[1]; + + if (is_active.error == Coordination::Error::ZNONODE || host.error == Coordination::Error::ZNONODE) + throw Exception(ErrorCodes::READONLY, "Replica is not active right now"); + + quorum_info.is_active_node_version = is_active.stat.version; + quorum_info.host_node_version = host.stat.version; + }); return replicas_number; } @@ -402,14 +417,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) * And also check that during the insertion, the replica was not reinitialized or disabled (by the value of `is_active` node). * TODO Too complex logic, you can do better. */ - size_t replicas_num = 0; - ZooKeeperRetriesControl quorum_retries_ctl("checkQuorumPrecondition", zookeeper_retries_info, context->getProcessListElement()); - quorum_retries_ctl.retryLoop( - [&]() - { - zookeeper->setKeeper(storage.getZooKeeper()); - replicas_num = checkQuorumPrecondition(zookeeper); - }); + size_t replicas_num = checkQuorumPrecondition(zookeeper); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -630,14 +638,7 @@ void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData: auto origin_zookeeper = storage.getZooKeeper(); auto zookeeper = std::make_shared(origin_zookeeper); - size_t replicas_num = 0; - ZooKeeperRetriesControl quorum_retries_ctl("checkQuorumPrecondition", zookeeper_retries_info, context->getProcessListElement()); - quorum_retries_ctl.retryLoop( - [&]() - { - zookeeper->setKeeper(storage.getZooKeeper()); - replicas_num = checkQuorumPrecondition(zookeeper); - }); + size_t replicas_num = checkQuorumPrecondition(zookeeper); Stopwatch watch; ProfileEventsScope profile_events_scope; From 05427c2983a17c2516de607277e8530b92bbf95a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 27 Jul 2023 18:31:02 +0200 Subject: [PATCH 0105/1687] Style --- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 336c5d692e5..3f442c42106 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -33,7 +33,6 @@ namespace ErrorCodes extern const int TOO_FEW_LIVE_REPLICAS; extern const int UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE; extern const int UNEXPECTED_ZOOKEEPER_ERROR; - extern const int NO_ZOOKEEPER; extern const int READONLY; extern const int UNKNOWN_STATUS_OF_INSERT; extern const int INSERT_WAS_DEDUPLICATED; From e2a3a0e37ad46189d930d37ac844bbc578cfa74a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 28 Jul 2023 13:33:14 +0200 Subject: [PATCH 0106/1687] Adapt test --- tests/integration/test_storage_kafka/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index d0686c7c36f..9f41b960364 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -2916,7 +2916,7 @@ def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster): # while materialized view is working to inject zookeeper failure pm.drop_instance_zk_connections(instance) instance.wait_for_log_line( - "Error.*(session has been expired|Connection loss).*while pushing to view" + 
"Error.*(session has been expired|Connection loss|Coordination::Exception).*while pushing to view" ) pm.heal_all() instance.wait_for_log_line("Committed offset 22") From fa74d6c69ace05076aaa0a57e626ece1feabc5a1 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 31 Jul 2023 17:01:27 +0000 Subject: [PATCH 0107/1687] Add hideRecursive() and hideElements() --- src/Common/Config/ConfigProcessor.cpp | 26 ++++++++++++++++++++++++++ src/Common/Config/ConfigProcessor.h | 3 +++ 2 files changed, 29 insertions(+) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 04f55600b40..cfbcd0bd258 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -254,6 +254,23 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) #endif +void ConfigProcessor::hideRecursive(Poco::XML::Node * config_root) +{ + for (Node * node = config_root->firstChild(); node; node = node->nextSibling()) + { + if (node->nodeType() == Node::ELEMENT_NODE) + { + Element & element = dynamic_cast(*node); + if (element.hasAttribute("hidden") && element.getAttribute("hidden") == "true") + { + config_root->removeChild(node); + } else + hideRecursive(node); + } + } +} + + void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root) { const NodeListPtr with_nodes = with_root->childNodes(); @@ -792,10 +809,19 @@ void ConfigProcessor::decryptEncryptedElements(LoadedConfig & loaded_config) #endif +void ConfigProcessor::hideElements(LoadedConfig & loaded_config) +{ + Node * config_root = getRootNode(loaded_config.preprocessed_xml.get()); + hideRecursive(config_root); + // loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml); +} + void ConfigProcessor::savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir) { try { + hideElements(loaded_config); + if (preprocessed_path.empty()) { fs::path preprocessed_configs_path("preprocessed_configs/"); diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index b4f85b10526..0d41372e796 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -142,6 +142,9 @@ private: void decryptEncryptedElements(LoadedConfig & loaded_config); #endif + void hideRecursive(Poco::XML::Node * config_root); + void hideElements(LoadedConfig & loaded_config); + void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); /// If config root node name is not 'clickhouse' and merging config's root node names doesn't match, bypasses merging and returns false. 
From e851be78bce89b5cf652c710714277824c4f85e8 Mon Sep 17 00:00:00 2001
From: zvonand
Date: Mon, 31 Jul 2023 23:56:07 +0200
Subject: [PATCH 0108/1687] fix fs_error on attempt to read unavailable dirs

---
 src/Storages/StorageFile.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index cbd32460f7e..7e214b76e3d 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -122,7 +122,7 @@ void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls,
         return;
 
     const fs::directory_iterator end;
-    for (fs::directory_iterator it(path_for_ls); it != end; ++it)
+    for (fs::directory_iterator it(path_for_ls, std::filesystem::directory_options::skip_permission_denied); it != end; ++it)
     {
         const std::string full_path = it->path().string();
         const size_t last_slash = full_path.rfind('/');

From c8d995e42e4e2595981fed21191a20da00f80192 Mon Sep 17 00:00:00 2001
From: UnamedRus
Date: Tue, 1 Aug 2023 01:21:46 +0300
Subject: [PATCH 0109/1687] add optimize to bitmap write method

---
 src/AggregateFunctions/AggregateFunctionGroupBitmapData.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h
index 7ea1ebe7749..f92f8c1b5e5 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h
+++ b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h
@@ -151,6 +151,7 @@ public:
         }
         else if (BitmapKind::Bitmap == kind)
         {
+            roaring_bitmap->runOptimize();
             auto size = roaring_bitmap->getSizeInBytes();
             writeVarUInt(size, out);
             std::unique_ptr buf(new char[size]);

From a71cd56a906a05fd31e878112b79f58676a8156e Mon Sep 17 00:00:00 2001
From: avogar
Date: Tue, 1 Aug 2023 10:06:56 +0000
Subject: [PATCH 0110/1687] Output valid JSON/XML on exception during HTTP
 query execution

---
 docs/en/interfaces/http.md                    |  13 +
 src/Core/Settings.h                           |   1 +
 src/Core/SettingsChangesHistory.h             |   1 +
 src/Formats/FormatFactory.cpp                 |   8 +-
 src/Formats/FormatSettings.h                  |   6 +
 src/Formats/JSONUtils.cpp                     |   6 +
 src/Formats/JSONUtils.h                       |   2 +
 src/IO/PeekableWriteBuffer.cpp                |  85 ++++
 src/IO/PeekableWriteBuffer.h                  |  59 +++
 src/Interpreters/executeQuery.cpp             |  18 +-
 src/Interpreters/executeQuery.h               |   5 +-
 src/Processors/Formats/IOutputFormat.h        |   8 +
 .../Impl/JSONColumnsBlockOutputFormatBase.cpp |   3 +-
 .../Impl/JSONColumnsBlockOutputFormatBase.h   |   1 +
 .../JSONCompactEachRowRowOutputFormat.cpp     |  16 +-
 .../Impl/JSONCompactEachRowRowOutputFormat.h  |   8 +-
 .../Impl/JSONEachRowRowOutputFormat.cpp       |  18 +-
 .../Formats/Impl/JSONEachRowRowOutputFormat.h |   9 +-
 .../Impl/JSONObjectEachRowRowOutputFormat.cpp |   7 +
 .../Formats/Impl/JSONRowOutputFormat.cpp      |  13 +-
 .../Formats/Impl/JSONRowOutputFormat.h        |   5 +-
 .../Impl/ParallelFormattingOutputFormat.cpp   |  48 +-
 .../Impl/ParallelFormattingOutputFormat.h     |  27 ++
 .../Formats/Impl/XMLRowOutputFormat.cpp       |  13 +-
 .../Formats/Impl/XMLRowOutputFormat.h         |   5 +-
 .../OutputFormatWithExceptionHandlerAdaptor.h |  75 +++
 .../OutputFormatWithUTF8ValidationAdaptor.h   |  32 +-
 ...wOutputFormatWithExceptionHandlerAdaptor.h | 104 +++++
 src/Server/HTTPHandler.cpp                    |  80 ++--
 src/Server/HTTPHandler.h                      |   2 +
 ...d_json_and_xml_on_http_exception.reference | 432 ++++++++++++++++++
 ...41_valid_json_and_xml_on_http_exception.sh | 106 +++++
 32 files changed, 1138 insertions(+), 78 deletions(-)
 create mode 100644 src/IO/PeekableWriteBuffer.cpp
 create mode 100644 src/IO/PeekableWriteBuffer.h
 create mode 100644
src/Processors/Formats/OutputFormatWithExceptionHandlerAdaptor.h
 create mode 100644 src/Processors/Formats/RowOutputFormatWithExceptionHandlerAdaptor.h
 create mode 100644 tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference
 create mode 100755 tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh

diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index 37821f0fee1..b28180fec67 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -697,3 +697,16 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
 Relative Path File
 * Connection #0 to host localhost left intact
 ```
+
+## Valid JSON/XML response on exception during HTTP streaming {#valid-output-on-exception-http-streaming}
+
+During query execution over HTTP, an exception can happen when part of the data has already been sent. Usually the exception is sent to the client in plain text,
+even if a specific data format was used for the output, so the response may become invalid in terms of the specified data format.
+To prevent this, you can use the setting `http_write_exception_in_output_format` (enabled by default), which tells ClickHouse to write the exception in the specified format (currently supported for XML and JSON* formats).
+
+Examples:
+
+```bash
+$ curl 'http://localhost:8123/?http_write_exception_in_output_format=1' --data-binary "SELECT number, throwIf(number > 3) AS res FROM numbers(10) FORMAT JSON"
+```
+
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index c69d132ea25..b8ba6454f61 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -276,6 +276,7 @@ class IColumn;
     \
     M(UInt64, http_headers_progress_interval_ms, 100, "Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified interval.", 0) \
     M(Bool, http_wait_end_of_query, false, "Enable HTTP response buffering on the server-side.", 0) \
+    M(Bool, http_write_exception_in_output_format, true, "Write exception in output format to produce valid output. Works with JSON and XML formats.", 0) \
     M(UInt64, http_response_buffer_size, 0, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \
     \
     M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index 70b702f1b33..3172b246d68 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -80,6 +80,7 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map settings_changes_history =
 {
+    {"23.8", {{"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}},
     {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}},
     {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long.
Note that this is timeout for a single network write call, not for the whole upload operation."}, {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 663b7f1ba95..4cd2ad5be03 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -1,7 +1,7 @@ #include #include -#include +//#include #include #include #include @@ -224,6 +224,12 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) context->getRemoteHostFilter().checkURL(avro_schema_registry_url); } + if (context->getClientInfo().interface == ClientInfo::Interface::HTTP && context->getSettingsRef().http_write_exception_in_output_format.value) + { + format_settings.json.valid_output_on_exception = true; + format_settings.xml.valid_output_on_exception = true; + } + return format_settings; } diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 3259c46e5ff..a2ef0b035e9 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -198,6 +198,7 @@ struct FormatSettings bool validate_types_from_metadata = true; bool validate_utf8 = false; bool allow_object_type = false; + bool valid_output_on_exception = false; } json; struct @@ -399,6 +400,11 @@ struct FormatSettings { bool allow_types_conversion = true; } native; + + struct + { + bool valid_output_on_exception = false; + } xml; }; } diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 0aac72c68fe..aead2a3806a 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -451,6 +451,12 @@ namespace JSONUtils } } + void writeException(const String & exception_message, WriteBuffer & out, const FormatSettings & settings, size_t indent) + { + writeTitle("exception", out, indent, " "); + writeJSONString(exception_message, out, settings); + } + Strings makeNamesValidJSONStrings(const Strings & names, const FormatSettings & settings, bool validate_utf8) { Strings result; diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index fd1ba7db980..c023125ce66 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -105,6 +105,8 @@ namespace JSONUtils bool write_statistics, WriteBuffer & out); + void writeException(const String & exception_message, WriteBuffer & out, const FormatSettings & settings, size_t indent = 0); + void skipColon(ReadBuffer & in); void skipComma(ReadBuffer & in); diff --git a/src/IO/PeekableWriteBuffer.cpp b/src/IO/PeekableWriteBuffer.cpp new file mode 100644 index 00000000000..dc7f87dd539 --- /dev/null +++ b/src/IO/PeekableWriteBuffer.cpp @@ -0,0 +1,85 @@ +#include + +namespace DB +{ + +PeekableWriteBuffer::PeekableWriteBuffer(DB::WriteBuffer & sub_buf_) : BufferWithOwnMemory(0), sub_buf(sub_buf_) +{ + Buffer & sub_working = sub_buf.buffer(); + BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset()); +} + +void PeekableWriteBuffer::nextImpl() +{ + if (checkpoint) + { + if (write_to_own_memory) + { + size_t prev_size = position() - memory.data(); + size_t new_size = memory.size() * 2; + memory.resize(new_size); + BufferBase::set(memory.data(), memory.size(), prev_size); + return; + } + + if (memory.size() == 0) + memory.resize(DBMS_DEFAULT_BUFFER_SIZE); + + sub_buf.position() = position(); + BufferBase::set(memory.data(), memory.size(), 0); + write_to_own_memory = true; + return; + } + + sub_buf.position() = position(); + sub_buf.next(); + BufferBase::set(sub_buf.buffer().begin(), sub_buf.buffer().size(), sub_buf.offset()); +} 
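`nextImpl()` above is the heart of the new buffer: with no checkpoint set it passes writes straight through to the sub-buffer, and with one set it grows its own memory instead of flushing. The intended life cycle is easiest to see in a small usage sketch (illustrative only; `dropCheckpoint()` and `rollbackToCheckpoint()` are defined just below, and `row_is_good` stands in for whatever condition the caller checks):

```cpp
#include <IO/PeekableWriteBuffer.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>

void example(bool row_is_good)
{
    DB::WriteBufferFromOwnString out;
    DB::PeekableWriteBuffer buf(out);

    DB::writeString("header,", buf);  /// no checkpoint yet: flushed towards `out` on next()

    buf.setCheckpoint();              /// from here on, bytes accumulate in own memory
    DB::writeString("row-1,", buf);

    if (row_is_good)
        buf.dropCheckpoint();         /// "row-1," is handed over to `out`
    else
        buf.rollbackToCheckpoint(/*drop=*/ true);  /// "row-1," is discarded; `out` never sees it

    buf.finalize();                   /// legal only while no checkpoint is active
}
```

Note that `dropCheckpoint()` below restores a consistent buffer state even when flushing the saved bytes into the sub-buffer throws, instead of leaving the buffer pointing into its own memory.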
+ + +void PeekableWriteBuffer::dropCheckpoint() +{ + assert(checkpoint); + checkpoint = std::nullopt; + /// If we have saved data in own memory, write it to sub-buf. + if (write_to_own_memory) + { + try + { + sub_buf.next(); + sub_buf.write(memory.data(), position() - memory.data()); + Buffer & sub_working = sub_buf.buffer(); + BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset()); + write_to_own_memory = false; + } + catch (...) + { + /// If exception happened during writing to sub buffer, we should + /// update buffer to not leave it in invalid state. + Buffer & sub_working = sub_buf.buffer(); + BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset()); + write_to_own_memory = false; + } + } + +} + +void PeekableWriteBuffer::rollbackToCheckpoint(bool drop) +{ + assert(checkpoint); + + /// Just ignore all data written after checkpoint. + if (write_to_own_memory) + { + Buffer & sub_working = sub_buf.buffer(); + BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset()); + write_to_own_memory = false; + } + + position() = *checkpoint; + + if (drop) + checkpoint = std::nullopt; +} + +} diff --git a/src/IO/PeekableWriteBuffer.h b/src/IO/PeekableWriteBuffer.h new file mode 100644 index 00000000000..e7094f11fcb --- /dev/null +++ b/src/IO/PeekableWriteBuffer.h @@ -0,0 +1,59 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/// Similar to PeekableReadBuffer. +/// Allows to set checkpoint at some position in stream and come back to this position later. +/// When next() is called, saves data between checkpoint and current position to own memory instead of writing it to sub-buffer. +/// So, all the data after checkpoint won't be written in sub-buffer until checkpoint is dropped. +/// Rollback to checkpoint means that all data after checkpoint will be ignored and not sent to sub-buffer. +/// Sub-buffer should not be accessed directly during the lifetime of peekable buffer (unless +/// you reset() the state of peekable buffer after each change of underlying buffer) +/// If position() of peekable buffer is explicitly set to some position before checkpoint +/// (e.g. by istr.position() = prev_pos), behavior is undefined. +class PeekableWriteBuffer : public BufferWithOwnMemory +{ + friend class PeekableWriteBufferCheckpoint; +public: + explicit PeekableWriteBuffer(WriteBuffer & sub_buf_); + + /// Sets checkpoint at current position + ALWAYS_INLINE inline void setCheckpoint() + { + if (checkpoint) + throw Exception(ErrorCodes::LOGICAL_ERROR, "PeekableWriteBuffer does not support recursive checkpoints."); + + checkpoint.emplace(pos); + } + + /// Forget checkpoint and send all data from checkpoint to position to sub-buffer. + void dropCheckpoint(); + + /// Sets position at checkpoint and forget all data written from checkpoint to position. 
+ /// All pointers (such as this->buffer().end()) may be invalidated + void rollbackToCheckpoint(bool drop = false); + + void finalizeImpl() override + { + assert(!checkpoint); + sub_buf.position() = position(); + } + +private: + void nextImpl() override; + + WriteBuffer & sub_buf; + bool write_to_own_memory = false; + std::optional checkpoint = std::nullopt; +}; + +} diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 578ca3b41f9..651305d9c52 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1250,7 +1250,8 @@ void executeQuery( bool allow_into_outfile, ContextMutablePtr context, SetResultDetailsFunc set_result_details, - const std::optional & output_format_settings) + const std::optional & output_format_settings, + HandleExceptionInOutputFormatFunc handle_exception_in_output_format) { PODArray parse_buf; const char * begin; @@ -1308,6 +1309,7 @@ void executeQuery( ASTPtr ast; BlockIO streams; + OutputFormatPtr output_format; std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, &istr); auto & pipeline = streams.pipeline; @@ -1350,30 +1352,30 @@ void executeQuery( ? getIdentifierName(ast_query_with_output->format) : context->getDefaultFormat(); - auto out = FormatFactory::instance().getOutputFormatParallelIfPossible( + output_format = FormatFactory::instance().getOutputFormatParallelIfPossible( format_name, compressed_buffer ? *compressed_buffer : *out_buf, materializeBlock(pipeline.getHeader()), context, output_format_settings); - out->setAutoFlush(); + output_format->setAutoFlush(); /// Save previous progress callback if any. TODO Do it more conveniently. auto previous_progress_callback = context->getProgressCallback(); /// NOTE Progress callback takes shared ownership of 'out'. - pipeline.setProgressCallback([out, previous_progress_callback] (const Progress & progress) + pipeline.setProgressCallback([output_format, previous_progress_callback] (const Progress & progress) { if (previous_progress_callback) previous_progress_callback(progress); - out->onProgress(progress); + output_format->onProgress(progress); }); - result_details.content_type = out->getContentType(); + result_details.content_type = output_format->getContentType(); result_details.format = format_name; - pipeline.complete(std::move(out)); + pipeline.complete(output_format); } else { @@ -1403,6 +1405,8 @@ void executeQuery( } catch (...) { + if (handle_exception_in_output_format && output_format) + handle_exception_in_output_format(*output_format); streams.onException(); throw; } diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index f2a12bbef18..11ef17aaade 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -15,6 +15,7 @@ namespace DB class IInterpreter; class ReadBuffer; class WriteBuffer; +class IOutputFormat; struct QueryStatusInfo; struct QueryResultDetails @@ -26,6 +27,7 @@ struct QueryResultDetails }; using SetResultDetailsFunc = std::function; +using HandleExceptionInOutputFormatFunc = std::function; /// Parse and execute a query. void executeQuery( @@ -34,7 +36,8 @@ void executeQuery( bool allow_into_outfile, /// If true and the query contains INTO OUTFILE section, redirect output to that file. ContextMutablePtr context, /// DB, tables, data types, storage engines, functions, aggregate functions... 
SetResultDetailsFunc set_result_details, /// If a non-empty callback is passed, it will be called with the query id, the content-type, the format, and the timezone. - const std::optional & output_format_settings = std::nullopt /// Format settings for output format, will be calculated from the context if not set. + const std::optional & output_format_settings = std::nullopt, /// Format settings for output format, will be calculated from the context if not set. + HandleExceptionInOutputFormatFunc handle_exception_in_output_format = {} /// If a non-empty callback is passed, it will be called on exception with created output format. ); diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 58700a978ff..cae2ab7691e 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -71,6 +71,9 @@ public: consumeExtremes(Chunk(extremes.getColumns(), extremes.rows())); } + virtual bool supportsWritingException() const { return false; } + virtual void setException(const String & /*exception_message*/) {} + size_t getResultRows() const { return result_rows; } size_t getResultBytes() const { return result_bytes; } @@ -162,6 +165,11 @@ protected: /// outputs them in finalize() method. virtual bool areTotalsAndExtremesUsedInFinalize() const { return false; } + /// Derived classes can use some wrappers around out WriteBuffer + /// and can override this method to return wrapper + /// that should be used in its derived classes. + virtual WriteBuffer * getWriteBufferPtr() { return &out; } + WriteBuffer & out; Chunk current_chunk; diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp index 490516b7eb4..72a009c20bf 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp @@ -9,10 +9,11 @@ namespace DB JSONColumnsBlockOutputFormatBase::JSONColumnsBlockOutputFormatBase( WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool validate_utf8) - : OutputFormatWithUTF8ValidationAdaptor(validate_utf8, header_, out_) + : OutputFormatWithUTF8ValidationAdaptor(header_, out_, validate_utf8) , format_settings(format_settings_) , serializations(header_.getSerializations()) { + ostr = OutputFormatWithUTF8ValidationAdaptor::getWriteBufferPtr(); } void JSONColumnsBlockOutputFormatBase::consume(Chunk chunk) diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h index 235a6d4da96..d73ac53b97a 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h @@ -38,6 +38,7 @@ protected: Chunk mono_chunk; size_t written_rows = 0; + WriteBuffer * ostr; }; } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp index 0cafc053467..c5c9af60982 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp @@ -15,12 +15,13 @@ JSONCompactEachRowRowOutputFormat::JSONCompactEachRowRowOutputFormat(WriteBuffer bool with_names_, bool with_types_, bool yield_strings_) - : RowOutputFormatWithUTF8ValidationAdaptor(settings_.json.validate_utf8, header_, out_) + : RowOutputFormatWithExceptionHandlerAdaptor(header_, 
out_, settings_.json.valid_output_on_exception, settings_.json.validate_utf8) , settings(settings_) , with_names(with_names_) , with_types(with_types_) , yield_strings(yield_strings_) { + ostr = RowOutputFormatWithExceptionHandlerAdaptor::getWriteBufferPtr(); } @@ -102,6 +103,19 @@ void JSONCompactEachRowRowOutputFormat::consumeTotals(DB::Chunk chunk) IRowOutputFormat::consumeTotals(std::move(chunk)); } +void JSONCompactEachRowRowOutputFormat::writeSuffix() +{ + if (!exception_message.empty()) + { + if (haveWrittenData()) + writeRowBetweenDelimiter(); + + writeRowStartDelimiter(); + writeJSONString(exception_message, *ostr, settings); + writeRowEndDelimiter(); + } +} + void registerOutputFormatJSONCompactEachRow(FormatFactory & factory) { for (bool yield_strings : {false, true}) diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h index 2be39669dd2..a05fff699a5 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h @@ -3,15 +3,16 @@ #include #include #include +#include #include namespace DB { -/** The stream for outputting data in JSON format, by object per line. +/** The stream for outputting data in JSON format, by JSON array per line. */ -class JSONCompactEachRowRowOutputFormat final : public RowOutputFormatWithUTF8ValidationAdaptor +class JSONCompactEachRowRowOutputFormat final : public RowOutputFormatWithExceptionHandlerAdaptor { public: JSONCompactEachRowRowOutputFormat( @@ -33,6 +34,7 @@ private: void writeFieldDelimiter() override; void writeRowStartDelimiter() override; void writeRowEndDelimiter() override; + void writeSuffix() override; bool supportTotals() const override { return true; } void consumeTotals(Chunk) override; @@ -43,5 +45,7 @@ private: bool with_names; bool with_types; bool yield_strings; + + WriteBuffer * ostr; }; } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp index 5b8f6cc1af7..2169d815fbf 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -14,10 +15,12 @@ JSONEachRowRowOutputFormat::JSONEachRowRowOutputFormat( const Block & header_, const FormatSettings & settings_, bool pretty_json_) - : RowOutputFormatWithUTF8ValidationAdaptor(settings_.json.validate_utf8, header_, out_), - pretty_json(pretty_json_), - settings(settings_) + : RowOutputFormatWithExceptionHandlerAdaptor( + header_, out_, settings_.json.valid_output_on_exception, settings_.json.validate_utf8) + , pretty_json(pretty_json_) + , settings(settings_) { + ostr = RowOutputFormatWithExceptionHandlerAdaptor::getWriteBufferPtr(); fields = JSONUtils::makeNamesValidJSONStrings(getPort(PortKind::Main).getHeader().getNames(), settings, settings.json.validate_utf8); } @@ -76,6 +79,15 @@ void JSONEachRowRowOutputFormat::writePrefix() void JSONEachRowRowOutputFormat::writeSuffix() { + if (!exception_message.empty()) + { + if (haveWrittenData()) + writeRowBetweenDelimiter(); + writeRowStartDelimiter(); + JSONUtils::writeException(exception_message, *ostr, settings, pretty_json ? 
1 : 0); + writeRowEndDelimiter(); + } + if (settings.json.array_of_rows) writeCString("\n]\n", *ostr); } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h index e05d189afe9..28bfbf2e6ac 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h @@ -2,7 +2,9 @@ #include #include +#include #include +#include #include @@ -11,7 +13,7 @@ namespace DB /** The stream for outputting data in JSON format, by object per line. */ -class JSONEachRowRowOutputFormat : public RowOutputFormatWithUTF8ValidationAdaptor +class JSONEachRowRowOutputFormat : public RowOutputFormatWithExceptionHandlerAdaptor { public: JSONEachRowRowOutputFormat( @@ -40,10 +42,11 @@ protected: size_t field_number = 0; bool pretty_json; + FormatSettings settings; + WriteBuffer * ostr; + private: Names fields; - - FormatSettings settings; }; } diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp index a02199d6075..8f4d11a604a 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp @@ -62,6 +62,13 @@ void JSONObjectEachRowRowOutputFormat::writeRowBetweenDelimiter() void JSONObjectEachRowRowOutputFormat::writeSuffix() { + if (!exception_message.empty()) + { + if (haveWrittenData()) + writeRowBetweenDelimiter(); + JSONUtils::writeException(exception_message, *ostr, settings, 1); + } + JSONUtils::writeObjectEnd(*ostr); writeChar('\n', *ostr); } diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index 0193ec7e3d3..e4c4e2a3bc6 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -13,9 +13,10 @@ JSONRowOutputFormat::JSONRowOutputFormat( const Block & header, const FormatSettings & settings_, bool yield_strings_) - : RowOutputFormatWithUTF8ValidationAdaptor(true, header, out_), settings(settings_), yield_strings(yield_strings_) + : RowOutputFormatWithExceptionHandlerAdaptor(header, out_, settings_.json.valid_output_on_exception, true), settings(settings_), yield_strings(yield_strings_) { names = JSONUtils::makeNamesValidJSONStrings(header.getNames(), settings, true); + ostr = RowOutputFormatWithExceptionHandlerAdaptor::getWriteBufferPtr(); } @@ -117,9 +118,15 @@ void JSONRowOutputFormat::finalizeImpl() statistics.applied_limit, statistics.watch, statistics.progress, - settings.write_statistics, + settings.write_statistics && exception_message.empty(), *ostr); + if (!exception_message.empty()) + { + writeCString(",\n\n", *ostr); + JSONUtils::writeException(exception_message, *ostr, settings, 1); + } + JSONUtils::writeObjectEnd(*ostr); writeChar('\n', *ostr); ostr->next(); @@ -127,7 +134,7 @@ void JSONRowOutputFormat::finalizeImpl() void JSONRowOutputFormat::resetFormatterImpl() { - RowOutputFormatWithUTF8ValidationAdaptor::resetFormatterImpl(); + RowOutputFormatWithExceptionHandlerAdaptor::resetFormatterImpl(); row_count = 0; statistics = Statistics(); } diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.h b/src/Processors/Formats/Impl/JSONRowOutputFormat.h index dc3f0541af0..a38cd0e8db9 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.h @@ -3,8 +3,10 @@ #include #include #include +#include 
#include #include +#include #include @@ -13,7 +15,7 @@ namespace DB /** Stream for output data in JSON format. */ -class JSONRowOutputFormat : public RowOutputFormatWithUTF8ValidationAdaptor +class JSONRowOutputFormat : public RowOutputFormatWithExceptionHandlerAdaptor { public: JSONRowOutputFormat( @@ -69,6 +71,7 @@ protected: FormatSettings settings; bool yield_strings; + WriteBuffer * ostr; }; } diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp index 46fe2ba26a8..3e63e2abd6c 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp @@ -8,7 +8,6 @@ namespace DB void ParallelFormattingOutputFormat::finalizeImpl() { need_flush = true; - IOutputFormat::finalized = true; /// Don't throw any background_exception here, because we want to finalize the execution. /// Exception will be checked after main thread is finished. addChunk(Chunk{}, ProcessingUnitType::FINALIZE, /*can_throw_exception*/ false); @@ -24,8 +23,29 @@ namespace DB std::lock_guard lock(mutex); if (background_exception) - std::rethrow_exception(background_exception); + { + collector_finished.set(); + rethrowBackgroundException(); + } } + + if (collected_prefix && collected_suffix && collected_finalize) + return; + + auto formatter = internal_formatter_creator(out); + formatter->setRowsReadBefore(rows_collected); + formatter->setException(exception_message); + + if (!collected_prefix) + formatter->writePrefix(); + + if (!collected_suffix) + formatter->writeSuffix(); + + if (!collected_finalize) + formatter->finalizeImpl(); + + formatter->finalizeBuffers(); } void ParallelFormattingOutputFormat::addChunk(Chunk chunk, ProcessingUnitType type, bool can_throw_exception) @@ -33,7 +53,7 @@ namespace DB { std::lock_guard lock(mutex); if (background_exception && can_throw_exception) - std::rethrow_exception(background_exception); + rethrowBackgroundException(); } const auto current_unit_number = writer_unit_number % processing_units.size(); @@ -62,7 +82,10 @@ namespace DB size_t first_row_num = rows_consumed; if (unit.type == ProcessingUnitType::PLAIN) + { rows_consumed += unit.chunk.getNumRows(); + unit.rows_num = unit.chunk.getNumRows(); + } scheduleFormatterThreadForUnitWithNumber(current_unit_number, first_row_num); ++writer_unit_number; @@ -125,7 +148,7 @@ namespace DB assert(unit.status == READY_TO_READ); /// Use this copy to after notification to stop the execution. - auto copy_if_unit_type = unit.type; + auto copy_of_unit_type = unit.type; /// Do main work here. out.write(unit.segment.data(), unit.actual_memory_size); @@ -134,6 +157,7 @@ namespace DB IOutputFormat::flush(); ++collector_unit_number; + rows_collected += unit.rows_num; { /// Notify other threads. @@ -141,9 +165,19 @@ namespace DB unit.status = READY_TO_INSERT; writer_condvar.notify_all(); } - /// We can exit only after writing last piece of to out buffer. - if (copy_if_unit_type == ProcessingUnitType::FINALIZE) + + if (copy_of_unit_type == ProcessingUnitType::START) { + collected_prefix = true; + } + else if (copy_of_unit_type == ProcessingUnitType::PLAIN_FINISH) + { + collected_suffix = true; + } + /// We can exit only after writing last piece of data to out buffer. 
+ else if (copy_of_unit_type == ProcessingUnitType::FINALIZE) + { + collected_finalize = true; break; } } @@ -156,7 +190,6 @@ namespace DB } } - void ParallelFormattingOutputFormat::formatterThreadFunction(size_t current_unit_number, size_t first_row_num, const ThreadGroupPtr & thread_group) { SCOPE_EXIT_SAFE( @@ -184,6 +217,7 @@ namespace DB auto formatter = internal_formatter_creator(out_buffer); formatter->setRowsReadBefore(first_row_num); + formatter->setException(exception_message); switch (unit.type) { diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h index 490f033b87e..b9a3b7638fa 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h @@ -142,6 +142,14 @@ public: return internal_formatter_creator(buffer)->getContentType(); } + bool supportsWritingException() const override + { + WriteBufferFromOwnString buffer; + return internal_formatter_creator(buffer)->supportsWritingException(); + } + + void setException(const String & exception_message_) override { exception_message = exception_message_; } + private: void consume(Chunk chunk) override final { @@ -214,6 +222,7 @@ private: Memory<> segment; size_t actual_memory_size{0}; Statistics statistics; + size_t rows_num; }; Poco::Event collector_finished{}; @@ -241,12 +250,19 @@ private: std::condition_variable writer_condvar; size_t rows_consumed = 0; + size_t rows_collected = 0; std::atomic_bool are_totals_written = false; /// We change statistics in onProgress() which can be called from different threads. std::mutex statistics_mutex; bool save_totals_and_extremes_in_statistics; + String exception_message; + bool exception_is_rethrown = false; + bool collected_prefix = false; + bool collected_suffix = false; + bool collected_finalize = false; + void finishAndWait(); void onBackgroundException() @@ -261,6 +277,17 @@ private: collector_condvar.notify_all(); } + void rethrowBackgroundException() + { + /// Rethrow background exception only once, because + /// OutputFormat can be used after it to write an exception. 
+ if (!exception_is_rethrown) + { + exception_is_rethrown = true; + std::rethrow_exception(background_exception); + } + } + void scheduleFormatterThreadForUnitWithNumber(size_t ticket_number, size_t first_row_num) { pool.scheduleOrThrowOnError([this, thread_group = CurrentThread::getGroup(), ticket_number, first_row_num] diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp index 1d6fb62275c..eb735cc93aa 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp @@ -8,8 +8,9 @@ namespace DB { XMLRowOutputFormat::XMLRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : RowOutputFormatWithUTF8ValidationAdaptor(true, header_, out_), fields(header_.getNamesAndTypes()), format_settings(format_settings_) + : RowOutputFormatWithExceptionHandlerAdaptor(header_, out_, true, format_settings_.xml.valid_output_on_exception), fields(header_.getNamesAndTypes()), format_settings(format_settings_) { + ostr = RowOutputFormatWithExceptionHandlerAdaptor::getWriteBufferPtr(); const auto & sample = getPort(PortKind::Main).getHeader(); field_tag_names.resize(sample.columns()); @@ -191,7 +192,9 @@ void XMLRowOutputFormat::finalizeImpl() writeRowsBeforeLimitAtLeast(); - if (format_settings.write_statistics) + if (!exception_message.empty()) + writeException(); + else if (format_settings.write_statistics) writeStatistics(); writeCString("\n", *ostr); @@ -230,6 +233,12 @@ void XMLRowOutputFormat::writeStatistics() writeCString("\t\n", *ostr); } +void XMLRowOutputFormat::writeException() +{ + writeCString("\t", *ostr); + writeXMLStringForTextElement(exception_message, *ostr); + writeCString("\n", *ostr); +} void registerOutputFormatXML(FormatFactory & factory) { diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.h b/src/Processors/Formats/Impl/XMLRowOutputFormat.h index e25e7129109..daf03539d0b 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.h +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -13,7 +14,7 @@ namespace DB /** A stream for outputting data in XML format. */ -class XMLRowOutputFormat final : public RowOutputFormatWithUTF8ValidationAdaptor +class XMLRowOutputFormat final : public RowOutputFormatWithExceptionHandlerAdaptor { public: XMLRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); @@ -56,6 +57,7 @@ private: void writeExtremesElement(const char * title, const Columns & columns, size_t row_num); void writeRowsBeforeLimitAtLeast(); void writeStatistics(); + void writeException(); size_t field_number = 0; size_t row_count = 0; @@ -63,6 +65,7 @@ private: Names field_tag_names; const FormatSettings format_settings; + WriteBuffer * ostr; }; } diff --git a/src/Processors/Formats/OutputFormatWithExceptionHandlerAdaptor.h b/src/Processors/Formats/OutputFormatWithExceptionHandlerAdaptor.h new file mode 100644 index 00000000000..bb318dae81e --- /dev/null +++ b/src/Processors/Formats/OutputFormatWithExceptionHandlerAdaptor.h @@ -0,0 +1,75 @@ +#pragma once + +#include +#include + +#include +#include + +namespace DB +{ + +template +class RowOutputFormatWithExceptionHandlerAdaptorBase : public Base +{ +public: + RowOutputFormatWithExceptionHandlerAdaptorBase(bool handle_exceptions, const Block & header, WriteBuffer & out_, Args... args) + : Base(header, out_, std::forward(args)...) 
+ { + if (handle_exceptions) + peekable_out = std::make_unique(*Base::getWriteBufferPtr()); + } + + void write(const Columns & columns, size_t row_num) + { + if (!peekable_out) + Base::write(columns, row_num); + + + PeekableWriteBufferCheckpoint checkpoint(*peekable_out); + try + { + Base::write(columns, row_num); + } + catch (...) + { + peekable_out->rollbackToCheckpoint(); + throw; + } + } + + void flush() override + { + getWriteBufferPtr()->next(); + + if (peekable_out) + Base::getWriteBufferPtr()->next(); + } + + void finalizeBuffers() override + { + if (peekable_out) + peekable_out->finalize(); + } + + void resetFormatterImpl() override + { + peekable_out = std::make_unique(*Base::getWriteBufferPtr()); + } + +protected: + /// Returns buffer that should be used in derived classes instead of out. + WriteBuffer * getWriteBufferPtr() override + { + if (peekable_out) + peekable_out.get(); + return Base::getWriteBufferPtr(); + } + +private: + + std::unique_ptr peekable_out; +}; + +} + diff --git a/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h b/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h index 8d8fb9ef0c6..f86ff278b33 100644 --- a/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h +++ b/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h @@ -9,12 +9,12 @@ namespace DB { -template +template class OutputFormatWithUTF8ValidationAdaptorBase : public Base { public: - OutputFormatWithUTF8ValidationAdaptorBase(bool validate_utf8, const Block & header, WriteBuffer & out_, Args... args) - : Base(header, out_, std::forward(args)...) + OutputFormatWithUTF8ValidationAdaptorBase(const Block & header, WriteBuffer & out_, bool validate_utf8) + : Base(header, out_) { bool values_can_contain_invalid_utf8 = false; for (const auto & type : this->getPort(IOutputFormat::PortKind::Main).getHeader().getDataTypes()) @@ -24,37 +24,37 @@ public: } if (validate_utf8 && values_can_contain_invalid_utf8) - { - validating_ostr = std::make_unique(this->out); - ostr = validating_ostr.get(); - } - else - ostr = &this->out; + validating_ostr = std::make_unique(*Base::getWriteBufferPtr()); } void flush() override { - ostr->next(); - if (validating_ostr) - this->out.next(); + validating_ostr->next(); + Base::flush(); } void finalizeBuffers() override { if (validating_ostr) validating_ostr->finalize(); + Base::finalizeBuffers(); } void resetFormatterImpl() override { - validating_ostr = std::make_unique(this->out); - ostr = validating_ostr.get(); + Base::resetFormatterImpl(); + validating_ostr = std::make_unique(*Base::getWriteBufferPtr()); } protected: - /// Point to validating_ostr or out from IOutputFormat, should be used in derived classes instead of out. - WriteBuffer * ostr; + /// Returns buffer that should be used in derived classes instead of out. + WriteBuffer * getWriteBufferPtr() override + { + if (validating_ostr) + return validating_ostr.get(); + return Base::getWriteBufferPtr(); + } private: /// Validates UTF-8 sequences, replaces bad sequences with replacement character. 
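Three spots in the new adaptor headers above look like slips worth double-checking. `OutputFormatWithExceptionHandlerAdaptorBase::getWriteBufferPtr()` evaluates `peekable_out.get()` without returning it, its `write()` falls through after delegating and then dereferences a null `peekable_out`, and the UTF-8 adaptor's new `flush()` calls `validating_ostr->next()` even when validation is disabled and the pointer was never created. (Relatedly, `XMLRowOutputFormat` passes `true, format_settings_.xml.valid_output_on_exception` where `JSONRowOutputFormat` passes the setting first and `true` second, which looks like the two booleans swapped.) A corrected sketch of the intended plumbing, as I read it rather than as the patch has it:

```cpp
/// The adaptors chain WriteBuffers: the format writes into the PeekableWriteBuffer
/// (exception handling), which flushes into WriteBufferValidUTF8, which writes into
/// IOutputFormat::out. Each layer exposes its wrapper via getWriteBufferPtr().

WriteBuffer * getWriteBufferPtr() override
{
    if (peekable_out)
        return peekable_out.get();  /// the `return` is missing in the patch
    return Base::getWriteBufferPtr();
}

void write(const Columns & columns, size_t row_num)
{
    if (!peekable_out)
    {
        Base::write(columns, row_num);
        return;  /// without this, control falls through and dereferences a null pointer
    }

    PeekableWriteBufferCheckpoint checkpoint(*peekable_out);
    try
    {
        Base::write(columns, row_num);
    }
    catch (...)
    {
        peekable_out->rollbackToCheckpoint();
        throw;
    }
}

void flush() override  /// UTF-8 adaptor: validating_ostr exists only when validation is on
{
    if (validating_ostr)
        validating_ostr->next();
    Base::flush();
}
```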
diff --git a/src/Processors/Formats/RowOutputFormatWithExceptionHandlerAdaptor.h b/src/Processors/Formats/RowOutputFormatWithExceptionHandlerAdaptor.h new file mode 100644 index 00000000000..4e797c521c0 --- /dev/null +++ b/src/Processors/Formats/RowOutputFormatWithExceptionHandlerAdaptor.h @@ -0,0 +1,104 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace DB +{ + +template +class RowOutputFormatWithExceptionHandlerAdaptor : public Base +{ +public: + RowOutputFormatWithExceptionHandlerAdaptor(const Block & header, WriteBuffer & out_, bool handle_exceptions, Args... args) + : Base(header, out_, std::forward(args)...) + { + if (handle_exceptions) + peekable_out = std::make_unique(*Base::getWriteBufferPtr()); + } + + void consume(DB::Chunk chunk) override + { + if (!peekable_out) + { + Base::consume(std::move(chunk)); + return; + } + + auto num_rows = chunk.getNumRows(); + const auto & columns = chunk.getColumns(); + + for (size_t row = 0; row < num_rows; ++row) + { + /// It's important to set a checkpoint before writing row-between delimiter + peekable_out->setCheckpoint(); + + if (Base::haveWrittenData()) + writeRowBetweenDelimiter(); + + try + { + write(columns, row); + } + catch (...) + { + peekable_out->rollbackToCheckpoint(/*drop=*/true); + throw; + } + peekable_out->dropCheckpoint(); + + Base::first_row = false; + } + } + + void write(const Columns & columns, size_t row_num) override { Base::write(columns, row_num); } + void writeRowBetweenDelimiter() override { Base::writeRowBetweenDelimiter(); } + + void flush() override + { + if (peekable_out) + peekable_out->next(); + + Base::flush(); + } + + void finalizeBuffers() override + { + if (peekable_out) + peekable_out->finalize(); + Base::finalizeBuffers(); + } + + void resetFormatterImpl() override + { + Base::resetFormatterImpl(); + peekable_out = std::make_unique(*Base::getWriteBufferPtr()); + } + + bool supportsWritingException() const override { return true; } + + void setException(const String & exception_message_) override { exception_message = exception_message_; } + +protected: + /// Returns buffer that should be used in derived classes instead of out. + WriteBuffer * getWriteBufferPtr() override + { + if (peekable_out) + return peekable_out.get(); + return Base::getWriteBufferPtr(); + } + + String exception_message; + +private: + + std::unique_ptr peekable_out; +}; + +} + diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index a0bfcd49dfd..a5102ea9383 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include @@ -835,23 +837,40 @@ void HTTPHandler::processQuery( customizeContext(request, context, *in_post_maybe_compressed); in = has_external_data ? 
std::move(in_param) : std::make_unique(*in_param, *in_post_maybe_compressed); - executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context, - [&response, this] (const QueryResultDetails & details) + auto set_query_result = [&response, this] (const QueryResultDetails & details) + { + response.add("X-ClickHouse-Query-Id", details.query_id); + + if (content_type_override) + response.setContentType(*content_type_override); + else if (details.content_type) + response.setContentType(*details.content_type); + + if (details.format) + response.add("X-ClickHouse-Format", *details.format); + + if (details.timezone) + response.add("X-ClickHouse-Timezone", *details.timezone); + }; + + auto handle_exception_in_output_format = [&](IOutputFormat & output_format) + { + if (settings.http_write_exception_in_output_format && output_format.supportsWritingException()) { - response.add("X-ClickHouse-Query-Id", details.query_id); - - if (content_type_override) - response.setContentType(*content_type_override); - else if (details.content_type) - response.setContentType(*details.content_type); - - if (details.format) - response.add("X-ClickHouse-Format", *details.format); - - if (details.timezone) - response.add("X-ClickHouse-Timezone", *details.timezone); + output_format.setException(getCurrentExceptionMessage(false)); + output_format.finalize(); + used_output.exception_is_written = true; } - ); + }; + + executeQuery( + *in, + *used_output.out_maybe_delayed_and_compressed, + /* allow_into_outfile = */ false, + context, + set_query_result, + {}, + handle_exception_in_output_format); if (used_output.hasDelayed()) { @@ -895,7 +914,7 @@ try response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code)); } - if (!response.sent() && !used_output.out_maybe_compressed) + if (!response.sent() && !used_output.out_maybe_compressed && !used_output.exception_is_written) { /// If nothing was sent yet and we don't even know if we must compress the response. *response.send() << s << std::endl; @@ -911,21 +930,24 @@ try used_output.out_maybe_delayed_and_compressed.reset(); } - /// Send the error message into already used (and possibly compressed) stream. - /// Note that the error message will possibly be sent after some data. - /// Also HTTP code 200 could have already been sent. - - /// If buffer has data, and that data wasn't sent yet, then no need to send that data - bool data_sent = used_output.out->count() != used_output.out->offset(); - - if (!data_sent) + if (!used_output.exception_is_written) { - used_output.out_maybe_compressed->position() = used_output.out_maybe_compressed->buffer().begin(); - used_output.out->position() = used_output.out->buffer().begin(); - } + /// Send the error message into already used (and possibly compressed) stream. + /// Note that the error message will possibly be sent after some data. + /// Also HTTP code 200 could have already been sent. 
- writeString(s, *used_output.out_maybe_compressed); - writeChar('\n', *used_output.out_maybe_compressed); + /// If buffer has data, and that data wasn't sent yet, then no need to send that data + bool data_sent = used_output.out->count() != used_output.out->offset(); + + if (!data_sent) + { + used_output.out_maybe_compressed->position() = used_output.out_maybe_compressed->buffer().begin(); + used_output.out->position() = used_output.out->buffer().begin(); + } + + writeString(s, *used_output.out_maybe_compressed); + writeChar('\n', *used_output.out_maybe_compressed); + } used_output.out_maybe_compressed->next(); } diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index 5eda5927538..94b5a44f105 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -62,6 +62,8 @@ private: bool finalized = false; + bool exception_is_written = false; + inline bool hasDelayed() const { return out_maybe_delayed_and_compressed != out_maybe_compressed; diff --git a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference new file mode 100644 index 00000000000..452aa9d5022 --- /dev/null +++ b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference @@ -0,0 +1,432 @@ +One block +Parallel formatting: 0 +JSON +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONEachRow +{"exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "} +JSONCompact +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONCompactEachRow +["Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "] +JSONObjectEachRow +{ + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +XML + + + + + + number + UInt64 + + + res + UInt8 + + + + + + 0 + Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) + +Parallel formatting: 1 +JSON +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONEachRow +{"exception": "Code: 395. 
: Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "} +JSONCompact +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONCompactEachRow +["Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "] +JSONObjectEachRow +{ + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +XML + + + + + + number + UInt64 + + + res + UInt8 + + + + + + 0 + Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) + +Several blocks +Without parallel formatting +JSON +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + { + "number": "0", + "res": 0 + }, + { + "number": "1", + "res": 0 + }, + { + "number": "2", + "res": 0 + }, + { + "number": "3", + "res": 0 + } + ], + + "rows": 4, + + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONEachRow +{"number":"0","res":0} +{"number":"1","res":0} +{"number":"2","res":0} +{"number":"3","res":0} +{"exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "} +JSONCompact +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + ["0", 0], + ["1", 0], + ["2", 0], + ["3", 0] + ], + + "rows": 4, + + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONCompactEachRow +["0", 0] +["1", 0] +["2", 0] +["3", 0] +["Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "] +JSONObjectEachRow +{ + "row_1": {"number":"0","res":0}, + "row_1": {"number":"1","res":0}, + "row_1": {"number":"2","res":0}, + "row_1": {"number":"3","res":0}, + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +XML + + + + + + number + UInt64 + + + res + UInt8 + + + + + + 0 + 0 + + + 1 + 0 + + + 2 + 0 + + + 3 + 0 + + + 4 + Code: 395. 
: Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) + +With parallel formatting +JSON +1 +JSONCompact +1 +JSONObjectEachRow +1 +JSONEachRow +1 +JSONCompactEachRow +1 +Formatting error +Without parallel formatting +JSON +{ + "meta": + [ + { + "name": "x", + "type": "UInt32" + }, + { + "name": "s", + "type": "String" + }, + { + "name": "y", + "type": "Enum8('a' = 1)" + } + ], + + "data": + [ + { + "x": 1, + "s": "str1", + "y": "a" + }, + { + "x": 2, + "s": "str2", + "y": "a" + }, + { + "x": 3, + "s": "str3", + "y": "a" + } + ], + + "rows": 3, + + "exception": "Code: 36. : Unexpected value 99 in enum: While executing JSONRowOutputFormat. (BAD_ARGUMENTS) " +} +JSONEachRow +{"x":1,"s":"str1","y":"a"} +{"x":2,"s":"str2","y":"a"} +{"x":3,"s":"str3","y":"a"} +{"exception": "Code: 36. : Unexpected value 99 in enum: While executing JSONEachRowRowOutputFormat. (BAD_ARGUMENTS) "} +JSONCompact +{ + "meta": + [ + { + "name": "x", + "type": "UInt32" + }, + { + "name": "s", + "type": "String" + }, + { + "name": "y", + "type": "Enum8('a' = 1)" + } + ], + + "data": + [ + [1, "str1", "a"], + [2, "str2", "a"], + [3, "str3", "a"] + ], + + "rows": 3, + + "exception": "Code: 36. : Unexpected value 99 in enum: While executing JSONCompactRowOutputFormat. (BAD_ARGUMENTS) " +} +JSONCompactEachRow +[1, "str1", "a"] +[2, "str2", "a"] +[3, "str3", "a"] +["Code: 36. : Unexpected value 99 in enum: While executing JSONCompactEachRowRowOutputFormat. (BAD_ARGUMENTS) "] +JSONObjectEachRow +{ + "row_1": {"x":1,"s":"str1","y":"a"}, + "row_1": {"x":2,"s":"str2","y":"a"}, + "row_1": {"x":3,"s":"str3","y":"a"}, + "exception": "Code: 36. : Unexpected value 99 in enum: While executing JSONObjectEachRowRowOutputFormat. (BAD_ARGUMENTS) " +} +XML + + + + + + x + UInt32 + + + s + String + + + y + Enum8('a' = 1) + + + + + + 1 + str1 + a + + + 2 + str2 + a + + + 3 + str3 + a + + + 3 + Code: 36. : Unexpected value 99 in enum: While executing XMLRowOutputFormat. (BAD_ARGUMENTS) + +With parallel formatting +JSON +1 +JSONCompact +1 +JSONObjectEachRow +1 +JSONEachRow +1 +JSONCompactEachRow +1 +Test 1 +1 +1 +Test 2 +1 +1 +Test 3 +1 +1 diff --git a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh new file mode 100755 index 00000000000..cb4d1b6aee1 --- /dev/null +++ b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo "One block" +for parallel in 0 1 +do + echo "Parallel formatting: $parallel" + for format in JSON JSONEachRow JSONCompact JSONCompactEachRow JSONObjectEachRow XML + do + echo $format + ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select number, throwIf(number > 3) as res from numbers(10) format $format settings output_format_parallel_formatting=$parallel" | sed "s/(version .*)//" | sed "s/DB::Exception//" + done +done + +echo "Several blocks" +echo "Without parallel formatting" +for format in JSON JSONEachRow JSONCompact JSONCompactEachRow JSONObjectEachRow XML +do + echo $format + ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select number, throwIf(number > 3) as res from system.numbers format $format settings max_block_size=1, output_format_parallel_formatting=0" | sed "s/(version .*)//" | sed "s/DB::Exception//" +done + +echo "With parallel formatting" +for format in JSON JSONCompact JSONObjectEachRow +do + echo $format + ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select number, throwIf(number > 3) as res from system.numbers format $format settings max_block_size=1, output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" +done + +for format in JSONEachRow JSONCompactEachRow +do + echo $format + ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select number, throwIf(number > 3) as res from system.numbers format $format settings max_block_size=1, output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=LineAsString -q "select min(isValidJSON(line)) from table" +done + +echo "Formatting error" +$CLICKHOUSE_CLIENT -q "drop table if exists test_02841" +$CLICKHOUSE_CLIENT -q "create table test_02841 (x UInt32, s String, y Enum('a' = 1)) engine=MergeTree order by x" +$CLICKHOUSE_CLIENT -q "system stop merges test_02841" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (1, 'str1', 1)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (2, 'str2', 1)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (3, 'str3', 1)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (5, 'str5', 99)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (6, 'str6', 1)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (7, 'str7', 1)" + +echo "Without parallel formatting" +for format in JSON JSONEachRow JSONCompact JSONCompactEachRow JSONObjectEachRow XML +do + echo $format + ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 order by x format $format settings output_format_parallel_formatting=0" | sed "s/(version .*)//" | sed "s/DB::Exception//" +done + +echo "With parallel formatting" +for format in JSON JSONCompact JSONObjectEachRow +do + echo $format + ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format $format settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" +done + +for format in JSONEachRow JSONCompactEachRow +do + echo $format + ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format $format settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=LineAsString -q "select min(isValidJSON(line)) from table" +done + + +echo 
"Test 1" +$CLICKHOUSE_CLIENT -q "truncate table test_02841" +$CLICKHOUSE_CLIENT -q "insert into test_02841 select 1, repeat('aaaaa', 1000000), 1" +$CLICKHOUSE_CLIENT -q "insert into test_02841 select 2, repeat('aaaaa', 1000000), 99" +$CLICKHOUSE_CLIENT -q "insert into test_02841 select 3, repeat('aaaaa', 1000000), 1" + +${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=0" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" +${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" + + +echo "Test 2" +$CLICKHOUSE_CLIENT -q "truncate table test_02841" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (1, 'str1', 1)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (2, 'str2', 1)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 select number, 'str_numbers_1', 1 from numbers(10000)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (3, 'str4', 99)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (4, 'str5', 1)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 select number, 'str_numbers_2', 1 from numbers(10000)" + +${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=0" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" +${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" + +echo "Test 3" +$CLICKHOUSE_CLIENT -q "truncate table test_02841" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (1, 'str1', 1)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 values (2, 'str2', 1)" +$CLICKHOUSE_CLIENT -q "insert into test_02841 select number, 'str_numbers_1', number > 9000 ? 
99 : 1 from numbers(10000)"
$CLICKHOUSE_CLIENT -q "insert into test_02841 values (3, 'str4', 1)"
$CLICKHOUSE_CLIENT -q "insert into test_02841 values (4, 'str5', 1)"
$CLICKHOUSE_CLIENT -q "insert into test_02841 select number, 'str_numbers_2', 1 from numbers(10000)"

${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=0" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table"
${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table"

$CLICKHOUSE_CLIENT -q "drop table test_02841"

From fa905ebd27f56c4dcf3b4550963d7f4cab94c2e5 Mon Sep 17 00:00:00 2001
From: avogar
Date: Tue, 1 Aug 2023 10:14:09 +0000
Subject: [PATCH 0111/1687] Clean up

---
 src/Formats/FormatFactory.cpp                 |  1 -
 .../OutputFormatWithExceptionHandlerAdaptor.h | 75 ------------------
 2 files changed, 76 deletions(-)
 delete mode 100644 src/Processors/Formats/OutputFormatWithExceptionHandlerAdaptor.h

diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 4cd2ad5be03..1ad2c2285d9 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -1,7 +1,6 @@
 #include
 
 #include
-//#include
 #include
 #include
 #include
diff --git a/src/Processors/Formats/OutputFormatWithExceptionHandlerAdaptor.h b/src/Processors/Formats/OutputFormatWithExceptionHandlerAdaptor.h
deleted file mode 100644
index bb318dae81e..00000000000
--- a/src/Processors/Formats/OutputFormatWithExceptionHandlerAdaptor.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#pragma once
-
-#include
-#include
-
-#include
-#include
-
-namespace DB
-{
-
-template <typename Base, typename... Args>
-class RowOutputFormatWithExceptionHandlerAdaptorBase : public Base
-{
-public:
-    RowOutputFormatWithExceptionHandlerAdaptorBase(bool handle_exceptions, const Block & header, WriteBuffer & out_, Args... args)
-        : Base(header, out_, std::forward<Args>(args)...)
-    {
-        if (handle_exceptions)
-            peekable_out = std::make_unique<PeekableWriteBuffer>(*Base::getWriteBufferPtr());
-    }
-
-    void write(const Columns & columns, size_t row_num)
-    {
-        if (!peekable_out)
-        {
-            Base::write(columns, row_num);
-            return;
-        }
-
-        PeekableWriteBufferCheckpoint checkpoint(*peekable_out);
-        try
-        {
-            Base::write(columns, row_num);
-        }
-        catch (...)
-        {
-            peekable_out->rollbackToCheckpoint();
-            throw;
-        }
-    }
-
-    void flush() override
-    {
-        getWriteBufferPtr()->next();
-
-        if (peekable_out)
-            Base::getWriteBufferPtr()->next();
-    }
-
-    void finalizeBuffers() override
-    {
-        if (peekable_out)
-            peekable_out->finalize();
-    }
-
-    void resetFormatterImpl() override
-    {
-        peekable_out = std::make_unique<PeekableWriteBuffer>(*Base::getWriteBufferPtr());
-    }
-
-protected:
-    /// Returns buffer that should be used in derived classes instead of out.
-    WriteBuffer * getWriteBufferPtr() override
-    {
-        if (peekable_out)
-            return peekable_out.get();
-        return Base::getWriteBufferPtr();
-    }
-
-private:
-
-    std::unique_ptr<PeekableWriteBuffer> peekable_out;
-};
-
-}
-

From 2adb25e5caacc87182ea5aa7c8a431b5867d3180 Mon Sep 17 00:00:00 2001
From: avogar
Date: Tue, 1 Aug 2023 10:21:32 +0000
Subject: [PATCH 0112/1687] Add examples in docs

---
 docs/en/interfaces/http.md | 68 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index b28180fec67..a66b4ff5d5d 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -707,6 +707,74 @@ To prevent it, you can use setting `http_write_exception_in_output_format` (enab
 Examples:
```bash
+$ curl 'http://localhost:8123/?query=SELECT+number,+throwIf(number>2)+from+system.numbers+format+JSON+settings+max_block_size=1&http_write_exception_in_output_format=1'
+{
+    "meta":
+    [
+        {
+            "name": "number",
+            "type": "UInt64"
+        },
+        {
+            "name": "throwIf(greater(number, 2))",
+            "type": "UInt8"
+        }
+    ],
+    "data":
+    [
+        {
+            "number": "0",
+            "throwIf(greater(number, 2))": 0
+        },
+        {
+            "number": "1",
+            "throwIf(greater(number, 2))": 0
+        },
+        {
+            "number": "2",
+            "throwIf(greater(number, 2))": 0
+        }
+    ],
+
+    "rows": 3,
+
+    "exception": "Code: 395. DB::Exception: Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 2) :: 2) -> throwIf(greater(number, 2)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) (version 23.8.1.1)"
+}
+```
+
+```bash
+$ curl 'http://localhost:8123/?query=SELECT+number,+throwIf(number>2)+from+system.numbers+format+XML+settings+max_block_size=1&http_write_exception_in_output_format=1'
+
+
+
+
+
+        number
+        UInt64
+
+
+        throwIf(greater(number, 2))
+        UInt8
+
+
+
+
+
+        0
+        0
+
+
+        1
+        0
+
+
+        2
+        0
+
+
+        3
+        Code: 395. DB::Exception: Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 2) :: 2) -> throwIf(greater(number, 2)) UInt8 : 1'.
(FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) (version 23.8.1.1) + ``` From 8e3296a44a49efe5285e5f0151ef642b385c788f Mon Sep 17 00:00:00 2001 From: alexX512 Date: Tue, 1 Aug 2023 13:31:18 +0000 Subject: [PATCH 0113/1687] Add support of local progress in scheduler --- .../Executors/CompletedPipelineExecutor.cpp | 4 ++-- src/Processors/Executors/ExecutingGraph.cpp | 5 +++- .../Executors/ExecutionThreadContext.h | 13 +++++++++- src/Processors/Executors/ExecutorTasks.cpp | 21 ++++++++++++---- src/Processors/Executors/ExecutorTasks.h | 2 +- src/Processors/Executors/PipelineExecutor.cpp | 5 ++-- src/Processors/Executors/PipelineExecutor.h | 5 +++- .../PullingAsyncPipelineExecutor.cpp | 8 +++---- .../Executors/PullingPipelineExecutor.cpp | 2 +- .../PushingAsyncPipelineExecutor.cpp | 2 +- .../Executors/PushingPipelineExecutor.cpp | 2 +- src/Processors/Executors/TasksQueue.h | 1 + src/Processors/Formats/IOutputFormat.cpp | 1 + src/Processors/ISource.cpp | 24 +++++++++++++++++++ .../Transforms/LimitPartialResultTransform.h | 2 +- .../Transforms/PartialResultTransform.cpp | 12 ++++++++++ src/QueryPipeline/Pipe.cpp | 16 ++++++++----- src/QueryPipeline/Pipe.h | 1 + src/QueryPipeline/QueryPipeline.cpp | 1 + src/QueryPipeline/QueryPipeline.h | 3 +++ src/QueryPipeline/QueryPipelineBuilder.cpp | 21 +++++++++++++++- 21 files changed, 124 insertions(+), 27 deletions(-) diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp b/src/Processors/Executors/CompletedPipelineExecutor.cpp index b0f842dec1b..217b397360e 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.cpp +++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp @@ -74,7 +74,7 @@ void CompletedPipelineExecutor::execute() if (interactive_timeout_ms) { data = std::make_unique(); - data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); data->executor->setReadProgressCallback(pipeline.getReadProgressCallback()); /// Avoid passing this to lambda, copy ptr to data instead. @@ -100,7 +100,7 @@ void CompletedPipelineExecutor::execute() } else { - PipelineExecutor executor(pipeline.processors, pipeline.process_list_element); + PipelineExecutor executor(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); executor.setReadProgressCallback(pipeline.getReadProgressCallback()); executor.execute(pipeline.getNumThreads()); } diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index 27f6a454b24..e639cea75ea 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -60,6 +61,7 @@ bool ExecutingGraph::addEdges(uint64_t node) /// Add backward edges from input ports. auto & inputs = from->getInputs(); auto from_input = nodes[node]->back_edges.size(); + LOG_DEBUG(&Poco::Logger::get("ExecutingGraph::addEdges"), "Node id {}. Node name {}. Backward edges size {}. Inputs size {}. Direct edges size {}. 
Outputs size {}", node, from->getName(), from_input, inputs.size(), nodes[node]->direct_edges.size(), from->getOutputs().size());
 
     if (from_input < inputs.size())
     {
@@ -220,6 +222,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue
     updated_processors.push(pid);
 
     std::shared_lock read_lock(nodes_mutex);
+    // LOG_DEBUG(&Poco::Logger::get("ExecutingGraph::updateNode"), "Original node for update. Node id {}. Node name {}", pid, nodes[pid]->processor->getName());
 
     while (!updated_processors.empty() || !updated_edges.empty())
     {
@@ -260,7 +263,6 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue
         {
             pid = updated_processors.top();
             updated_processors.pop();
-
             /// In this method we have ownership on node.
             auto & node = *nodes[pid];
 
@@ -282,6 +284,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue
                 IProcessor::Status last_status = node.last_processor_status;
                 IProcessor::Status status = processor.prepare(node.updated_input_ports, node.updated_output_ports);
                 node.last_processor_status = status;
+                // LOG_DEBUG(&Poco::Logger::get("ExecutingGraph::updateNode"), "Node id {}. Node name {}. Status {}", pid, nodes[pid]->processor->getName(), status);
 
                 if (profile_processors)
                 {
diff --git a/src/Processors/Executors/ExecutionThreadContext.h b/src/Processors/Executors/ExecutionThreadContext.h
index eb048f8ab09..85788a70771 100644
--- a/src/Processors/Executors/ExecutionThreadContext.h
+++ b/src/Processors/Executors/ExecutionThreadContext.h
@@ -30,6 +30,12 @@ private:
     /// Callback for read progress.
     ReadProgressCallback * read_progress_callback = nullptr;
 
+    /// Timer that limits how long the executor keeps running local tasks directly instead of queuing them.
+    /// It provides local progress for each IProcessor task, so that partial results of the query can always be sent to the user.
+    Stopwatch watch;
+    /// Time period that limits the maximum allowed duration for optimizing the scheduling of local tasks within the executor
+    const UInt64 partial_result_duration_ms;
+
 public:
 #ifndef NDEBUG
     /// Time for different processing stages.
@@ -62,8 +68,13 @@ public:
     void setException(std::exception_ptr exception_) { exception = exception_; }
     void rethrowExceptionIfHas();
 
-    explicit ExecutionThreadContext(size_t thread_number_, bool profile_processors_, bool trace_processors_, ReadProgressCallback * callback)
+    bool needWatchRestartForPartialResultProgress() { return partial_result_duration_ms != 0 && partial_result_duration_ms < watch.elapsedMilliseconds(); }
+    void restartWatch() { watch.restart(); }
+
+    explicit ExecutionThreadContext(size_t thread_number_, bool profile_processors_, bool trace_processors_, ReadProgressCallback * callback, UInt64 partial_result_duration_ms_)
         : read_progress_callback(callback)
+        , watch(CLOCK_MONOTONIC)
+        , partial_result_duration_ms(partial_result_duration_ms_)
         , thread_number(thread_number_)
         , profile_processors(profile_processors_)
         , trace_processors(trace_processors_)
diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp
index 496e859746a..9cbebb6996c 100644
--- a/src/Processors/Executors/ExecutorTasks.cpp
+++ b/src/Processors/Executors/ExecutorTasks.cpp
@@ -107,9 +107,18 @@ void ExecutorTasks::tryGetTask(ExecutionThreadContext & context)
 void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThreadContext & context)
 {
     context.setTask(nullptr);
+    // if (!queue.empty())
+    //     LOG_DEBUG(&Poco::Logger::get("ExecutorTasks::pushTasks"), "First task to add in queue. Processor id {}. Processor name {}", queue.front()->processors_id, queue.front()->processor->getName());
 
-    /// Take local task from queue if has one and it's not a processor which generates partial result.
-    if (!queue.empty() && !queue.front()->processor->isPartialResultProcessor() && !context.hasAsyncTasks())
+    /// If sending partial results is allowed and the local task scheduling optimization has been running longer than the limit,
+    /// or the new task needs to send a partial result later, skip the optimization for this iteration.
+    /// Otherwise, take a local task from the queue if there is one.
+ if ((!queue.empty() && queue.front()->processor->isPartialResultProcessor()) + || context.needWatchRestartForPartialResultProgress()) + { + context.restartWatch(); + } + else if (!queue.empty() && !context.hasAsyncTasks()) { context.setTask(queue.front()); queue.pop(); @@ -117,6 +126,8 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea if (!queue.empty() || !async_queue.empty()) { + // LOG_DEBUG(&Poco::Logger::get("ExecutorTasks::pushTasks"), "After local optimizations push new tasks to queue"); + // task_queue.print(); std::unique_lock lock(mutex); #if defined(OS_LINUX) @@ -137,9 +148,11 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea /// Wake up at least one thread that will wake up other threads if required tryWakeUpAnyOtherThreadWithTasks(context, lock); } + // LOG_DEBUG(&Poco::Logger::get("ExecutorTasks::pushTasks"), "After push new tasks to queue"); + // task_queue.print(); } -void ExecutorTasks::init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback) +void ExecutorTasks::init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback, UInt64 partial_result_duration_ms) { num_threads = num_threads_; use_threads = use_threads_; @@ -151,7 +164,7 @@ void ExecutorTasks::init(size_t num_threads_, size_t use_threads_, bool profile_ executor_contexts.reserve(num_threads); for (size_t i = 0; i < num_threads; ++i) - executor_contexts.emplace_back(std::make_unique(i, profile_processors, trace_processors, callback)); + executor_contexts.emplace_back(std::make_unique(i, profile_processors, trace_processors, callback, partial_result_duration_ms)); } } diff --git a/src/Processors/Executors/ExecutorTasks.h b/src/Processors/Executors/ExecutorTasks.h index d35f8de94d1..ab6d5e91411 100644 --- a/src/Processors/Executors/ExecutorTasks.h +++ b/src/Processors/Executors/ExecutorTasks.h @@ -58,7 +58,7 @@ public: void tryGetTask(ExecutionThreadContext & context); void pushTasks(Queue & queue, Queue & async_queue, ExecutionThreadContext & context); - void init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback); + void init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback, UInt64 partial_result_duration_ms); void fill(Queue & queue); void upscale(size_t use_threads_); diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index f523e7b7cf9..29c16f31aa3 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -36,8 +36,9 @@ namespace ErrorCodes } -PipelineExecutor::PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem) +PipelineExecutor::PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem, UInt64 partial_result_duration_ms_) : process_list_element(std::move(elem)) + , partial_result_duration_ms(partial_result_duration_ms_) { if (process_list_element) { @@ -309,7 +310,7 @@ void PipelineExecutor::initializeExecution(size_t num_threads) Queue queue; graph->initializeExecution(queue); - tasks.init(num_threads, use_threads, profile_processors, trace_processors, read_progress_callback.get()); + tasks.init(num_threads, use_threads, profile_processors, trace_processors, read_progress_callback.get(), partial_result_duration_ms); 
tasks.fill(queue); if (num_threads > 1) diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 1e7d52d8290..fe424114bee 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -33,7 +33,7 @@ public: /// During pipeline execution new processors can appear. They will be added to existing set. /// /// Explicit graph representation is built in constructor. Throws if graph is not correct. - explicit PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem); + explicit PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem, UInt64 partial_result_duration_ms_ = 0); ~PipelineExecutor(); /// Execute pipeline in multiple threads. Must be called once. @@ -89,6 +89,9 @@ private: QueryStatusPtr process_list_element; ReadProgressCallbackPtr read_progress_callback; + + /// Duration between sending partial result through the pipeline + const UInt64 partial_result_duration_ms; using Queue = std::queue; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 581220fe26c..8875ba85a07 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -41,14 +41,14 @@ struct PullingAsyncPipelineExecutor::Data } }; -PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipeline_, bool /*has_partial_result_setting*/) : pipeline(pipeline_) +PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipeline_, bool has_partial_result_setting) : pipeline(pipeline_) { if (!pipeline.pulling()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for PullingAsyncPipelineExecutor must be pulling"); lazy_format = std::make_shared(pipeline.output->getHeader()); - // if (has_partial_result_setting) - // lazy_format->activatePartialResultProtocol(); + if (has_partial_result_setting) + lazy_format->activatePartialResultProtocol(); pipeline.complete(lazy_format); } @@ -105,7 +105,7 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) if (!data) { data = std::make_unique(); - data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); data->executor->setReadProgressCallback(pipeline.getReadProgressCallback()); data->lazy_format = lazy_format.get(); diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index cbf73c5cb07..f79f15c19bf 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -44,7 +44,7 @@ bool PullingPipelineExecutor::pull(Chunk & chunk) { if (!executor) { - executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); executor->setReadProgressCallback(pipeline.getReadProgressCallback()); } diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 59d33cbffed..c01fcc634b7 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -166,7 +166,7 @@ void PushingAsyncPipelineExecutor::start() 
started = true; data = std::make_unique(); - data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); data->executor->setReadProgressCallback(pipeline.getReadProgressCallback()); data->source = pushing_source.get(); diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp index 696932932df..f2b018792c7 100644 --- a/src/Processors/Executors/PushingPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingPipelineExecutor.cpp @@ -87,7 +87,7 @@ void PushingPipelineExecutor::start() return; started = true; - executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); executor->setReadProgressCallback(pipeline.getReadProgressCallback()); if (!executor->executeStep(&input_wait_flag)) diff --git a/src/Processors/Executors/TasksQueue.h b/src/Processors/Executors/TasksQueue.h index bb8996fc1a6..bf8e8a1cde4 100644 --- a/src/Processors/Executors/TasksQueue.h +++ b/src/Processors/Executors/TasksQueue.h @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index 6496f5357e7..7dba4b22eab 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB diff --git a/src/Processors/ISource.cpp b/src/Processors/ISource.cpp index 6a88d3973a1..34448186fa8 100644 --- a/src/Processors/ISource.cpp +++ b/src/Processors/ISource.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB @@ -21,38 +22,61 @@ ISource::ISource(Block header, bool enable_auto_progress) ISource::Status ISource::prepare() { + const auto name = getName(); + if (name == "NullSource") + LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "KEK {}", getName()); if (finished) { + if (name == "NullSource") + LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "finished"); output.finish(); return Status::Finished; } /// Check can output. if (output.isFinished()) + { + if (name == "NullSource") + LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "output.isFinished()"); return Status::Finished; + } if (!output.canPush()) + { + if (name == "NullSource") + LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "!output.canPush()"); return Status::PortFull; + } if (!has_input) + { + if (name == "NullSource") + LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "!has_input"); return Status::Ready; + } output.pushData(std::move(current_chunk)); has_input = false; if (isCancelled()) { + if (name == "NullSource") + LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "isCancelled()"); output.finish(); return Status::Finished; } if (got_exception) { + if (name == "NullSource") + LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "got_exception"); finished = true; output.finish(); return Status::Finished; } + if (name == "NullSource") + LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "Status::PortFull"); /// Now, we pushed to output, and it must be full. 
return Status::PortFull; } diff --git a/src/Processors/Transforms/LimitPartialResultTransform.h b/src/Processors/Transforms/LimitPartialResultTransform.h index 8ed60d20da3..4b1f0e1026a 100644 --- a/src/Processors/Transforms/LimitPartialResultTransform.h +++ b/src/Processors/Transforms/LimitPartialResultTransform.h @@ -20,7 +20,7 @@ public: , offset(offset_) {} - String getName() const override { return "MergeSortingPartialResult"; } + String getName() const override { return "LimitPartialResultTransform"; } void transformPartialResult(Chunk & chunk) override { diff --git a/src/Processors/Transforms/PartialResultTransform.cpp b/src/Processors/Transforms/PartialResultTransform.cpp index 7cf2473684a..c3f38e77205 100644 --- a/src/Processors/Transforms/PartialResultTransform.cpp +++ b/src/Processors/Transforms/PartialResultTransform.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -14,28 +15,36 @@ PartialResultTransform::PartialResultTransform(const Block & header, UInt64 part IProcessor::Status PartialResultTransform::prepare() { + // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "KEK {}", getName()); if (output.isFinished()) { + // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "output.isFinished()"); input.close(); return Status::Finished; } if (finished_getting_snapshots) { + // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "finished_getting_snapshots"); output.finish(); return Status::Finished; } if (input.hasData()) + { + // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "input.hasData()"); partial_result = {input.pull(), SnaphotStatus::Ready}; + } /// Send partial result from real processor snapshot or from previous partial result processor if possible if (partial_result.snapshot_status == SnaphotStatus::Ready && output.canPush()) { + // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "partial_result.snapshot_status == SnaphotStatus::Ready && output.canPush()"); transformPartialResult(partial_result.chunk); partial_result.snapshot_status = SnaphotStatus::NotReady; if (partial_result.chunk.getNumRows() > 0) { + // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "partial_result.chunk.getNumRows() > 0"); output.push(std::move(partial_result.chunk)); return Status::PortFull; } @@ -45,15 +54,18 @@ IProcessor::Status PartialResultTransform::prepare() /// PartialResultTransform ready to create snapshots and send them as a partial result if (input.isFinished()) { + // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "input.isFinished()"); return Status::Ready; } input.setNeeded(); + // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "input.setNeeded()"); return Status::NeedData; } void PartialResultTransform::work() { + // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "Name {}. Duration_ms {}. 
Elapsed {}", getName(), partial_result_duration_ms, watch.elapsedMilliseconds()); if (partial_result_duration_ms < watch.elapsedMilliseconds()) { partial_result = getRealProcessorSnapshot(); diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 6de45d67133..454f14d5626 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -411,12 +411,11 @@ void Pipe::addExtremesSource(ProcessorPtr source) void Pipe::activatePartialResult(UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) { if (!is_partial_result_active) - { - is_partial_result_active = true; - partial_result_limit = partial_result_limit_; - partial_result_duration_ms = partial_result_duration_ms_; partial_result_ports.assign(output_ports.size(), nullptr); - } + + is_partial_result_active = true; + partial_result_limit = partial_result_limit_; + partial_result_duration_ms = partial_result_duration_ms_; } static void dropPort(OutputPort *& port, Processors & processors, Processors * collected_processors) @@ -881,7 +880,9 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) add_transform(totals_port, StreamType::Totals); add_transform(extremes_port, StreamType::Extremes); - dropPartialResult(); + + for (auto & port : partial_result_ports) + add_transform(port, StreamType::PartialResult); output_ports.clear(); header.clear(); @@ -892,6 +893,9 @@ void Pipe::transform(const Transformer & transformer, bool check_ports) if (output_ports.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot transform empty Pipe"); + /// TODO: Add functionality to work with partial result ports in transformer. + dropPartialResult(); + auto new_processors = transformer(output_ports); /// Create hash table with new processors. diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index 8b26435fd30..cbb7a8cbf5c 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -85,6 +85,7 @@ public: Main = 0, /// Stream for query data. There may be several streams of this type. Totals, /// Stream for totals. No more than one. Extremes, /// Stream for extremes. No more than one. + PartialResult, /// Stream for partial result data. There may be several streams of this type. 
}; using ProcessorGetter = std::function; diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index 190cc855918..8fca23dba7d 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -343,6 +343,7 @@ QueryPipeline::QueryPipeline( } QueryPipeline::QueryPipeline(Pipe pipe) + : partial_result_duration_ms(pipe.partial_result_duration_ms) { if (pipe.numOutputPorts() > 0) { diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index b93bc8647cf..cabdc8b3858 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -160,6 +160,9 @@ private: size_t num_threads = 0; + UInt64 partial_result_limit = 0; + UInt64 partial_result_duration_ms = 0; + friend class PushingPipelineExecutor; friend class PullingPipelineExecutor; friend class PushingAsyncPipelineExecutor; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 553b18dd57b..7da22b02ac6 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -110,6 +110,16 @@ void QueryPipelineBuilder::init(QueryPipeline & pipeline) pipe.header = {}; } + if (pipeline.partial_result) + { + /// Set partial result ports only after activation because when activated, it is set to nullptr + pipe.activatePartialResult(pipeline.partial_result_limit, pipeline.partial_result_duration_ms); + pipe.partial_result_ports = {pipeline.partial_result}; + } + + if (!pipeline.partial_result) + pipe.dropPartialResult(); + pipe.totals_port = pipeline.totals; pipe.extremes_port = pipeline.extremes; pipe.max_parallel_streams = pipeline.num_threads; @@ -347,6 +357,10 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped { left->checkInitializedAndNotCompleted(); right->checkInitializedAndNotCompleted(); + + /// TODO: Support joining of partial results from different pipelines. + left->pipe.dropPartialResult(); + right->pipe.dropPartialResult(); left->pipe.dropExtremes(); right->pipe.dropExtremes(); @@ -360,6 +374,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped auto joining = std::make_shared(join, inputs, out_header, max_block_size); + /// TODO: Support partial results in merge pipelines after joining support above. return mergePipelines(std::move(left), std::move(right), std::move(joining), collected_processors); } @@ -380,6 +395,10 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe left->pipe.dropExtremes(); right->pipe.dropExtremes(); + /// TODO: Support joining of partial results from different pipelines. + left->pipe.dropPartialResult(); + right->pipe.dropPartialResult(); + left->pipe.collected_processors = collected_processors; /// Collect the NEW processors for the right pipeline. 
@@ -634,7 +653,7 @@ PipelineExecutorPtr QueryPipelineBuilder::execute() if (!isCompleted()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute pipeline because it is not completed"); - return std::make_shared(pipe.processors, process_list_element); + return std::make_shared(pipe.processors, process_list_element, pipe.partial_result_duration_ms); } Pipe QueryPipelineBuilder::getPipe(QueryPipelineBuilder pipeline, QueryPlanResourceHolder & resources) From f9d7fe9ab9a3cd36bcc40ec9f529c0bd4980cd63 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 1 Aug 2023 14:32:10 +0000 Subject: [PATCH 0114/1687] Fix misleading error message in OUTFILE with CapnProto/Protobuf --- src/Client/ClientBase.cpp | 4 +++- .../02842_capn_proto_outfile_without_schema.reference | 1 + .../02842_capn_proto_outfile_without_schema.sh | 10 ++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02842_capn_proto_outfile_without_schema.reference create mode 100755 tests/queries/0_stateless/02842_capn_proto_outfile_without_schema.sh diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 06dabf96c28..1434498e549 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1037,7 +1037,9 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b } catch (const LocalFormatError &) { - local_format_error = std::current_exception(); + /// Remember the first exception. + if (!local_format_error) + local_format_error = std::current_exception(); connection->sendCancel(); } } diff --git a/tests/queries/0_stateless/02842_capn_proto_outfile_without_schema.reference b/tests/queries/0_stateless/02842_capn_proto_outfile_without_schema.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02842_capn_proto_outfile_without_schema.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02842_capn_proto_outfile_without_schema.sh b/tests/queries/0_stateless/02842_capn_proto_outfile_without_schema.sh new file mode 100755 index 00000000000..fde08484026 --- /dev/null +++ b/tests/queries/0_stateless/02842_capn_proto_outfile_without_schema.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +$CLICKHOUSE_LOCAL -q "select * from numbers(10) into outfile '$CLICKHOUSE_TEST_UNIQUE_NAME.capnp'" 2>&1 | grep "The format CapnProto requires a schema" -c +rm $CLICKHOUSE_TEST_UNIQUE_NAME.capnp + From d12e96177a390464349d71888aef3d1b19243c2d Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 1 Aug 2023 16:17:03 +0000 Subject: [PATCH 0115/1687] Fix tests --- .../OptimizeDateOrDateTimeConverterWithPreimagePass.cpp | 2 +- src/IO/PeekableWriteBuffer.cpp | 2 +- .../Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp | 6 ++++++ .../Formats/Impl/JSONCompactEachRowRowOutputFormat.h | 2 ++ src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp | 5 +++++ src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h | 2 ++ .../Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp | 2 +- src/Processors/Formats/Impl/JSONRowOutputFormat.cpp | 1 + .../Formats/Impl/ParallelFormattingOutputFormat.cpp | 4 ++-- .../Formats/Impl/ParallelFormattingOutputFormat.h | 4 ++++ src/Processors/Formats/Impl/XMLRowOutputFormat.cpp | 5 +++-- .../Formats/OutputFormatWithUTF8ValidationAdaptor.h | 6 +++++- .../Formats/RowOutputFormatWithExceptionHandlerAdaptor.h | 4 +++- 13 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index 7205ac299a9..774c07ef1d4 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -48,7 +48,7 @@ public: return true; } - void visitImpl(QueryTreeNodePtr & node) const + void enterImpl(QueryTreeNodePtr & node) const { const static std::unordered_map swap_relations = { {"equals", "equals"}, diff --git a/src/IO/PeekableWriteBuffer.cpp b/src/IO/PeekableWriteBuffer.cpp index dc7f87dd539..87c7291c377 100644 --- a/src/IO/PeekableWriteBuffer.cpp +++ b/src/IO/PeekableWriteBuffer.cpp @@ -6,7 +6,7 @@ namespace DB PeekableWriteBuffer::PeekableWriteBuffer(DB::WriteBuffer & sub_buf_) : BufferWithOwnMemory(0), sub_buf(sub_buf_) { Buffer & sub_working = sub_buf.buffer(); - BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset()); + BufferBase::set(sub_working.begin() + sub_buf.offset(), sub_working.size() - sub_buf.offset(), 0); } void PeekableWriteBuffer::nextImpl() diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp index c5c9af60982..530d09d5c87 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp @@ -116,6 +116,12 @@ void JSONCompactEachRowRowOutputFormat::writeSuffix() } } +void JSONCompactEachRowRowOutputFormat::resetFormatterImpl() +{ + RowOutputFormatWithExceptionHandlerAdaptor::resetFormatterImpl(); + ostr = RowOutputFormatWithExceptionHandlerAdaptor::getWriteBufferPtr(); +} + void registerOutputFormatJSONCompactEachRow(FormatFactory & factory) { for (bool yield_strings : {false, true}) diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h index a05fff699a5..bd32592a4a0 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h @@ -36,6 +36,8 @@ private: void writeRowEndDelimiter() override; void writeSuffix() override; + void 
resetFormatterImpl() override; + bool supportTotals() const override { return true; } void consumeTotals(Chunk) override; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp index 2169d815fbf..a7118c2154a 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp @@ -92,6 +92,11 @@ void JSONEachRowRowOutputFormat::writeSuffix() writeCString("\n]\n", *ostr); } +void JSONEachRowRowOutputFormat::resetFormatterImpl() +{ + RowOutputFormatWithExceptionHandlerAdaptor::resetFormatterImpl(); + ostr = RowOutputFormatWithExceptionHandlerAdaptor::getWriteBufferPtr(); +} void registerOutputFormatJSONEachRow(FormatFactory & factory) { diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h index 28bfbf2e6ac..2de9369846b 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h @@ -39,6 +39,8 @@ protected: void writePrefix() override; void writeSuffix() override; + void resetFormatterImpl() override; + size_t field_number = 0; bool pretty_json; diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp index 8f4d11a604a..26aa0aad97c 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp @@ -68,7 +68,7 @@ void JSONObjectEachRowRowOutputFormat::writeSuffix() writeRowBetweenDelimiter(); JSONUtils::writeException(exception_message, *ostr, settings, 1); } - + JSONUtils::writeObjectEnd(*ostr); writeChar('\n', *ostr); } diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index e4c4e2a3bc6..20182d84917 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -135,6 +135,7 @@ void JSONRowOutputFormat::finalizeImpl() void JSONRowOutputFormat::resetFormatterImpl() { RowOutputFormatWithExceptionHandlerAdaptor::resetFormatterImpl(); + ostr = RowOutputFormatWithExceptionHandlerAdaptor::getWriteBufferPtr(); row_count = 0; statistics = Statistics(); } diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp index 3e63e2abd6c..841ef683228 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp @@ -36,10 +36,10 @@ namespace DB formatter->setRowsReadBefore(rows_collected); formatter->setException(exception_message); - if (!collected_prefix) + if (!collected_prefix && (need_write_prefix || started_prefix)) formatter->writePrefix(); - if (!collected_suffix) + if (!collected_suffix && (need_write_suffix || started_suffix)) formatter->writeSuffix(); if (!collected_finalize) diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h index b9a3b7638fa..bf8968dd376 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h @@ -118,6 +118,7 @@ public: void writePrefix() override { addChunk(Chunk{}, ProcessingUnitType::START, /*can_throw_exception*/ true); + started_prefix = true; } void 
onCancel() override @@ -134,6 +135,7 @@ public: void writeSuffix() override { addChunk(Chunk{}, ProcessingUnitType::PLAIN_FINISH, /*can_throw_exception*/ true); + started_suffix = true; } String getContentType() const override @@ -259,7 +261,9 @@ private: String exception_message; bool exception_is_rethrown = false; + bool started_prefix = false; bool collected_prefix = false; + bool started_suffix = false; bool collected_suffix = false; bool collected_finalize = false; diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp index eb735cc93aa..52c161c3208 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp @@ -8,7 +8,7 @@ namespace DB { XMLRowOutputFormat::XMLRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : RowOutputFormatWithExceptionHandlerAdaptor(header_, out_, true, format_settings_.xml.valid_output_on_exception), fields(header_.getNamesAndTypes()), format_settings(format_settings_) + : RowOutputFormatWithExceptionHandlerAdaptor(header_, out_, format_settings_.xml.valid_output_on_exception, true), fields(header_.getNamesAndTypes()), format_settings(format_settings_) { ostr = RowOutputFormatWithExceptionHandlerAdaptor::getWriteBufferPtr(); const auto & sample = getPort(PortKind::Main).getHeader(); @@ -203,7 +203,8 @@ void XMLRowOutputFormat::finalizeImpl() void XMLRowOutputFormat::resetFormatterImpl() { - RowOutputFormatWithUTF8ValidationAdaptor::resetFormatterImpl(); + RowOutputFormatWithExceptionHandlerAdaptor::resetFormatterImpl(); + ostr = RowOutputFormatWithExceptionHandlerAdaptor::getWriteBufferPtr(); row_count = 0; statistics = Statistics(); } diff --git a/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h b/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h index f86ff278b33..4c5c3ef72e9 100644 --- a/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h +++ b/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h @@ -6,6 +6,8 @@ #include #include +#include + namespace DB { @@ -43,8 +45,10 @@ public: void resetFormatterImpl() override { + LOG_DEBUG(&Poco::Logger::get("RowOutputFormatWithExceptionHandlerAdaptor"), "resetFormatterImpl"); Base::resetFormatterImpl(); - validating_ostr = std::make_unique(*Base::getWriteBufferPtr()); + if (validating_ostr) + validating_ostr = std::make_unique(*Base::getWriteBufferPtr()); } protected: diff --git a/src/Processors/Formats/RowOutputFormatWithExceptionHandlerAdaptor.h b/src/Processors/Formats/RowOutputFormatWithExceptionHandlerAdaptor.h index 4e797c521c0..22232e9f654 100644 --- a/src/Processors/Formats/RowOutputFormatWithExceptionHandlerAdaptor.h +++ b/src/Processors/Formats/RowOutputFormatWithExceptionHandlerAdaptor.h @@ -76,8 +76,10 @@ public: void resetFormatterImpl() override { + LOG_DEBUG(&Poco::Logger::get("RowOutputFormatWithExceptionHandlerAdaptor"), "resetFormatterImpl"); Base::resetFormatterImpl(); - peekable_out = std::make_unique(*Base::getWriteBufferPtr()); + if (peekable_out) + peekable_out = std::make_unique(*Base::getWriteBufferPtr()); } bool supportsWritingException() const override { return true; } From ef3f5e2a7c21a9629da25434f257f725faef4307 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 2 Aug 2023 10:15:56 +0800 Subject: [PATCH 0116/1687] fix performance tests error --- tests/performance/uniq_to_count.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/tests/performance/uniq_to_count.xml b/tests/performance/uniq_to_count.xml index 580013f2e81..a9ac1d7cd5a 100644 --- a/tests/performance/uniq_to_count.xml +++ b/tests/performance/uniq_to_count.xml @@ -1,10 +1,10 @@ 1 - select uniq(number) from (select DISTINCT number from numbers(100000000)) - select uniq(number) from (select number from numbers(100000000) group by number) + select uniq(number) from (select DISTINCT number from numbers(100000000)) SETTINGS optimize_uniq_to_count=1 + select uniq(number) from (select number from numbers(100000000) group by number) SETTINGS optimize_uniq_to_count=1 - select uniq(number) from (select DISTINCT number from numbers(100000000)) settings allow_experimental_analyzer = 1 - select uniq(number) from (select number from numbers(100000000) group by number) settings allow_experimental_analyzer = 1 + select uniq(number) from (select DISTINCT number from numbers(100000000)) SETTINGS allow_experimental_analyzer=1, optimize_uniq_to_count=1 + select uniq(number) from (select number from numbers(100000000) group by number) SETTINGS allow_experimental_analyzer=1, optimize_uniq_to_count=1 From 162c674d747244a262ebc0db4c4d04f464acf58e Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 2 Aug 2023 10:50:04 +0800 Subject: [PATCH 0117/1687] remove settings in uniq_to_count --- tests/performance/uniq_to_count.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/performance/uniq_to_count.xml b/tests/performance/uniq_to_count.xml index a9ac1d7cd5a..204bc829951 100644 --- a/tests/performance/uniq_to_count.xml +++ b/tests/performance/uniq_to_count.xml @@ -1,6 +1,4 @@ - 1 - select uniq(number) from (select DISTINCT number from numbers(100000000)) SETTINGS optimize_uniq_to_count=1 select uniq(number) from (select number from numbers(100000000) group by number) SETTINGS optimize_uniq_to_count=1 From 1c930f34de2ae3135dc28f64ac066eff5dbafbf1 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 2 Aug 2023 18:10:01 +0800 Subject: [PATCH 0118/1687] reduce performance time --- src/Analyzer/Passes/UniqToCountPass.cpp | 2 +- tests/performance/uniq_to_count.xml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index 200c8cef343..4373918a8cc 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -110,7 +110,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_uniq_to_count) return; diff --git a/tests/performance/uniq_to_count.xml b/tests/performance/uniq_to_count.xml index 204bc829951..c7403bf7bd8 100644 --- a/tests/performance/uniq_to_count.xml +++ b/tests/performance/uniq_to_count.xml @@ -1,8 +1,8 @@ - select uniq(number) from (select DISTINCT number from numbers(100000000)) SETTINGS optimize_uniq_to_count=1 - select uniq(number) from (select number from numbers(100000000) group by number) SETTINGS optimize_uniq_to_count=1 + select uniq(number) from (select DISTINCT number from numbers(1000000)) SETTINGS optimize_uniq_to_count=1 + select uniq(number) from (select number from numbers(1000000) group by number) SETTINGS optimize_uniq_to_count=1 - select uniq(number) from (select DISTINCT number from numbers(100000000)) SETTINGS allow_experimental_analyzer=1, optimize_uniq_to_count=1 - select uniq(number) from (select number from numbers(100000000) group by number) SETTINGS 
allow_experimental_analyzer=1, optimize_uniq_to_count=1 + select uniq(number) from (select DISTINCT number from numbers(1000000)) SETTINGS allow_experimental_analyzer=1, optimize_uniq_to_count=1 + select uniq(number) from (select number from numbers(1000000) group by number) SETTINGS allow_experimental_analyzer=1, optimize_uniq_to_count=1 From 43ea21a4ce93e4a0bd3ad41baba0f20795417562 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 2 Aug 2023 18:28:22 +0800 Subject: [PATCH 0119/1687] make default optimize_uniq_to_count to true --- src/Core/Settings.h | 2 +- tests/performance/uniq_to_count.xml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 74bb56bebee..b56d155cd6f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -762,7 +762,7 @@ class IColumn; M(Bool, function_json_value_return_type_allow_nullable, false, "Allow function JSON_VALUE to return nullable type.", 0) \ M(Bool, function_json_value_return_type_allow_complex, false, "Allow function JSON_VALUE to return complex type, such as: struct, array, map.", 0) \ M(Bool, use_with_fill_by_sorting_prefix, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently", 0) \ - M(Bool, optimize_uniq_to_count, false, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \ + M(Bool, optimize_uniq_to_count, true, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ diff --git a/tests/performance/uniq_to_count.xml b/tests/performance/uniq_to_count.xml index c7403bf7bd8..64e4cf1cc0d 100644 --- a/tests/performance/uniq_to_count.xml +++ b/tests/performance/uniq_to_count.xml @@ -1,8 +1,8 @@ - select uniq(number) from (select DISTINCT number from numbers(1000000)) SETTINGS optimize_uniq_to_count=1 - select uniq(number) from (select number from numbers(1000000) group by number) SETTINGS optimize_uniq_to_count=1 + select uniq(number) from (select DISTINCT number from numbers(1000000)) + select uniq(number) from (select number from numbers(1000000) group by number) - select uniq(number) from (select DISTINCT number from numbers(1000000)) SETTINGS allow_experimental_analyzer=1, optimize_uniq_to_count=1 - select uniq(number) from (select number from numbers(1000000) group by number) SETTINGS allow_experimental_analyzer=1, optimize_uniq_to_count=1 + select uniq(number) from (select DISTINCT number from numbers(1000000)) SETTINGS allow_experimental_analyzer=1 + select uniq(number) from (select number from numbers(1000000) group by number) SETTINGS allow_experimental_analyzer=1 From 81866bcc9c3f7fcf68ec9cd908a0fa7013dbf980 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 2 Aug 2023 12:35:58 +0200 Subject: [PATCH 0120/1687] Fix special build --- .../Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp index 26aa0aad97c..46b3f56f3cc 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp @@ -28,7 +28,7 @@ 
void JSONObjectEachRowRowOutputFormat::write(const Columns & columns, size_t row else object_name = "row_" + std::to_string(row + 1); - IRowOutputFormat::write(columns, row); + RowOutputFormatWithExceptionHandlerAdaptor::write(columns, row); } void JSONObjectEachRowRowOutputFormat::writeFieldDelimiter() From 77cc84a4d2937bea024c1cefbcf17f99ee7ac7c7 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 2 Aug 2023 12:43:25 +0200 Subject: [PATCH 0121/1687] Fix test --- ...41_valid_json_and_xml_on_http_exception.sh | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh index cb4d1b6aee1..60ce7eb3b6f 100755 --- a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh +++ b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh @@ -4,6 +4,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh +CH_URL='$CLICKHOUSE_URL&http_write_exception_in_output_format=1&allow_experimental_analyzer=0' + echo "One block" for parallel in 0 1 do @@ -11,7 +13,7 @@ do for format in JSON JSONEachRow JSONCompact JSONCompactEachRow JSONObjectEachRow XML do echo $format - ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select number, throwIf(number > 3) as res from numbers(10) format $format settings output_format_parallel_formatting=$parallel" | sed "s/(version .*)//" | sed "s/DB::Exception//" + ${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select number, throwIf(number > 3) as res from numbers(10) format $format settings output_format_parallel_formatting=$parallel" | sed "s/(version .*)//" | sed "s/DB::Exception//" done done @@ -20,20 +22,20 @@ echo "Without parallel formatting" for format in JSON JSONEachRow JSONCompact JSONCompactEachRow JSONObjectEachRow XML do echo $format - ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select number, throwIf(number > 3) as res from system.numbers format $format settings max_block_size=1, output_format_parallel_formatting=0" | sed "s/(version .*)//" | sed "s/DB::Exception//" + ${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select number, throwIf(number > 3) as res from system.numbers format $format settings max_block_size=1, output_format_parallel_formatting=0" | sed "s/(version .*)//" | sed "s/DB::Exception//" done echo "With parallel formatting" for format in JSON JSONCompact JSONObjectEachRow do echo $format - ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select number, throwIf(number > 3) as res from system.numbers format $format settings max_block_size=1, output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" + ${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select number, throwIf(number > 3) as res from system.numbers format $format settings max_block_size=1, output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" done for format in JSONEachRow JSONCompactEachRow do echo $format - ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select number, throwIf(number > 3) as res from system.numbers format $format settings max_block_size=1, output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL 
--input-format=LineAsString -q "select min(isValidJSON(line)) from table" + ${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select number, throwIf(number > 3) as res from system.numbers format $format settings max_block_size=1, output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=LineAsString -q "select min(isValidJSON(line)) from table" done echo "Formatting error" @@ -51,20 +53,20 @@ echo "Without parallel formatting" for format in JSON JSONEachRow JSONCompact JSONCompactEachRow JSONObjectEachRow XML do echo $format - ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 order by x format $format settings output_format_parallel_formatting=0" | sed "s/(version .*)//" | sed "s/DB::Exception//" + ${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select * from test_02841 order by x format $format settings output_format_parallel_formatting=0" | sed "s/(version .*)//" | sed "s/DB::Exception//" done echo "With parallel formatting" for format in JSON JSONCompact JSONObjectEachRow do echo $format - ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format $format settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" + ${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select * from test_02841 format $format settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" done for format in JSONEachRow JSONCompactEachRow do echo $format - ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format $format settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=LineAsString -q "select min(isValidJSON(line)) from table" + ${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select * from test_02841 format $format settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=LineAsString -q "select min(isValidJSON(line)) from table" done @@ -74,8 +76,8 @@ $CLICKHOUSE_CLIENT -q "insert into test_02841 select 1, repeat('aaaaa', 1000000) $CLICKHOUSE_CLIENT -q "insert into test_02841 select 2, repeat('aaaaa', 1000000), 99" $CLICKHOUSE_CLIENT -q "insert into test_02841 select 3, repeat('aaaaa', 1000000), 1" -${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=0" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" -${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" +${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=0" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" +${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" echo "Test 2" @@ -87,8 +89,8 @@ $CLICKHOUSE_CLIENT -q "insert into test_02841 values (3, 'str4', 99)" $CLICKHOUSE_CLIENT -q "insert into test_02841 values (4, 'str5', 1)" $CLICKHOUSE_CLIENT -q "insert into test_02841 select number, 'str_numbers_2', 1 from 
numbers(10000)" -${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=0" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" -${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" +${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=0" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" +${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" echo "Test 3" $CLICKHOUSE_CLIENT -q "truncate table test_02841" @@ -99,8 +101,8 @@ $CLICKHOUSE_CLIENT -q "insert into test_02841 values (3, 'str4', 1)" $CLICKHOUSE_CLIENT -q "insert into test_02841 values (4, 'str5', 1)" $CLICKHOUSE_CLIENT -q "insert into test_02841 select number, 'str_numbers_2', 1 from numbers(10000)" -${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=0" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" -${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL&http_write_exception_in_output_format=1" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" +${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=0" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" +${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select * from test_02841 format JSON settings output_format_parallel_formatting=1" | $CLICKHOUSE_LOCAL --input-format=JSONAsString -q "select isValidJSON(json) from table" $CLICKHOUSE_CLIENT -q "drop table test_02841" From d85b16dc71256049269d279bbcc2aab45ff688ee Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 2 Aug 2023 12:45:26 +0200 Subject: [PATCH 0122/1687] Fix test reference --- ...2841_valid_json_and_xml_on_http_exception.reference | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference index 452aa9d5022..1818ca7b5f2 100644 --- a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference +++ b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference @@ -230,9 +230,9 @@ JSONCompactEachRow JSONObjectEachRow { "row_1": {"number":"0","res":0}, - "row_1": {"number":"1","res":0}, - "row_1": {"number":"2","res":0}, - "row_1": {"number":"3","res":0}, + "row_2": {"number":"1","res":0}, + "row_3": {"number":"2","res":0}, + "row_4": {"number":"3","res":0}, "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. 
(FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " } XML @@ -367,8 +367,8 @@ JSONCompactEachRow JSONObjectEachRow { "row_1": {"x":1,"s":"str1","y":"a"}, - "row_1": {"x":2,"s":"str2","y":"a"}, - "row_1": {"x":3,"s":"str3","y":"a"}, + "row_2": {"x":2,"s":"str2","y":"a"}, + "row_3": {"x":3,"s":"str3","y":"a"}, "exception": "Code: 36. : Unexpected value 99 in enum: While executing JSONObjectEachRowRowOutputFormat. (BAD_ARGUMENTS) " } XML From 9ac2d12f3bd81cae5f2c449b70ada1c30f7421cf Mon Sep 17 00:00:00 2001 From: alexX512 Date: Wed, 2 Aug 2023 13:07:02 +0000 Subject: [PATCH 0123/1687] Add test for partial sorting result --- src/Processors/Executors/ExecutingGraph.cpp | 4 - src/Processors/Executors/ExecutorTasks.cpp | 6 - src/Processors/Executors/PipelineExecutor.h | 2 +- src/Processors/Executors/TasksQueue.h | 1 - src/Processors/ISource.cpp | 24 -- .../Transforms/PartialResultTransform.cpp | 12 - src/QueryPipeline/Pipe.cpp | 2 +- src/QueryPipeline/QueryPipelineBuilder.cpp | 4 +- .../0_stateless/02010_lc_native.python | 283 ++---------------- .../02458_insert_select_progress_tcp.python | 236 ++------------- .../02750_settings_alias_tcp_protocol.python | 211 +------------ ...rting_result_during_query_execution.python | 49 +++ ...ng_result_during_query_execution.reference | 21 ++ ...l_sorting_result_during_query_execution.sh | 8 + .../queries/0_stateless/helpers/tcp_client.py | 274 +++++++++++++++++ 15 files changed, 415 insertions(+), 722 deletions(-) create mode 100755 tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python create mode 100644 tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.reference create mode 100755 tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.sh create mode 100644 tests/queries/0_stateless/helpers/tcp_client.py diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index e639cea75ea..6a946b4a4b9 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB { @@ -61,7 +60,6 @@ bool ExecutingGraph::addEdges(uint64_t node) /// Add backward edges from input ports. auto & inputs = from->getInputs(); auto from_input = nodes[node]->back_edges.size(); - LOG_DEBUG(&Poco::Logger::get("ExecutingGraph::addEdges"), "Node id {}. Node name {}. Backward edges size {}. Inputs size {}. Direct edges size {}. Outputs size {}", node, from->getName(), from_input, inputs.size(), nodes[node]->direct_edges.size(), from->getOutputs().size()); if (from_input < inputs.size()) { @@ -222,7 +220,6 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue updated_processors.push(pid); std::shared_lock read_lock(nodes_mutex); - // LOG_DEBUG(&Poco::Logger::get("ExecutingGraph::updateNode"), "Original node for update. Node id {}. Node name {}", pid, nodes[pid]->processor->getName()); while (!updated_processors.empty() || !updated_edges.empty()) { @@ -284,7 +281,6 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue IProcessor::Status last_status = node.last_processor_status; IProcessor::Status status = processor.prepare(node.updated_input_ports, node.updated_output_ports); node.last_processor_status = status; - // LOG_DEBUG(&Poco::Logger::get("ExecutingGraph::updateNode"), "Node id {}. Node name {}. 
Status {}", pid, nodes[pid]->processor->getName(), status); if (profile_processors) { diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp index 9cbebb6996c..08920592391 100644 --- a/src/Processors/Executors/ExecutorTasks.cpp +++ b/src/Processors/Executors/ExecutorTasks.cpp @@ -107,8 +107,6 @@ void ExecutorTasks::tryGetTask(ExecutionThreadContext & context) void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThreadContext & context) { context.setTask(nullptr); - // if (!queue.empty()) - // LOG_DEBUG(&Poco::Logger::get("ExecutorTasks::pushTasks"), "First task to add in queue. Processor id {}. Processor name {}", queue.front()->processors_id, queue.front()->processor->getName()); /// If sending partial results is allowed and local tasks scheduling optimization is repeated longer than the limit /// or new task need to send partial result later, skip optimization for this iteration. @@ -126,8 +124,6 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea if (!queue.empty() || !async_queue.empty()) { - // LOG_DEBUG(&Poco::Logger::get("ExecutorTasks::pushTasks"), "After local optimizations push new tasks to queue"); - // task_queue.print(); std::unique_lock lock(mutex); #if defined(OS_LINUX) @@ -148,8 +144,6 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea /// Wake up at least one thread that will wake up other threads if required tryWakeUpAnyOtherThreadWithTasks(context, lock); } - // LOG_DEBUG(&Poco::Logger::get("ExecutorTasks::pushTasks"), "After push new tasks to queue"); - // task_queue.print(); } void ExecutorTasks::init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback, UInt64 partial_result_duration_ms) diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index fe424114bee..63524efc6d4 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -89,7 +89,7 @@ private: QueryStatusPtr process_list_element; ReadProgressCallbackPtr read_progress_callback; - + /// Duration between sending partial result through the pipeline const UInt64 partial_result_duration_ms; diff --git a/src/Processors/Executors/TasksQueue.h b/src/Processors/Executors/TasksQueue.h index bf8e8a1cde4..bb8996fc1a6 100644 --- a/src/Processors/Executors/TasksQueue.h +++ b/src/Processors/Executors/TasksQueue.h @@ -2,7 +2,6 @@ #include #include #include -#include namespace DB { diff --git a/src/Processors/ISource.cpp b/src/Processors/ISource.cpp index 34448186fa8..6a88d3973a1 100644 --- a/src/Processors/ISource.cpp +++ b/src/Processors/ISource.cpp @@ -1,6 +1,5 @@ #include #include -#include namespace DB @@ -22,61 +21,38 @@ ISource::ISource(Block header, bool enable_auto_progress) ISource::Status ISource::prepare() { - const auto name = getName(); - if (name == "NullSource") - LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "KEK {}", getName()); if (finished) { - if (name == "NullSource") - LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "finished"); output.finish(); return Status::Finished; } /// Check can output. 
if (output.isFinished()) - { - if (name == "NullSource") - LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "output.isFinished()"); return Status::Finished; - } if (!output.canPush()) - { - if (name == "NullSource") - LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "!output.canPush()"); return Status::PortFull; - } if (!has_input) - { - if (name == "NullSource") - LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "!has_input"); return Status::Ready; - } output.pushData(std::move(current_chunk)); has_input = false; if (isCancelled()) { - if (name == "NullSource") - LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "isCancelled()"); output.finish(); return Status::Finished; } if (got_exception) { - if (name == "NullSource") - LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "got_exception"); finished = true; output.finish(); return Status::Finished; } - if (name == "NullSource") - LOG_DEBUG(&Poco::Logger::get("ISource::prepare"), "Status::PortFull"); /// Now, we pushed to output, and it must be full. return Status::PortFull; } diff --git a/src/Processors/Transforms/PartialResultTransform.cpp b/src/Processors/Transforms/PartialResultTransform.cpp index c3f38e77205..7cf2473684a 100644 --- a/src/Processors/Transforms/PartialResultTransform.cpp +++ b/src/Processors/Transforms/PartialResultTransform.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { @@ -15,36 +14,28 @@ PartialResultTransform::PartialResultTransform(const Block & header, UInt64 part IProcessor::Status PartialResultTransform::prepare() { - // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "KEK {}", getName()); if (output.isFinished()) { - // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "output.isFinished()"); input.close(); return Status::Finished; } if (finished_getting_snapshots) { - // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "finished_getting_snapshots"); output.finish(); return Status::Finished; } if (input.hasData()) - { - // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "input.hasData()"); partial_result = {input.pull(), SnaphotStatus::Ready}; - } /// Send partial result from real processor snapshot or from previous partial result processor if possible if (partial_result.snapshot_status == SnaphotStatus::Ready && output.canPush()) { - // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "partial_result.snapshot_status == SnaphotStatus::Ready && output.canPush()"); transformPartialResult(partial_result.chunk); partial_result.snapshot_status = SnaphotStatus::NotReady; if (partial_result.chunk.getNumRows() > 0) { - // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "partial_result.chunk.getNumRows() > 0"); output.push(std::move(partial_result.chunk)); return Status::PortFull; } @@ -54,18 +45,15 @@ IProcessor::Status PartialResultTransform::prepare() /// PartialResultTransform ready to create snapshots and send them as a partial result if (input.isFinished()) { - // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "input.isFinished()"); return Status::Ready; } input.setNeeded(); - // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "input.setNeeded()"); return Status::NeedData; } void PartialResultTransform::work() { - // LOG_DEBUG(&Poco::Logger::get("PartialResultTransform::prepare"), "Name {}. Duration_ms {}. 
Elapsed {}", getName(), partial_result_duration_ms, watch.elapsedMilliseconds()); if (partial_result_duration_ms < watch.elapsedMilliseconds()) { partial_result = getRealProcessorSnapshot(); diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 454f14d5626..6ff2cb6a4fd 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -893,7 +893,7 @@ void Pipe::transform(const Transformer & transformer, bool check_ports) if (output_ports.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot transform empty Pipe"); - /// TODO: Add functionality to work with partial result ports in transformer. + /// TODO: Add functionality to work with partial result ports in transformer. dropPartialResult(); auto new_processors = transformer(output_ports); diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 7da22b02ac6..e22b0077d1c 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -116,7 +116,7 @@ void QueryPipelineBuilder::init(QueryPipeline & pipeline) pipe.activatePartialResult(pipeline.partial_result_limit, pipeline.partial_result_duration_ms); pipe.partial_result_ports = {pipeline.partial_result}; } - + if (!pipeline.partial_result) pipe.dropPartialResult(); @@ -357,7 +357,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped { left->checkInitializedAndNotCompleted(); right->checkInitializedAndNotCompleted(); - + /// TODO: Support joining of partial results from different pipelines. left->pipe.dropPartialResult(); right->pipe.dropPartialResult(); diff --git a/tests/queries/0_stateless/02010_lc_native.python b/tests/queries/0_stateless/02010_lc_native.python index a197d32a3b9..5054ad3d454 100755 --- a/tests/queries/0_stateless/02010_lc_native.python +++ b/tests/queries/0_stateless/02010_lc_native.python @@ -1,226 +1,26 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import socket import os -import uuid +import sys -CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") -CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) -CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -def writeVarUInt(x, ba): - for _ in range(0, 9): - byte = x & 0x7F - if x > 0x7F: - byte |= 0x80 - - ba.append(byte) - - x >>= 7 - if x == 0: - return - - -def writeStringBinary(s, ba): - b = bytes(s, "utf-8") - writeVarUInt(len(s), ba) - ba.extend(b) - - -def readStrict(s, size=1): - res = bytearray() - while size: - cur = s.recv(size) - # if not res: - # raise "Socket is closed" - size -= len(cur) - res.extend(cur) - - return res - - -def readUInt(s, size=1): - res = readStrict(s, size) - val = 0 - for i in range(len(res)): - val += res[i] << (i * 8) - return val - - -def readUInt8(s): - return readUInt(s) - - -def readUInt16(s): - return readUInt(s, 2) - - -def readUInt32(s): - return readUInt(s, 4) - - -def readUInt64(s): - return readUInt(s, 8) - - -def readVarUInt(s): - x = 0 - for i in range(9): - byte = readStrict(s)[0] - x |= (byte & 0x7F) << (7 * i) - - if not byte & 0x80: - return x - - return x - - -def readStringBinary(s): - size = readVarUInt(s) - s = readStrict(s, size) - return s.decode("utf-8") - - -def sendHello(s): - ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary("simple native protocol", ba) - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - 
writeStringBinary("default", ba) # database - writeStringBinary("default", ba) # user - writeStringBinary("", ba) # pwd - s.sendall(ba) - - -def receiveHello(s): - p_type = readVarUInt(s) - assert p_type == 0 # Hello - server_name = readStringBinary(s) - # print("Server name: ", server_name) - server_version_major = readVarUInt(s) - # print("Major: ", server_version_major) - server_version_minor = readVarUInt(s) - # print("Minor: ", server_version_minor) - server_revision = readVarUInt(s) - # print("Revision: ", server_revision) - server_timezone = readStringBinary(s) - # print("Timezone: ", server_timezone) - server_display_name = readStringBinary(s) - # print("Display name: ", server_display_name) - server_version_patch = readVarUInt(s) - # print("Version patch: ", server_version_patch) - - -def serializeClientInfo(ba, query_id): - writeStringBinary("default", ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary("127.0.0.1:9000", ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary("os_user", ba) # os_user - writeStringBinary("client_hostname", ba) # client_hostname - writeStringBinary("client_name", ba) # client_name - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary("", ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry - - -def sendQuery(s, query): - ba = bytearray() - query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query - writeStringBinary(query_id, ba) - - ba.append(1) # INITIAL_QUERY - - # client info - serializeClientInfo(ba, query_id) - - writeStringBinary("", ba) # No settings - writeStringBinary("", ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally - s.sendall(ba) - - -def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num - - -def sendEmptyBlock(s): - ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary("", ba) - serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns - s.sendall(ba) - - -def assertPacket(packet, expected): - assert packet == expected, packet - - -def readHeader(s): - packet_type = readVarUInt(s) - if packet_type == 2: # Exception - raise RuntimeError(readException(s)) - assertPacket(packet_type, 1) # Data - - readStringBinary(s) # external table name - # BlockInfo - assertPacket(readVarUInt(s), 1) # 1 - assertPacket(readUInt8(s), 0) # is_overflows - assertPacket(readVarUInt(s), 2) # 2 - assertPacket(readUInt32(s), 4294967295) # bucket_num - assertPacket(readVarUInt(s), 0) # 0 - columns = readVarUInt(s) # rows - rows = readVarUInt(s) # columns - print("Rows {} Columns {}".format(rows, columns)) - for _ in range(columns): - col_name = readStringBinary(s) - type_name = readStringBinary(s) - print("Column {} type {}".format(col_name, type_name)) - - -def readException(s): - code = readUInt32(s) - name = readStringBinary(s) - text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested - return "code {}: {}".format(code, text.replace("DB::Exception:", "")) +from tcp_client import TCPClient, CLICKHOUSE_DATABASE, writeVarUInt, writeStringBinary, serializeBlockInfo, assertPacket def insertValidLowCardinalityRow(): - with 
socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) - sendQuery( - s, + with TCPClient() as client: + client.sendQuery( "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - sendEmptyBlock(s) - readHeader(s) + client.sendEmptyBlock() + client.readHeader() # Data ba = bytearray() @@ -239,31 +39,25 @@ def insertValidLowCardinalityRow(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 8) # UInt64 index (0 for 'hello') - s.sendall(ba) + client.send(ba) # Fin block - sendEmptyBlock(s) + client.sendEmptyBlock() - assertPacket(readVarUInt(s), 5) # End of stream - s.close() + assertPacket(client.readVarUInt(), 5) # End of stream def insertLowCardinalityRowWithIndexOverflow(): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) - sendQuery( - s, + with TCPClient() as client: + client.sendQuery( "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - sendEmptyBlock(s) - readHeader(s) + client.sendEmptyBlock() + client.readHeader() # Data ba = bytearray() @@ -282,29 +76,23 @@ def insertLowCardinalityRowWithIndexOverflow(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 7 + [1]) # UInt64 index (overflow) - s.sendall(ba) + client.send(ba) - assertPacket(readVarUInt(s), 2) - print(readException(s)) - s.close() + assertPacket(client.readVarUInt(), 2) # Exception + print(client.readException()) def insertLowCardinalityRowWithIncorrectDictType(): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) - sendQuery( - s, + with TCPClient() as client: + client.sendQuery( "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - sendEmptyBlock(s) - readHeader(s) + client.sendEmptyBlock() + client.readHeader() # Data ba = bytearray() @@ -323,29 +111,23 @@ def insertLowCardinalityRowWithIncorrectDictType(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 8) # UInt64 index (overflow) - s.sendall(ba) + client.send(ba) - assertPacket(readVarUInt(s), 2) - print(readException(s)) - s.close() + assertPacket(client.readVarUInt(), 2) # Exception + print(client.readException()) def insertLowCardinalityRowWithIncorrectAdditionalKeys(): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) - sendQuery( - s, + with TCPClient() as client: + client.sendQuery( "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - sendEmptyBlock(s) - readHeader(s) + client.sendEmptyBlock() + client.readHeader() # Data ba = bytearray() @@ -364,11 +146,10 @@ def insertLowCardinalityRowWithIncorrectAdditionalKeys(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 8) # UInt64 index (0 for 'hello') - s.sendall(ba) + client.send(ba) - assertPacket(readVarUInt(s), 2) - print(readException(s)) - s.close() + 
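+        # The server is expected to reject the malformed LowCardinality block, so the
+        # next packet must be an Exception (packet type 2) rather than more data.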
assertPacket(client.readVarUInt(), 2) # Exception + print(client.readException()) def main(): diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python index 696eb01ff7e..1fe2363d7d5 100644 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python @@ -1,187 +1,23 @@ #!/usr/bin/env python3 -import socket -import os -import uuid import json +import os +import sys -CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") -CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) -CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -def writeVarUInt(x, ba): - for _ in range(0, 9): - byte = x & 0x7F - if x > 0x7F: - byte |= 0x80 - - ba.append(byte) - - x >>= 7 - if x == 0: - return - - -def writeStringBinary(s, ba): - b = bytes(s, "utf-8") - writeVarUInt(len(s), ba) - ba.extend(b) - - -def readStrict(s, size=1): - res = bytearray() - while size: - cur = s.recv(size) - # if not res: - # raise "Socket is closed" - size -= len(cur) - res.extend(cur) - - return res - - -def readUInt(s, size=1): - res = readStrict(s, size) - val = 0 - for i in range(len(res)): - val += res[i] << (i * 8) - return val - - -def readUInt8(s): - return readUInt(s) - - -def readUInt16(s): - return readUInt(s, 2) - - -def readUInt32(s): - return readUInt(s, 4) - - -def readUInt64(s): - return readUInt(s, 8) - - -def readVarUInt(s): - x = 0 - for i in range(9): - byte = readStrict(s)[0] - x |= (byte & 0x7F) << (7 * i) - - if not byte & 0x80: - return x - - return x - - -def readStringBinary(s): - size = readVarUInt(s) - s = readStrict(s, size) - return s.decode("utf-8") - - -def sendHello(s): - ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary("simple native protocol", ba) - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary(CLICKHOUSE_DATABASE, ba) # database - writeStringBinary("default", ba) # user - writeStringBinary("", ba) # pwd - s.sendall(ba) - - -def receiveHello(s): - p_type = readVarUInt(s) - assert p_type == 0 # Hello - server_name = readStringBinary(s) - # print("Server name: ", server_name) - server_version_major = readVarUInt(s) - # print("Major: ", server_version_major) - server_version_minor = readVarUInt(s) - # print("Minor: ", server_version_minor) - server_revision = readVarUInt(s) - # print("Revision: ", server_revision) - server_timezone = readStringBinary(s) - # print("Timezone: ", server_timezone) - server_display_name = readStringBinary(s) - # print("Display name: ", server_display_name) - server_version_patch = readVarUInt(s) - # print("Version patch: ", server_version_patch) - - -def serializeClientInfo(ba, query_id): - writeStringBinary("default", ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary("127.0.0.1:9000", ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary("os_user", ba) # os_user - writeStringBinary("client_hostname", ba) # client_hostname - writeStringBinary("client_name", ba) # client_name - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary("", ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # 
No telemetry - - -def sendQuery(s, query): - ba = bytearray() - query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query - writeStringBinary(query_id, ba) - - ba.append(1) # INITIAL_QUERY - - # client info - serializeClientInfo(ba, query_id) - - writeStringBinary("", ba) # No settings - writeStringBinary("", ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally - s.sendall(ba) - - -def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num - - -def sendEmptyBlock(s): - ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary("", ba) - serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns - s.sendall(ba) - - -def assertPacket(packet, expected): - assert packet == expected, packet +from tcp_client import TCPClient class Progress: - def __init__(self): + def __init__(self, read_rows=0, read_bytes=0, total_rows_to_read=0, written_rows=0, written_bytes=0): # NOTE: this is done in ctor to initialize __dict__ - self.read_rows = 0 - self.read_bytes = 0 - self.total_rows_to_read = 0 - self.written_rows = 0 - self.written_bytes = 0 + self.read_rows = read_rows + self.read_bytes = read_bytes + self.total_rows_to_read = total_rows_to_read + self.written_rows = written_rows + self.written_bytes = written_bytes def __str__(self): return json.dumps(self.__dict__) @@ -194,13 +30,6 @@ class Progress: self.written_bytes += b.written_bytes return self - def readPacket(self, s): - self.read_rows += readVarUInt(s) - self.read_bytes += readVarUInt(s) - self.total_rows_to_read += readVarUInt(s) - self.written_rows += readVarUInt(s) - self.written_bytes += readVarUInt(s) - def __bool__(self): return ( self.read_rows > 0 @@ -211,52 +40,25 @@ class Progress: ) -def readProgress(s): - packet_type = readVarUInt(s) - if packet_type == 2: # Exception - raise RuntimeError(readException(s)) - - if packet_type == 5: # End stream - return None - - assertPacket(packet_type, 3) # Progress - - progress = Progress() - progress.readPacket(s) - return progress - - -def readException(s): - code = readUInt32(s) - name = readStringBinary(s) - text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested - return "code {}: {}".format(code, text.replace("DB::Exception:", "")) - - def main(): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) + with TCPClient() as client: # For 1 second sleep and 1000ms of interactive_delay we definitelly should have non zero progress packet. # NOTE: interactive_delay=0 cannot be used since in this case CompletedPipelineExecutor will not call cancelled callback. 
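        # Each Progress packet carries five varints: rows read, bytes read, total rows
        # to read, rows written and bytes written; readProgress() in the shared helper
        # returns them as a tuple that is folded into summary_progress below.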
- sendQuery( - s, + client.sendQuery( "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000", ) # external tables - sendEmptyBlock(s) + client.sendEmptyBlock() summary_progress = Progress() non_empty_progress_packets = 0 while True: - progress = readProgress(s) - if progress is None: + progress_info = client.readProgress() + if progress_info is None: break + + progress = Progress(*progress_info) summary_progress += progress if progress: non_empty_progress_packets += 1 @@ -267,8 +69,6 @@ def main(): # - 1 or 2 for each SELECT block assert non_empty_progress_packets in (3, 4), f"{non_empty_progress_packets=:}" - s.close() - if __name__ == "__main__": main() diff --git a/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python index 768fb2144e3..1736807410f 100644 --- a/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python +++ b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python @@ -1,216 +1,23 @@ #!/usr/bin/env python3 -import socket + import os -import uuid -import json +import sys -CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") -CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) -CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -def writeVarUInt(x, ba): - for _ in range(0, 9): - byte = x & 0x7F - if x > 0x7F: - byte |= 0x80 - - ba.append(byte) - - x >>= 7 - if x == 0: - return - - -def writeStringBinary(s, ba): - b = bytes(s, "utf-8") - writeVarUInt(len(s), ba) - ba.extend(b) - - -def readStrict(s, size=1): - res = bytearray() - while size: - cur = s.recv(size) - # if not res: - # raise "Socket is closed" - size -= len(cur) - res.extend(cur) - - return res - - -def readUInt(s, size=1): - res = readStrict(s, size) - val = 0 - for i in range(len(res)): - val += res[i] << (i * 8) - return val - - -def readUInt8(s): - return readUInt(s) - - -def readUInt16(s): - return readUInt(s, 2) - - -def readUInt32(s): - return readUInt(s, 4) - - -def readUInt64(s): - return readUInt(s, 8) - - -def readVarUInt(s): - x = 0 - for i in range(9): - byte = readStrict(s)[0] - x |= (byte & 0x7F) << (7 * i) - - if not byte & 0x80: - return x - - return x - - -def readStringBinary(s): - size = readVarUInt(s) - s = readStrict(s, size) - return s.decode("utf-8") - - -def sendHello(s): - ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary("simple native protocol", ba) - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary(CLICKHOUSE_DATABASE, ba) # database - writeStringBinary("default", ba) # user - writeStringBinary("", ba) # pwd - s.sendall(ba) - - -def receiveHello(s): - p_type = readVarUInt(s) - assert p_type == 0 # Hello - _server_name = readStringBinary(s) - _server_version_major = readVarUInt(s) - _server_version_minor = readVarUInt(s) - _server_revision = readVarUInt(s) - _server_timezone = readStringBinary(s) - _server_display_name = readStringBinary(s) - _server_version_patch = readVarUInt(s) - - -def serializeClientInfo(ba, query_id): - writeStringBinary("default", ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary("127.0.0.1:9000", ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary("os_user", ba) # os_user - 
writeStringBinary("client_hostname", ba) # client_hostname - writeStringBinary("client_name", ba) # client_name - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary("", ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry - - -def sendQuery(s, query, settings): - ba = bytearray() - query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query - writeStringBinary(query_id, ba) - - ba.append(1) # INITIAL_QUERY - - # client info - serializeClientInfo(ba, query_id) - - # Settings - for key, value in settings.items(): - writeStringBinary(key, ba) - writeVarUInt(1, ba) # is_important - writeStringBinary(str(value), ba) - writeStringBinary("", ba) # End of settings - - writeStringBinary("", ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally - s.sendall(ba) - - -def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num - - -def sendEmptyBlock(s): - ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary("", ba) - serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns - s.sendall(ba) - - -def assertPacket(packet, expected): - assert packet == expected, "Got: {}, expected: {}".format(packet, expected) - - -def readResponse(s): - packet_type = readVarUInt(s) - if packet_type == 2: # Exception - raise RuntimeError(readException(s)) - - if packet_type == 1: # Data - return None - if packet_type == 3: # Progress - return None - if packet_type == 5: # End stream - return None - - raise RuntimeError("Unexpected packet: {}".format(packet_type)) - - -def readException(s): - code = readUInt32(s) - _name = readStringBinary(s) - text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested - return "code {}: {}".format(code, text.replace("DB::Exception:", "")) +from tcp_client import TCPClient def main(): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(30) - s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - sendHello(s) - receiveHello(s) - sendQuery(s, "select 1", {"replication_alter_partitions_sync": 1}) + with TCPClient() as client: + client.sendQuery("select 1", {"replication_alter_partitions_sync": 1}) # external tables - sendEmptyBlock(s) + client.sendEmptyBlock() - while readResponse(s) is not None: + while client.readResponse() is not None: pass - - s.close() print("OK") diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python new file mode 100755 index 00000000000..00c11d0d233 --- /dev/null +++ b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + + +import os +import sys + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + +from tcp_client import TCPClient, assertPacket + + +def main(): + with TCPClient() as client: + client.sendQuery( + f"SELECT number FROM numbers_mt(1e7+1) ORDER BY -number LIMIT 15 SETTINGS max_threads = 1, partial_result_update_duration_ms=1, max_rows_in_partial_result=10" + ) + + # external tables + client.sendEmptyBlock() + client.readHeader() + + # Partial result + _, 
partial_result = client.readHeader(need_read_data=True)[0]
+
+        assert_message = "There should be at least one block of data with partial result"
+        assert len(partial_result) > 0, assert_message
+
+        while True:
+            _, new_partial_result = client.readHeader(need_read_data=True, need_print_info=False)[0]
+            if len(new_partial_result) == 0:
+                break
+
+            data_size = len(partial_result)
+            assert_message = f"New block contains more info about the full data so sorted results should not be less than in the previous iteration. New result {new_partial_result}. Previous result {partial_result}"
+            assert all(partial_result[i] <= new_partial_result[i] for i in range(data_size)), assert_message
+
+            partial_result = new_partial_result
+
+        # Full result
+        _, full_result = client.readHeader(need_read_data=True)[0]
+        for result in full_result:
+            print(result)
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.reference b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.reference
new file mode 100644
index 00000000000..4d706474b22
--- /dev/null
+++ b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.reference
@@ -0,0 +1,21 @@
+Rows 0 Columns 1
+Column number type UInt64
+Rows 10 Columns 1
+Column number type UInt64
+Rows 15 Columns 1
+Column number type UInt64
+10000000
+9999999
+9999998
+9999997
+9999996
+9999995
+9999994
+9999993
+9999992
+9999991
+9999990
+9999989
+9999988
+9999987
+9999986
diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.sh b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.sh
new file mode 100755
index 00000000000..1ed15197dbf
--- /dev/null
+++ b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. 
"$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test +python3 "$CURDIR"/02833_partial_sorting_result_during_query_execution.python diff --git a/tests/queries/0_stateless/helpers/tcp_client.py b/tests/queries/0_stateless/helpers/tcp_client.py new file mode 100644 index 00000000000..d1c6cb4894c --- /dev/null +++ b/tests/queries/0_stateless/helpers/tcp_client.py @@ -0,0 +1,274 @@ +import socket +import os +import uuid + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") + + +def writeVarUInt(x, ba): + for _ in range(0, 9): + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + + +def writeStringBinary(s, ba): + b = bytes(s, "utf-8") + writeVarUInt(len(s), ba) + ba.extend(b) + + +def serializeClientInfo(ba, query_id): + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary("client_name", ba) # client_name + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry + + +def serializeBlockInfo(ba): + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num + + +def assertPacket(packet, expected): + assert packet == expected, "Got: {}, expected: {}".format(packet, expected) + + +class TCPClient(object): + def __init__(self, timeout=30): + self.timeout = timeout + self.socket = None + + def __enter__(self): + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.socket.settimeout(self.timeout) + self.socket.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + + self.sendHello() + self.receiveHello() + + return self + + def __exit__(self, exc_type, exc_value, traceback): + if self.socket: + self.socket.close() + + def readStrict(self, size=1): + res = bytearray() + while size: + cur = self.socket.recv(size) + # if not res: + # raise "Socket is closed" + size -= len(cur) + res.extend(cur) + + return res + + def readUInt(self, size=1): + res = self.readStrict(size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + + def readUInt8(self): + return self.readUInt() + + def readUInt16(self): + return self.readUInt(2) + + def readUInt32(self): + return self.readUInt(4) + + def readUInt64(self): + return self.readUInt(8) + + def readVarUInt(self): + x = 0 + for i in range(9): + byte = self.readStrict()[0] + x |= (byte & 0x7F) << (7 * i) + + if not byte & 0x80: + return x + + return x + + def readStringBinary(self): + size = self.readVarUInt() + s = self.readStrict(size) + return s.decode("utf-8") + + def send(self, byte_array): + self.socket.sendall(byte_array) + + def sendHello(self): + ba = bytearray() + writeVarUInt(0, ba) # Hello + writeStringBinary("simple native protocol", ba) + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary(CLICKHOUSE_DATABASE, ba) # database + writeStringBinary("default", ba) # user + 
writeStringBinary("", ba) # pwd + self.send(ba) + + def receiveHello(self): + p_type = self.readVarUInt() + assert p_type == 0 # Hello + _server_name = self.readStringBinary() + _server_version_major = self.readVarUInt() + _server_version_minor = self.readVarUInt() + _server_revision = self.readVarUInt() + _server_timezone = self.readStringBinary() + _server_display_name = self.readStringBinary() + _server_version_patch = self.readVarUInt() + + def sendQuery(self, query, settings=None): + if settings == None: + settings = {} # No settings + + ba = bytearray() + query_id = uuid.uuid4().hex + writeVarUInt(1, ba) # query + writeStringBinary(query_id, ba) + + ba.append(1) # INITIAL_QUERY + + # client info + serializeClientInfo(ba, query_id) + + # Settings + for key, value in settings.items(): + writeStringBinary(key, ba) + writeVarUInt(1, ba) # is_important + writeStringBinary(str(value), ba) + writeStringBinary("", ba) # End of settings + + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally + self.send(ba) + + def sendEmptyBlock(self): + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) + serializeBlockInfo(ba) + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns + self.send(ba) + + def readException(self): + code = self.readUInt32() + _name = self.readStringBinary() + text = self.readStringBinary() + self.readStringBinary() # trace + assertPacket(self.readUInt8(), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) + + def readResponse(self): + packet_type = self.readVarUInt() + if packet_type == 2: # Exception + raise RuntimeError(self.readException()) + + if packet_type == 1: # Data + return None + if packet_type == 3: # Progress + return None + if packet_type == 5: # End stream + return None + + raise RuntimeError("Unexpected packet: {}".format(packet_type)) + + def readData(self): + packet_type = self.readVarUInt() + if packet_type == 2: # Exception + raise RuntimeError(self.readException()) + + if packet_type == 5: # End stream + return None + + assertPacket(packet_type, 1) # Data + + self.readStringBinary() # external table name + + + def readProgress(self): + packet_type = self.readVarUInt() + if packet_type == 2: # Exception + raise RuntimeError(self.readException()) + + if packet_type == 5: # End stream + return None + + assertPacket(packet_type, 3) # Progress + + read_rows = self.readVarUInt() + read_bytes = self.readVarUInt() + total_rows_to_read = self.readVarUInt() + written_rows = self.readVarUInt() + written_bytes = self.readVarUInt() + return read_rows, read_bytes, total_rows_to_read, written_rows, written_bytes + + def readRow(self, row_type, rows): + if (row_type == 'UInt64'): + row = [self.readUInt64() for _ in range(rows)] + return row + else: + raise RuntimeError("Currently python version of tcp client doesn't support the following type of row: {}".format(row_type)) + + def readHeader(self, need_read_data=False, need_print_info=True): + packet_type = self.readVarUInt() + if packet_type == 2: # Exception + raise RuntimeError(self.readException()) + assertPacket(packet_type, 1) # Data + + self.readStringBinary() # external table name + # BlockInfo + assertPacket(self.readVarUInt(), 1) # field number 1 + assertPacket(self.readUInt8(), 0) # is_overflows + assertPacket(self.readVarUInt(), 2) # field number 2 + assertPacket(self.readUInt32(), 4294967295) # bucket_num + 
assertPacket(self.readVarUInt(), 0)  # 0
+        columns = self.readVarUInt()  # rows
+        rows = self.readVarUInt()  # columns
+
+        data = [] if need_read_data else None
+        if need_print_info:
+            print("Rows {} Columns {}".format(rows, columns))
+
+        for _ in range(columns):
+            col_name = self.readStringBinary()
+            type_name = self.readStringBinary()
+            if need_print_info:
+                print("Column {} type {}".format(col_name, type_name))
+
+            if need_read_data:
+                data.append((col_name, self.readRow(type_name, rows)))
+
+        return data
From c0f9dbbacb34fabbb2ce8431476f53d3086a1eb9 Mon Sep 17 00:00:00 2001
From: zvonand
Date: Wed, 2 Aug 2023 15:51:27 +0200
Subject: [PATCH 0124/1687] shorten

---
 src/Storages/StorageFile.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 7e214b76e3d..c6c9fefac97 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -122,7 +122,7 @@ void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls,
         return;
 
     const fs::directory_iterator end;
-    for (fs::directory_iterator it(path_for_ls, std::filesystem::directory_options::skip_permission_denied); it != end; ++it)
+    for (fs::directory_iterator it(path_for_ls, fs::directory_options::skip_permission_denied); it != end; ++it)
     {
         const std::string full_path = it->path().string();
         const size_t last_slash = full_path.rfind('/');
From 06e187e58a68ae5e68cc4022b578933986d87dc6 Mon Sep 17 00:00:00 2001
From: alexX512
Date: Wed, 2 Aug 2023 18:15:25 +0000
Subject: [PATCH 0125/1687] Split mergeSortingTransform and
 mergeSortingPartialResultTransform into 2 files

---
 .../PullingAsyncPipelineExecutor.cpp          |  4 +-
 src/Processors/Formats/IOutputFormat.cpp      | 58 ++++++++++---------
 src/Processors/Formats/IOutputFormat.h        |  8 +--
 src/Processors/Formats/LazyOutputFormat.h     |  4 +-
 .../LimitPartialResultTransform.cpp           | 42 ++++++++++++++
 .../Transforms/LimitPartialResultTransform.h  | 36 +++----------
 .../MergeSortingPartialResultTransform.cpp    | 49 ++++++++++++++++
 .../MergeSortingPartialResultTransform.h      | 25 +++++++++
 .../Transforms/MergeSortingTransform.cpp      | 54 +-----------------
 .../Transforms/PartialResultTransform.cpp     | 35 +++++++-----
 .../Transforms/PartialResultTransform.h       |  6 +-
 src/QueryPipeline/Pipe.cpp                    |  5 +-
 src/QueryPipeline/Pipe.h                      |  2 +-
 src/QueryPipeline/QueryPipeline.cpp           |  2 +-
 14 files changed, 190 insertions(+), 140 deletions(-)
 create mode 100644 src/Processors/Transforms/LimitPartialResultTransform.cpp
 create mode 100644 src/Processors/Transforms/MergeSortingPartialResultTransform.cpp
 create mode 100644 src/Processors/Transforms/MergeSortingPartialResultTransform.h

diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp
index 8875ba85a07..4b2e8e55904 100644
--- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp
+++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp
@@ -46,9 +46,7 @@ PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipel
     if (!pipeline.pulling())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for PullingAsyncPipelineExecutor must be pulling");
 
-    lazy_format = std::make_shared<LazyOutputFormat>(pipeline.output->getHeader());
-    if (has_partial_result_setting)
-        lazy_format->activatePartialResultProtocol();
+    lazy_format = std::make_shared<LazyOutputFormat>(pipeline.output->getHeader(), /*is_partial_result_protocol_active*/ has_partial_result_setting);
 
     pipeline.complete(lazy_format);
 }
diff --git a/src/Processors/Formats/IOutputFormat.cpp
b/src/Processors/Formats/IOutputFormat.cpp index 7dba4b22eab..6104037a72b 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -6,30 +6,18 @@ namespace DB { -IOutputFormat::IOutputFormat(const Block & header_, WriteBuffer & out_) - : IProcessor({header_, header_, header_, header_}, {}), out(out_) +IOutputFormat::IOutputFormat(const Block & header_, WriteBuffer & out_, bool is_partial_result_protocol_active_) + : IProcessor({header_, header_, header_, header_}, {}) + , out(out_) + , is_partial_result_protocol_active(is_partial_result_protocol_active_) { } -IOutputFormat::Status IOutputFormat::prepare() +void IOutputFormat::setCurrentChunk(InputPort & input, PortKind kind) { - if (has_input) - return Status::Ready; - - auto status = prepareMainAndPartialResult(); - if (status != Status::Finished) - return status; - - status = prepareTotalsAndExtremes(); - if (status != Status::Finished) - return status; - - finished = true; - - if (!finalized) - return Status::Ready; - - return Status::Finished; + current_chunk = input.pull(true); + current_block_kind = kind; + has_input = true; } IOutputFormat::Status IOutputFormat::prepareMainAndPartialResult() @@ -42,7 +30,7 @@ IOutputFormat::Status IOutputFormat::prepareMainAndPartialResult() if (input.isFinished()) continue; - if (kind == PartialResult && was_main_input) + if (kind == PartialResult && main_input_activated) { input.close(); continue; @@ -84,11 +72,25 @@ IOutputFormat::Status IOutputFormat::prepareTotalsAndExtremes() return Status::Finished; } -void IOutputFormat::setCurrentChunk(InputPort & input, PortKind kind) +IOutputFormat::Status IOutputFormat::prepare() { - current_chunk = input.pull(true); - current_block_kind = kind; - has_input = true; + if (has_input) + return Status::Ready; + + auto status = prepareMainAndPartialResult(); + if (status != Status::Finished) + return status; + + status = prepareTotalsAndExtremes(); + if (status != Status::Finished) + return status; + + finished = true; + + if (!finalized) + return Status::Ready; + + return Status::Finished; } static Chunk prepareTotals(Chunk chunk) @@ -130,10 +132,12 @@ void IOutputFormat::work() case Main: result_rows += current_chunk.getNumRows(); result_bytes += current_chunk.allocatedBytes(); - if (is_partial_result_protocol_active && !was_main_input && current_chunk.hasRows()) + if (is_partial_result_protocol_active && !main_input_activated && current_chunk.hasRows()) { + /// Sending an empty block signals to the client that partial results are terminated, + /// and only data from the main pipeline will be forwarded. 
consume(Chunk(current_chunk.cloneEmptyColumns(), 0)); - was_main_input = true; + main_input_activated = true; } consume(std::move(current_chunk)); break; diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 6e5e21055a3..fc85b0d063b 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -25,7 +25,7 @@ class IOutputFormat : public IProcessor public: enum PortKind { Main = 0, Totals = 1, Extremes = 2, PartialResult = 3 }; - IOutputFormat(const Block & header_, WriteBuffer & out_); + IOutputFormat(const Block & header_, WriteBuffer & out_, bool is_partial_result_protocol_active_ = false); Status prepare() override; void work() override; @@ -105,8 +105,6 @@ public: void clearLastLines(size_t lines_number); - void activatePartialResultProtocol() { is_partial_result_protocol_active = true; } - protected: friend class ParallelFormattingOutputFormat; @@ -172,7 +170,7 @@ protected: Chunk current_chunk; PortKind current_block_kind = PortKind::Main; - bool was_main_input = false; + bool main_input_activated = false; bool has_input = false; bool finished = false; bool finalized = false; @@ -187,9 +185,9 @@ protected: Statistics statistics; private: + void setCurrentChunk(InputPort & input, PortKind kind); IOutputFormat::Status prepareMainAndPartialResult(); IOutputFormat::Status prepareTotalsAndExtremes(); - void setCurrentChunk(InputPort & input, PortKind kind); size_t rows_read_before = 0; bool are_totals_written = false; diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index 3b3dd365458..bbcfdbb7193 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -14,8 +14,8 @@ class LazyOutputFormat : public IOutputFormat { public: - explicit LazyOutputFormat(const Block & header) - : IOutputFormat(header, out), queue(2) {} + explicit LazyOutputFormat(const Block & header, bool is_partial_result_protocol_active = false) + : IOutputFormat(header, out, is_partial_result_protocol_active), queue(2) {} String getName() const override { return "LazyOutputFormat"; } diff --git a/src/Processors/Transforms/LimitPartialResultTransform.cpp b/src/Processors/Transforms/LimitPartialResultTransform.cpp new file mode 100644 index 00000000000..fdaa95aac6c --- /dev/null +++ b/src/Processors/Transforms/LimitPartialResultTransform.cpp @@ -0,0 +1,42 @@ +#include +#include + +namespace DB +{ + +LimitPartialResultTransform::LimitPartialResultTransform( + const Block & header, + UInt64 partial_result_limit_, + UInt64 partial_result_duration_ms_, + UInt64 limit_, + UInt64 offset_) + : PartialResultTransform(header, partial_result_limit_, partial_result_duration_ms_) + , limit(limit_) + , offset(offset_) + {} + +void LimitPartialResultTransform::transformPartialResult(Chunk & chunk) +{ + UInt64 num_rows = chunk.getNumRows(); + if (num_rows < offset || limit == 0) + { + chunk = {}; + return; + } + + UInt64 length = std::min(limit, num_rows - offset); + + /// Check if some rows should be removed + if (length < num_rows) + { + auto columns = chunk.detachColumns(); + UInt64 num_columns = chunk.getNumColumns(); + + for (UInt64 i = 0; i < num_columns; ++i) + columns[i] = columns[i]->cut(offset, limit); + + chunk.setColumns(std::move(columns), length); + } +} + +} diff --git a/src/Processors/Transforms/LimitPartialResultTransform.h b/src/Processors/Transforms/LimitPartialResultTransform.h index 4b1f0e1026a..0f5549323c0 100644 --- 
a/src/Processors/Transforms/LimitPartialResultTransform.h
+++ b/src/Processors/Transforms/LimitPartialResultTransform.h
@@ -5,50 +5,30 @@ namespace DB
 {
 
+class LimitTransform;
+
 /// Currently support only single thread implementation with one input and one output ports
 class LimitPartialResultTransform : public PartialResultTransform
 {
 public:
+    using LimitTransformPtr = std::shared_ptr<LimitTransform>;
+
     LimitPartialResultTransform(
         const Block & header,
         UInt64 partial_result_limit_,
         UInt64 partial_result_duration_ms_,
         UInt64 limit_,
-        UInt64 offset_)
-        : PartialResultTransform(header, partial_result_limit_, partial_result_duration_ms_)
-        , limit(limit_)
-        , offset(offset_)
-        {}
+        UInt64 offset_);
 
     String getName() const override { return "LimitPartialResultTransform"; }
 
-    void transformPartialResult(Chunk & chunk) override
-    {
-        UInt64 num_rows = chunk.getNumRows();
-        if (num_rows < offset || limit == 0)
-        {
-            chunk = {};
-            return;
-        }
-
-        UInt64 length = std::min(limit, num_rows - offset);
-
-        /// Check if some rows should be removed
-        if (length < num_rows)
-        {
-            auto columns = chunk.detachColumns();
-            UInt64 num_columns = chunk.getNumColumns();
-
-            for (UInt64 i = 0; i < num_columns; ++i)
-                columns[i] = columns[i]->cut(offset, limit);
-
-            chunk.setColumns(std::move(columns), length);
-        }
-    }
+    void transformPartialResult(Chunk & chunk) override;
 
 private:
     UInt64 limit;
    UInt64 offset;
+
+    LimitTransformPtr limit_transform;
 };
 
 }
diff --git a/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp b/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp
new file mode 100644
index 00000000000..21513fe1faf
--- /dev/null
+++ b/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp
@@ -0,0 +1,49 @@
+#include <Processors/Transforms/MergeSortingTransform.h>
+#include <Processors/Transforms/MergeSortingPartialResultTransform.h>
+
+namespace DB
+{
+
+MergeSortingPartialResultTransform::MergeSortingPartialResultTransform(
+    const Block & header, MergeSortingTransformPtr merge_sorting_transform_,
+    UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_)
+    : PartialResultTransform(header, partial_result_limit_, partial_result_duration_ms_)
+    , merge_sorting_transform(std::move(merge_sorting_transform_))
+    {}
+
+PartialResultTransform::ShaphotResult MergeSortingPartialResultTransform::getRealProcessorSnapshot()
+{
+    std::lock_guard lock(merge_sorting_transform->snapshot_mutex);
+    if (merge_sorting_transform->generated_prefix)
+        return {{}, SnaphotStatus::Stopped};
+
+    if (merge_sorting_transform->chunks.empty())
+        return {{}, SnaphotStatus::NotReady};
+
+    /// Sort all input data
+    merge_sorting_transform->remerge();
+    /// Add a copy of the first `partial_result_limit` rows to a generated_chunk
+    /// to send it later as a partial result in the next prepare stage of the current processor
+    auto generated_columns = merge_sorting_transform->chunks[0].cloneEmptyColumns();
+    size_t total_rows = 0;
+    for (const auto & merged_chunk : merge_sorting_transform->chunks)
+    {
+        size_t rows = std::min(merged_chunk.getNumRows(), partial_result_limit - total_rows);
+        if (rows == 0)
+            break;
+
+        for (size_t position = 0; position < generated_columns.size(); ++position)
+        {
+            auto column = merged_chunk.getColumns()[position];
+            generated_columns[position]->insertRangeFrom(*column, 0, rows);
+        }
+
+        total_rows += rows;
+    }
+
+    auto partial_result = Chunk(std::move(generated_columns), total_rows, merge_sorting_transform->chunks[0].getChunkInfo());
+    merge_sorting_transform->enrichChunkWithConstants(partial_result);
+    return {std::move(partial_result), SnaphotStatus::Ready};
+}
+
+}
diff --git a/src/Processors/Transforms/MergeSortingPartialResultTransform.h b/src/Processors/Transforms/MergeSortingPartialResultTransform.h
new file mode 100644
index 00000000000..9ba937d9109
--- /dev/null
+++ b/src/Processors/Transforms/MergeSortingPartialResultTransform.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <Processors/Transforms/MergeSortingTransform.h>
+
+namespace DB
+{
+
+class MergeSortingPartialResultTransform : public PartialResultTransform
+{
+public:
+    using MergeSortingTransformPtr = std::shared_ptr<MergeSortingTransform>;
+
+    MergeSortingPartialResultTransform(
+        const Block & header, MergeSortingTransformPtr merge_sorting_transform_,
+        UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_);
+
+    String getName() const override { return "MergeSortingPartialResultTransform"; }
+
+    ShaphotResult getRealProcessorSnapshot() override;
+
+private:
+    MergeSortingTransformPtr merge_sorting_transform;
+};
+
+}
diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp
index 23114244c7a..d21cb179b0a 100644
--- a/src/Processors/Transforms/MergeSortingTransform.cpp
+++ b/src/Processors/Transforms/MergeSortingTransform.cpp
@@ -1,5 +1,5 @@
 #include <Processors/Transforms/MergeSortingTransform.h>
-#include <Processors/Transforms/PartialResultTransform.h>
+#include <Processors/Transforms/MergeSortingPartialResultTransform.h>
 #include
 #include
 #include
@@ -82,58 +82,6 @@ private:
     Poco::Logger * log;
 };
 
-class MergeSortingPartialResultTransform : public PartialResultTransform
-{
-public:
-    using MergeSortingTransformPtr = std::shared_ptr<MergeSortingTransform>;
-
-    MergeSortingPartialResultTransform(const Block & header, MergeSortingTransformPtr merge_sorting_transform_, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_)
-        : PartialResultTransform(header, partial_result_limit_, partial_result_duration_ms_)
-        , merge_sorting_transform(std::move(merge_sorting_transform_))
-    {
-    }
-
-    String getName() const override { return "MergeSortingPartialResultTransform"; }
-
-    ShaphotResult getRealProcessorSnapshot() override
-    {
-        std::lock_guard lock(merge_sorting_transform->snapshot_mutex);
-        if (merge_sorting_transform->generated_prefix)
-            return {{}, SnaphotStatus::Stopped};
-
-        if (merge_sorting_transform->chunks.empty())
-            return {{}, SnaphotStatus::NotReady};
-
-        /// Sort all input data
-        merge_sorting_transform->remerge();
-        /// Add a copy of the first `partial_result_limit` rows to a generated_chunk
-        /// to send it as a partial result in the next prepare stage
-        auto generated_columns = merge_sorting_transform->chunks[0].cloneEmptyColumns();
-        size_t total_rows = 0;
-        for (const auto & merged_chunk : merge_sorting_transform->chunks)
-        {
-            size_t rows = std::min(merged_chunk.getNumRows(), partial_result_limit - total_rows);
-            if (rows == 0)
-                break;
-
-            for (size_t position = 0; position < generated_columns.size(); ++position)
-            {
-                auto column = merged_chunk.getColumns()[position];
-                generated_columns[position]->insertRangeFrom(*column, 0, rows);
-            }
-
-            total_rows += rows;
-        }
-
-        auto partial_result = Chunk(std::move(generated_columns), total_rows, merge_sorting_transform->chunks[0].getChunkInfo());
-        merge_sorting_transform->enrichChunkWithConstants(partial_result);
-        return {std::move(partial_result), SnaphotStatus::Ready};
-    }
-
-private:
-    MergeSortingTransformPtr merge_sorting_transform;
-};
-
 MergeSortingTransform::MergeSortingTransform(
     const Block & header,
     const SortDescription & description_,
diff --git a/src/Processors/Transforms/PartialResultTransform.cpp b/src/Processors/Transforms/PartialResultTransform.cpp
index 7cf2473684a..4703eceddda 100644
--- a/src/Processors/Transforms/PartialResultTransform.cpp
+++
b/src/Processors/Transforms/PartialResultTransform.cpp @@ -26,29 +26,38 @@ IProcessor::Status PartialResultTransform::prepare() return Status::Finished; } - if (input.hasData()) - partial_result = {input.pull(), SnaphotStatus::Ready}; - - /// Send partial result from real processor snapshot or from previous partial result processor if possible - if (partial_result.snapshot_status == SnaphotStatus::Ready && output.canPush()) + if (!output.canPush()) { - transformPartialResult(partial_result.chunk); - partial_result.snapshot_status = SnaphotStatus::NotReady; - if (partial_result.chunk.getNumRows() > 0) - { - output.push(std::move(partial_result.chunk)); - return Status::PortFull; - } - } + input.setNotNeeded(); + return Status::PortFull; + } /// If input data from previous partial result processor is finished then /// PartialResultTransform ready to create snapshots and send them as a partial result if (input.isFinished()) { + if (partial_result.snapshot_status == SnaphotStatus::Ready) + { + partial_result.snapshot_status = SnaphotStatus::NotReady; + output.push(std::move(partial_result.chunk)); + return Status::PortFull; + } + return Status::Ready; } input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + partial_result.chunk = input.pull(); + transformPartialResult(partial_result.chunk); + if (partial_result.chunk.getNumRows() > 0) + { + output.push(std::move(partial_result.chunk)); + return Status::PortFull; + } + return Status::NeedData; } diff --git a/src/Processors/Transforms/PartialResultTransform.h b/src/Processors/Transforms/PartialResultTransform.h index 3f9089f8cce..7d4b9026ce1 100644 --- a/src/Processors/Transforms/PartialResultTransform.h +++ b/src/Processors/Transforms/PartialResultTransform.h @@ -20,9 +20,9 @@ public: protected: enum class SnaphotStatus { - NotReady, - Ready, - Stopped, + NotReady, // Waiting for data from the previous partial result processor or awaiting a timer before creating the snapshot. + Ready, // Current partial result processor has received a snapshot from the processor in the main pipeline. + Stopped, // The processor from the main pipeline has started sending data, and the pipeline for partial results should use data from the next processors of the main pipeline. 
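+        // A Ready snapshot is reset back to NotReady as soon as prepare() pushes it to the
+        // output port, so every snapshot is forwarded downstream at most once.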
}; struct ShaphotResult diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 6ff2cb6a4fd..adf06dde53b 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -325,10 +325,7 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow pipe.partial_result_ports.end()); } else - { - res.is_partial_result_active = false; - res.partial_result_ports.clear(); - } + res.dropPartialResult(); res.max_parallel_streams += pipe.max_parallel_streams; diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index cbb7a8cbf5c..70e933bcfd2 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -121,7 +121,7 @@ private: Block header; std::shared_ptr processors; - /// If is true, then on each addition of processor also try + /// If the variable is true, then each time a processor is added pipe will try /// to add processor which will send partial result from original processor bool is_partial_result_active = false; UInt64 partial_result_limit = 0; diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index 8fca23dba7d..741729cbd48 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -127,7 +127,7 @@ static void checkPulling( if (partial_result && !found_partial_result) throw Exception( ErrorCodes::LOGICAL_ERROR, - "Cannot create pulling QueryPipeline because its partial_result port does not belong to any processor"); + "Cannot create pulling QueryPipeline because its partial result port does not belong to any processor"); } static void checkCompleted(Processors & processors) From ee01d643506ea9231619b88caeeade5d753c7ab9 Mon Sep 17 00:00:00 2001 From: alexX512 Date: Wed, 2 Aug 2023 18:42:57 +0000 Subject: [PATCH 0126/1687] Style fix --- .../MergeSortingPartialResultTransform.cpp | 2 +- .../MergeSortingPartialResultTransform.h | 2 +- .../Transforms/PartialResultTransform.cpp | 4 ++-- .../0_stateless/02010_lc_native.python | 9 +++++++- .../02458_insert_select_progress_tcp.python | 9 +++++++- ...rting_result_during_query_execution.python | 23 +++++++++++-------- .../queries/0_stateless/helpers/tcp_client.py | 21 +++++++++-------- 7 files changed, 46 insertions(+), 24 deletions(-) diff --git a/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp b/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp index 21513fe1faf..2c4d6bea3c5 100644 --- a/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp +++ b/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp @@ -5,7 +5,7 @@ namespace DB { MergeSortingPartialResultTransform::MergeSortingPartialResultTransform( - const Block & header, MergeSortingTransformPtr merge_sorting_transform_, + const Block & header, MergeSortingTransformPtr merge_sorting_transform_, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) : PartialResultTransform(header, partial_result_limit_, partial_result_duration_ms_) , merge_sorting_transform(std::move(merge_sorting_transform_)) diff --git a/src/Processors/Transforms/MergeSortingPartialResultTransform.h b/src/Processors/Transforms/MergeSortingPartialResultTransform.h index 9ba937d9109..f08f74dad6b 100644 --- a/src/Processors/Transforms/MergeSortingPartialResultTransform.h +++ b/src/Processors/Transforms/MergeSortingPartialResultTransform.h @@ -11,7 +11,7 @@ public: using MergeSortingTransformPtr = std::shared_ptr; MergeSortingPartialResultTransform( - const Block & header, MergeSortingTransformPtr merge_sorting_transform_, 
+ const Block & header, MergeSortingTransformPtr merge_sorting_transform_, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); String getName() const override { return "MergeSortingPartialResultTransform"; } diff --git a/src/Processors/Transforms/PartialResultTransform.cpp b/src/Processors/Transforms/PartialResultTransform.cpp index 4703eceddda..5e9704d38a2 100644 --- a/src/Processors/Transforms/PartialResultTransform.cpp +++ b/src/Processors/Transforms/PartialResultTransform.cpp @@ -30,7 +30,7 @@ IProcessor::Status PartialResultTransform::prepare() { input.setNotNeeded(); return Status::PortFull; - } + } /// If input data from previous partial result processor is finished then /// PartialResultTransform ready to create snapshots and send them as a partial result @@ -48,7 +48,7 @@ IProcessor::Status PartialResultTransform::prepare() input.setNeeded(); if (!input.hasData()) - return Status::NeedData; + return Status::NeedData; partial_result.chunk = input.pull(); transformPartialResult(partial_result.chunk); diff --git a/tests/queries/0_stateless/02010_lc_native.python b/tests/queries/0_stateless/02010_lc_native.python index 5054ad3d454..219fdf04472 100755 --- a/tests/queries/0_stateless/02010_lc_native.python +++ b/tests/queries/0_stateless/02010_lc_native.python @@ -7,7 +7,14 @@ import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(CURDIR, "helpers")) -from tcp_client import TCPClient, CLICKHOUSE_DATABASE, writeVarUInt, writeStringBinary, serializeBlockInfo, assertPacket +from tcp_client import ( + TCPClient, + CLICKHOUSE_DATABASE, + writeVarUInt, + writeStringBinary, + serializeBlockInfo, + assertPacket, +) def insertValidLowCardinalityRow(): diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python index 1fe2363d7d5..929107093d0 100644 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python @@ -11,7 +11,14 @@ from tcp_client import TCPClient class Progress: - def __init__(self, read_rows=0, read_bytes=0, total_rows_to_read=0, written_rows=0, written_bytes=0): + def __init__( + self, + read_rows=0, + read_bytes=0, + total_rows_to_read=0, + written_rows=0, + written_bytes=0 + ): # NOTE: this is done in ctor to initialize __dict__ self.read_rows = read_rows self.read_bytes = read_bytes diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python index 00c11d0d233..a341cbd3106 100755 --- a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python +++ b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python @@ -20,24 +20,30 @@ def main(): # external tables client.sendEmptyBlock() client.readHeader() - + # Partial result _, partial_result = client.readHeader(need_read_data=True)[0] - assert_message = "There should be at least one block of data with partial result" + assert_message = ( + "There should be at least one block of data with partial result" + ) assert len(partial_result) > 0, assert_message - + while True: - _, new_partial_result = client.readHeader(need_read_data=True, need_print_info=False)[0] - if (len(new_partial_result) == 0): + _, new_partial_result = client.readHeader( + need_read_data=True, need_print_info=False + )[0] + if len(new_partial_result) == 0: break - + 
data_size = len(partial_result)
             assert_message = f"New block contains more info about the full data so sorted results should not be less than in the previous iteration. New result {new_partial_result}. Previous result {partial_result}"
-            assert all(partial_result[i] <= new_partial_result[i] for i in range(data_size)), assert_message
+            assert all(
+                partial_result[i] <= new_partial_result[i] for i in range(data_size)
+            ), assert_message
 
             partial_result = new_partial_result
-
+
         # Full result
         _, full_result = client.readHeader(need_read_data=True)[0]
         for result in full_result:
@@ -46,4 +52,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
diff --git a/tests/queries/0_stateless/helpers/tcp_client.py b/tests/queries/0_stateless/helpers/tcp_client.py
index d1c6cb4894c..ca10f74b812 100644
--- a/tests/queries/0_stateless/helpers/tcp_client.py
+++ b/tests/queries/0_stateless/helpers/tcp_client.py
@@ -17,7 +17,7 @@ def writeVarUInt(x, ba):
 
         x >>= 7
         if x == 0:
-            return
+            return
 
 
 def writeStringBinary(s, ba):
@@ -68,7 +68,7 @@ class TCPClient(object):
 
         self.sendHello()
         self.receiveHello()
-
+
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
@@ -85,7 +85,7 @@ class TCPClient(object):
             res.extend(cur)
 
         return res
-
+
     def readUInt(self, size=1):
         res = self.readStrict(size)
         val = 0
@@ -182,7 +182,7 @@ class TCPClient(object):
         writeVarUInt(0, ba)  # rows
         writeVarUInt(0, ba)  # columns
         self.send(ba)
-
+
     def readException(self):
         code = self.readUInt32()
         _name = self.readStringBinary()
@@ -217,7 +217,6 @@ class TCPClient(object):
             return None
 
         assertPacket(packet_type, 1)  # Data
-
+
         self.readStringBinary()  # external table name
 
-
     def readProgress(self):
         packet_type = self.readVarUInt()
         if packet_type == 2:  # Exception
@@ -236,11 +235,15 @@ class TCPClient(object):
         return read_rows, read_bytes, total_rows_to_read, written_rows, written_bytes
 
     def readRow(self, row_type, rows):
-        if (row_type == 'UInt64'):
+        if row_type == 'UInt64':
             row = [self.readUInt64() for _ in range(rows)]
             return row
         else:
-            raise RuntimeError("Currently python version of tcp client doesn't support the following type of row: {}".format(row_type))
+            raise RuntimeError(
+                "Currently python version of tcp client doesn't support the following type of row: {}".format(
+                    row_type
+                )
+            )
 
     def readHeader(self, need_read_data=False, need_print_info=True):
         packet_type = self.readVarUInt()
@@ -257,11 +260,11 @@ class TCPClient(object):
         assertPacket(self.readVarUInt(), 0)  # 0
         columns = self.readVarUInt()  # rows
         rows = self.readVarUInt()  # columns
-
+
         data = [] if need_read_data else None
         if need_print_info:
             print("Rows {} Columns {}".format(rows, columns))
-
+
         for _ in range(columns):
             col_name = self.readStringBinary()
             type_name = self.readStringBinary()
From ff8027b74b1bbf65111a134e80fa14296afacbbc Mon Sep 17 00:00:00 2001
From: alexX512
Date: Wed, 2 Aug 2023 19:01:08 +0000
Subject: [PATCH 0127/1687] Style fix

---
 .../02458_insert_select_progress_tcp.python   | 2 +-
 ...rting_result_during_query_execution.python | 2 +-
 tests/queries/0_stateless/helpers/tcp_client.py | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python
index 929107093d0..fdc64a8dba8 100644
--- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python
+++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python
@@ -17,7 +17,7 @@ class Progress:
         read_bytes=0,
         total_rows_to_read=0,
         written_rows=0,
-        written_bytes=0
+        written_bytes=0,
     ):
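+        # The parameter order mirrors the Progress packet layout that
+        # TCPClient.readProgress() reads from the wire.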
# NOTE: this is done in ctor to initialize __dict__
         self.read_rows = read_rows
         self.read_bytes = read_bytes
diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python
index a341cbd3106..ce31431ba97 100755
--- a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python
+++ b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python
@@ -23,7 +23,7 @@ def main():
 
         # Partial result
         _, partial_result = client.readHeader(need_read_data=True)[0]
-
+
         assert_message = (
             "There should be at least one block of data with partial result"
         )
diff --git a/tests/queries/0_stateless/helpers/tcp_client.py b/tests/queries/0_stateless/helpers/tcp_client.py
index ca10f74b812..5b1e312b229 100644
--- a/tests/queries/0_stateless/helpers/tcp_client.py
+++ b/tests/queries/0_stateless/helpers/tcp_client.py
@@ -214,7 +214,7 @@ class TCPClient(object):
             return None
 
         assertPacket(packet_type, 1)  # Data
-
+
         self.readStringBinary()  # external table name
 
     def readProgress(self):
@@ -235,7 +235,7 @@ class TCPClient(object):
         return read_rows, read_bytes, total_rows_to_read, written_rows, written_bytes
 
     def readRow(self, row_type, rows):
-        if row_type == 'UInt64':
+        if row_type == "UInt64":
            row = [self.readUInt64() for _ in range(rows)]
            return row
         else:
From 1aac8bf12933a77ab52689e2090ff2a59f7dc0bc Mon Sep 17 00:00:00 2001
From: JackyWoo
Date: Thu, 3 Aug 2023 11:46:26 +0800
Subject: [PATCH 0128/1687] fix tests

---
 src/Analyzer/Passes/AnyFunctionPass.cpp       | 23 +++++++++++--
 ...3_analyzer_push_any_to_functions.reference | 34 +++++++++++--------
 2 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
index 5fd6beec4d8..5fa709e71cf 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.cpp
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -50,7 +50,7 @@ public:
     using Base = InDepthQueryTreeVisitorWithContext;
     using Base::Base;
 
-    void visitImpl(QueryTreeNodePtr & node)
+    void enterImpl(QueryTreeNodePtr & node)
     {
         if (!getSettings().optimize_move_functions_out_of_any)
             return;
@@ -84,7 +84,14 @@ public:
         if (inside_arguments.empty())
             return;
 
+        if (rewritten.contains(node.get()))
+        {
+            node = rewritten.at(node.get());
+            return;
+        }
+
         /// checking done, rewrite function
+        bool pushed = false;
         for (auto & inside_argument : inside_arguments)
         {
             if (inside_argument->as<ConstantNode>()) /// skip constant node
@@ -100,10 +107,22 @@ public:
             any_function_arguments.push_back(std::move(inside_argument));
 
             inside_argument = std::move(any_function);
+            pushed = true;
+        }
+
+        if (pushed)
+        {
+            rewritten.insert({node.get(), arguments[0]});
+            node = arguments[0];
+        }
-        node = arguments[0];
     }
 
+private:
+    /// After query analysis, an alias will be rewritten to a QueryTreeNode
+    /// whose memory address is the same as the original one.
+    /// So we can reuse the rewritten node.
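+    /// Maps the raw pointer of the original function node to the node it was rewritten to.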
+    std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr> rewritten;
+
 };
 
 }
diff --git a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
index 3afb2cc353f..025c04af1da 100644
--- a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
+++ b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
@@ -93,26 +93,32 @@ QUERY id: 0
         FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
           ARGUMENTS
             LIST id: 5, nodes: 1
-              FUNCTION id: 6, function_name: anyLast, function_type: aggregate, result_type: UInt64
-                ARGUMENTS
-                  LIST id: 7, nodes: 1
-                    COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
-          CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
+              COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
+          CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
     FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64
       ARGUMENTS
         LIST id: 3, nodes: 2
          FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
            ARGUMENTS
              LIST id: 5, nodes: 1
-              FUNCTION id: 6, function_name: anyLast, function_type: aggregate, result_type: UInt64
-                ARGUMENTS
-                  LIST id: 7, nodes: 1
-                    COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
-          CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
+              COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
+          CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 7, table_function_name: numbers
     ARGUMENTS
-      LIST id: 11, nodes: 2
-        CONSTANT id: 12, constant_value: UInt64_1, constant_value_type: UInt8
-        CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8
+      LIST id: 9, nodes: 2
+        CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8
+        CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8
 SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
+6 6
+SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
+SET optimize_move_functions_out_of_any = 0;
+SELECT any(number + number * 2) FROM numbers(1, 2);
+3
+SELECT anyLast(number + number * 2) FROM numbers(1, 2);
+6
+WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
+3
+SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
+6 6
+SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
From 9335a494dae52e7652d26d5cb0983fcf5c02bca9 Mon Sep 17 00:00:00 2001
From: alexX512
Date: Thu, 3 Aug 2023 07:51:25 +0000
Subject: [PATCH 0129/1687] Read data without progress for partial result test

---
 ...rting_result_during_query_execution.python |  8 +-
 .../queries/0_stateless/helpers/tcp_client.py | 89 ++++++++++---------
 2 files changed, 53 insertions(+), 44 deletions(-)

diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python
index ce31431ba97..14f5a912672 100755
--- a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python
+++ b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python
@@ -22,7 +22,7 @@ def main():
         client.readHeader()
 
         # Partial result
-        _, partial_result = client.readHeader(need_read_data=True)[0]
+        _, partial_result = client.readDataWithoutProgress()[0]
 
         assert_message = (
             "There should be at least
one block of data with partial result" @@ -30,8 +30,8 @@ def main(): assert len(partial_result) > 0, assert_message while True: - _, new_partial_result = client.readHeader( - need_read_data=True, need_print_info=False + _, new_partial_result = client.readDataWithoutProgress( + need_print_info=False )[0] if len(new_partial_result) == 0: break @@ -45,7 +45,7 @@ def main(): partial_result = new_partial_result # Full result - _, full_result = client.readHeader(need_read_data=True)[0] + _, full_result = client.readDataWithoutProgress()[0] for result in full_result: print(result) diff --git a/tests/queries/0_stateless/helpers/tcp_client.py b/tests/queries/0_stateless/helpers/tcp_client.py index 5b1e312b229..1c8b7ae0498 100644 --- a/tests/queries/0_stateless/helpers/tcp_client.py +++ b/tests/queries/0_stateless/helpers/tcp_client.py @@ -191,11 +191,15 @@ class TCPClient(object): assertPacket(self.readUInt8(), 0) # has_nested return "code {}: {}".format(code, text.replace("DB::Exception:", "")) - def readResponse(self): + def readPacketType(self): packet_type = self.readVarUInt() if packet_type == 2: # Exception raise RuntimeError(self.readException()) + return packet_type + + def readResponse(self): + packet_type = self.readPacketType() if packet_type == 1: # Data return None if packet_type == 3: # Progress @@ -205,35 +209,46 @@ class TCPClient(object): raise RuntimeError("Unexpected packet: {}".format(packet_type)) - def readData(self): - packet_type = self.readVarUInt() - if packet_type == 2: # Exception - raise RuntimeError(self.readException()) - - if packet_type == 5: # End stream - return None - - assertPacket(packet_type, 1) # Data - - self.readStringBinary() # external table name - - def readProgress(self): - packet_type = self.readVarUInt() - if packet_type == 2: # Exception - raise RuntimeError(self.readException()) - - if packet_type == 5: # End stream - return None - - assertPacket(packet_type, 3) # Progress - + def readProgressData(self): read_rows = self.readVarUInt() read_bytes = self.readVarUInt() total_rows_to_read = self.readVarUInt() written_rows = self.readVarUInt() written_bytes = self.readVarUInt() + return read_rows, read_bytes, total_rows_to_read, written_rows, written_bytes + def readProgress(self): + packet_type = self.readPacketType() + if packet_type == 5: # End stream + return None + assertPacket(packet_type, 3) # Progress + return self.readProgressData() + + def readHeaderInfo(self): + self.readStringBinary() # external table name + # BlockInfo + assertPacket(self.readVarUInt(), 1) # field number 1 + assertPacket(self.readUInt8(), 0) # is_overflows + assertPacket(self.readVarUInt(), 2) # field number 2 + assertPacket(self.readUInt32(), 4294967295) # bucket_num + assertPacket(self.readVarUInt(), 0) # 0 + columns = self.readVarUInt() # rows + rows = self.readVarUInt() # columns + + return columns, rows + + def readHeader(self): + packet_type = self.readPacketType() + assertPacket(packet_type, 1) # Data + + columns, rows = self.readHeaderInfo() + print("Rows {} Columns {}".format(rows, columns)) + for _ in range(columns): + col_name = self.readStringBinary() + type_name = self.readStringBinary() + print("Column {} type {}".format(col_name, type_name)) + def readRow(self, row_type, rows): if row_type == "UInt64": row = [self.readUInt64() for _ in range(rows)] @@ -245,23 +260,18 @@ class TCPClient(object): ) ) - def readHeader(self, need_read_data=False, need_print_info=True): - packet_type = self.readVarUInt() - if packet_type == 2: # Exception - raise 
RuntimeError(self.readException())
-
-        if packet_type == 5:  # End stream
-            return None
-
-        assertPacket(packet_type, 3)  # Progress
-
+    def readProgressData(self):
         read_rows = self.readVarUInt()
         read_bytes = self.readVarUInt()
         total_rows_to_read = self.readVarUInt()
         written_rows = self.readVarUInt()
         written_bytes = self.readVarUInt()
+
         return read_rows, read_bytes, total_rows_to_read, written_rows, written_bytes
 
+    def readProgress(self):
+        packet_type = self.readPacketType()
+        if packet_type == 5:  # End stream
+            return None
+        assertPacket(packet_type, 3)  # Progress
+        return self.readProgressData()
+
+    def readHeaderInfo(self):
+        self.readStringBinary()  # external table name
+        # BlockInfo
+        assertPacket(self.readVarUInt(), 1)  # field number 1
+        assertPacket(self.readUInt8(), 0)  # is_overflows
+        assertPacket(self.readVarUInt(), 2)  # field number 2
+        assertPacket(self.readUInt32(), 4294967295)  # bucket_num
+        assertPacket(self.readVarUInt(), 0)  # 0
+        columns = self.readVarUInt()  # rows
+        rows = self.readVarUInt()  # columns
+
+        return columns, rows
+
+    def readHeader(self):
+        packet_type = self.readPacketType()
+        assertPacket(packet_type, 1)  # Data
+
+        columns, rows = self.readHeaderInfo()
+        print("Rows {} Columns {}".format(rows, columns))
+        for _ in range(columns):
+            col_name = self.readStringBinary()
+            type_name = self.readStringBinary()
+            print("Column {} type {}".format(col_name, type_name))
+
     def readRow(self, row_type, rows):
         if row_type == "UInt64":
             row = [self.readUInt64() for _ in range(rows)]
@@ -245,23 +260,18 @@ class TCPClient(object):
             )
         )
 
-    def readHeader(self, need_read_data=False, need_print_info=True):
-        packet_type = self.readVarUInt()
-        if packet_type == 2:  # Exception
-            raise RuntimeError(self.readException())
+    def readDataWithoutProgress(self, need_print_info=True):
+        packet_type = self.readPacketType()
+        while packet_type == 3: # Progress
+            self.readProgressData()
+            packet_type = self.readPacketType()
+
+        if packet_type == 5:  # End stream
+            return None
         assertPacket(packet_type, 1)  # Data
 
-        self.readStringBinary()  # external table name
-        # BlockInfo
-        assertPacket(self.readVarUInt(), 1)  # field number 1
-        assertPacket(self.readUInt8(), 0)  # is_overflows
-        assertPacket(self.readVarUInt(), 2)  # field number 2
-        assertPacket(self.readUInt32(), 4294967295)  # bucket_num
-        assertPacket(self.readVarUInt(), 0)  # 0
-        columns = self.readVarUInt()  # rows
-        rows = self.readVarUInt()  # columns
-
-        data = [] if need_read_data else None
+        columns, rows = self.readHeaderInfo()
+        data = []
         if need_print_info:
             print("Rows {} Columns {}".format(rows, columns))
 
@@ -271,7 +281,6 @@ class TCPClient(object):
             col_name = self.readStringBinary()
             type_name = self.readStringBinary()
             if need_print_info:
                 print("Column {} type {}".format(col_name, type_name))
 
-            if need_read_data:
-                data.append((col_name, self.readRow(type_name, rows)))
+            data.append((col_name, self.readRow(type_name, rows)))
 
         return data
From de62239a5db53534ad8be9cb39ed7174388f9bc2 Mon Sep 17 00:00:00 2001
From: alexX512
Date: Thu, 3 Aug 2023 08:13:30 +0000
Subject: [PATCH 0130/1687] Style fix

---
 tests/queries/0_stateless/helpers/tcp_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/helpers/tcp_client.py b/tests/queries/0_stateless/helpers/tcp_client.py
index 1c8b7ae0498..2f0680b4eae 100644
--- a/tests/queries/0_stateless/helpers/tcp_client.py
+++ b/tests/queries/0_stateless/helpers/tcp_client.py
@@ -262,7 +262,7 @@ class TCPClient(object):
 
     def readDataWithoutProgress(self, need_print_info=True):
         packet_type = self.readPacketType()
-        while packet_type == 3: # Progress
+        while packet_type == 3:  # Progress
             self.readProgressData()
             packet_type = self.readPacketType()
 
From e797d877480386cb38dcdd5034129b9f4a469ef1 Mon Sep 17 00:00:00 2001
From: chen768959 <934103231@qq.com>
Date: Thu, 3 Aug 2023 22:43:08 +0800
Subject: [PATCH 0131/1687] More precise Integer type inference

---
 src/DataTypes/DataTypesNumber.h     |  16 ++++
 src/DataTypes/FieldToDataType.cpp   |  22 ++++-
 src/DataTypes/IDataType.h           |   2 +
 src/DataTypes/getLeastSupertype.cpp | 132 ++++++++++++++++++++++++++++
 src/DataTypes/getLeastSupertype.h   |   2 +
 5 files changed, 171 insertions(+), 3 deletions(-)

diff --git a/src/DataTypes/DataTypesNumber.h b/src/DataTypes/DataTypesNumber.h
index 5843086248c..8511955e193 100644
--- a/src/DataTypes/DataTypesNumber.h
+++ b/src/DataTypes/DataTypesNumber.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include
+#include
 #include
 #include
 #include
@@ -9,10 +10,20 @@
 namespace DB
 {
 
+using DataTypes = std::vector<DataTypePtr>;
+
 template <typename T>
 class DataTypeNumber final : public DataTypeNumberBase<T>
 {
 public:
+    DataTypeNumber() = default;
+
+    explicit DataTypeNumber(DataTypes data_types)
+        : DataTypeNumberBase<T>()
+        , possible_data_types(std::move(data_types))
+    {
+    }
+
     bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); }
 
     bool canBeUsedAsVersion() const override { return true; }
@@ -32,6 +43,11 @@ public:
     {
         return std::make_shared<DataTypeNumber<T>>();
     }
+
+    DataTypes getPossiblePtr() const override { return possible_data_types; }
+
+private:
+    DataTypes possible_data_types;
 };
 
 using DataTypeUInt8 = DataTypeNumber<UInt8>;
diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp
index 210dab9921e..0a48c43ceb2 100644
--- a/src/DataTypes/FieldToDataType.cpp
+++ b/src/DataTypes/FieldToDataType.cpp
@@ -33,18 +33,34 @@ DataTypePtr FieldToDataType<on_error>::operator() (const Null &) const
 template <LeastSupertypeOnError on_error>
 DataTypePtr FieldToDataType<on_error>::operator() (const UInt64 & x) const
 {
+    if (x <= std::numeric_limits<Int8>::max()) return std::make_shared<DataTypeUInt8>(DataTypes{ std::make_shared<DataTypeInt8>() });
     if (x <= std::numeric_limits<UInt8>::max()) return std::make_shared<DataTypeUInt8>();
+    if (x <= std::numeric_limits<Int16>::max()) return std::make_shared<DataTypeUInt16>(DataTypes{ std::make_shared<DataTypeInt16>() });
     if (x <= std::numeric_limits<UInt16>::max()) return std::make_shared<DataTypeUInt16>();
+    if (x <= std::numeric_limits<Int32>::max()) return std::make_shared<DataTypeUInt32>(DataTypes{ std::make_shared<DataTypeInt32>() });
     if (x <= std::numeric_limits<UInt32>::max()) return std::make_shared<DataTypeUInt32>();
+    if (x <= std::numeric_limits<Int64>::max()) return std::make_shared<DataTypeUInt64>(DataTypes{ std::make_shared<DataTypeInt64>() });
     return std::make_shared<DataTypeUInt64>();
 }
 
 template <LeastSupertypeOnError on_error>
 DataTypePtr FieldToDataType<on_error>::operator() (const Int64 & x) const
 {
-    if (x <= std::numeric_limits<Int8>::max() && x >= std::numeric_limits<Int8>::min()) return std::make_shared<DataTypeInt8>();
-    if (x <= std::numeric_limits<Int16>::max() && x >= std::numeric_limits<Int16>::min()) return std::make_shared<DataTypeInt16>();
-    if (x <= std::numeric_limits<Int32>::max() && x >= std::numeric_limits<Int32>::min()) return std::make_shared<DataTypeInt32>();
+    if (x >= 0)
+    {
+        if (x <= std::numeric_limits<Int8>::max()) return std::make_shared<DataTypeInt8>();
+        if (x <= std::numeric_limits<UInt8>::max()) return std::make_shared<DataTypeInt16>(DataTypes{ std::make_shared<DataTypeUInt8>() });
+        if (x <= std::numeric_limits<Int16>::max()) return std::make_shared<DataTypeInt16>();
+        if (x <= std::numeric_limits<UInt16>::max()) return std::make_shared<DataTypeInt32>(DataTypes{ std::make_shared<DataTypeUInt16>() });
+        if (x <= std::numeric_limits<Int32>::max()) return std::make_shared<DataTypeInt32>();
+        if (x <= std::numeric_limits<UInt32>::max()) return std::make_shared<DataTypeInt64>(DataTypes{ std::make_shared<DataTypeUInt32>() });
+    }
+    else
+    {
+        if (x >= std::numeric_limits<Int8>::min()) return std::make_shared<DataTypeInt8>();
+        if (x >= std::numeric_limits<Int16>::min()) return std::make_shared<DataTypeInt16>();
+        if (x >= std::numeric_limits<Int32>::min()) return std::make_shared<DataTypeInt32>();
+    }
 
     return std::make_shared<DataTypeInt64>();
 }
diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h
index 4adafe5d212..330836f9d92 100644
--- a/src/DataTypes/IDataType.h
+++ b/src/DataTypes/IDataType.h
@@ -73,6 +73,8 @@ public:
 
     DataTypePtr getPtr() const { return shared_from_this(); }
 
+    virtual DataTypes getPossiblePtr() const { return { shared_from_this() }; }
+
     /// Name of data type family (example: FixedString, Array).
     virtual const char * getFamilyName() const = 0;
     /// Name of corresponding data type in MySQL (exampe: Bigint, Blob, etc)
diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp
index 9d42d82ce91..3a7a1560af1 100644
--- a/src/DataTypes/getLeastSupertype.cpp
+++ b/src/DataTypes/getLeastSupertype.cpp
@@ -592,6 +592,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types)
 
     /// For numeric types, the most complicated part.
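+    /// optimizeTypeIds() below may rewrite the collected ids using each type's possible
+    /// alternatives, so values that fit both signed and unsigned ranges can agree on one signedness.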
{ + optimizeTypeIds(types, type_ids); auto numeric_type = getNumericType(type_ids); if (numeric_type) return numeric_type; @@ -601,6 +602,137 @@ DataTypePtr getLeastSupertype(const DataTypes & types) return throwOrReturn(types, "", ErrorCodes::NO_COMMON_TYPE); } +void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids) +{ + // Determine whether the type_id is UInt + auto is_unsigned = [](const TypeIndex & type_id) + { + switch (type_id) + { + case TypeIndex::UInt8: + case TypeIndex::UInt16: + case TypeIndex::UInt32: + case TypeIndex::UInt64: + return true; + default: + return false; + } + }; + + auto maximize = [](size_t & what, size_t value, bool & only_unsigned, bool & only_signed, bool & both) + { + if (value > what) + { + what = value; + only_unsigned = false; + only_signed = false; + both = false; + return true; + }else if (value == what) + { + return true; + } + + return false; + }; + + size_t max_bits_of_integer = 0; + bool only_unsigned = false; + bool only_signed = false; + bool both = false; + + // Determine the distribution of maximum signed and unsigned, Example: + // Int64, Int64 = only_signed. + // UInt64, UInt64 = only_unsigned. + // UInt64(possible: Int64), Int64(possible: UInt64) = both. + // UInt64(possible: Int64), Int64 = both, only_signed. + for (const auto & type : types) + { + TypeIndex type_id = type->getTypeId(); + bool is_max_bits = false; + if (type_id == TypeIndex::UInt8 || type_id == TypeIndex::Int8) + is_max_bits = maximize(max_bits_of_integer, 8, only_unsigned, only_signed, both); + else if (type_id == TypeIndex::UInt16 || type_id == TypeIndex::Int16) + is_max_bits = maximize(max_bits_of_integer, 16, only_unsigned, only_signed, both); + else if (type_id == TypeIndex::UInt32 || type_id == TypeIndex::Int32) + is_max_bits = maximize(max_bits_of_integer, 32, only_unsigned, only_signed, both); + else if (type_id == TypeIndex::UInt64 || type_id == TypeIndex::Int64) + is_max_bits = maximize(max_bits_of_integer, 64, only_unsigned, only_signed, both); + + if (is_max_bits) + { + bool type_is_unsigned = is_unsigned(type_id); + bool type_is_both = false; + for (const auto & possible_type : type->getPossiblePtr()) + { + if (type_is_unsigned != is_unsigned(possible_type->getTypeId())) + { + type_is_both = true; + break; + } + } + + if (type_is_both) + both = true; + else if (type_is_unsigned) + only_unsigned = true; + else + only_signed = true; + } + } + + auto optimize_type_id = [&is_unsigned](const DataTypePtr & type, bool try_change_unsigned) + { + switch (type_id) + { + case TypeIndex::UInt8: + case TypeIndex::UInt16: + case TypeIndex::UInt32: + case TypeIndex::UInt64: + if (try_change_unsigned) + return type_id; + case TypeIndex::Int8: + case TypeIndex::Int16: + case TypeIndex::Int32: + case TypeIndex::Int64: + if (!try_change_unsigned) + return type_id; + default: + return type_id; + } + + for (const auto & other_type : type->getPossiblePtr()) + { + TypeIndex other_type_id = other_type->getTypeId(); + if ((try_change_unsigned && is_unsigned(other_type_id)) + || (!try_change_unsigned && !is_unsigned(other_type_id))) + { + return other_type_id; + } + } + + return type_id; + }; + + // optimize type_ids, Example: + // if only_signed. UInt64(possible: Int64), Int64 = Int64, Int64 + // if only_unsigned. 
Int64(possible: UInt64), UInt64 = UInt64, UInt64
+    if (!(only_unsigned && only_signed) && (both || only_unsigned || only_signed)) {
+        type_ids.clear();
+        for (const auto & type : types)
+        {
+            if (only_unsigned)
+            {
+                type_ids.insert(optimize_type_id(type, true));
+            }
+            else if (both || only_signed)
+            {
+                type_ids.insert(optimize_type_id(type, false));
+            }
+        }
+    }
+}
+
 DataTypePtr getLeastSupertypeOrString(const DataTypes & types)
 {
     return getLeastSupertype<LeastSupertypeOnError::String>(types);
diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h
index 2ef4a0e6850..0a3fa3c2536 100644
--- a/src/DataTypes/getLeastSupertype.h
+++ b/src/DataTypes/getLeastSupertype.h
@@ -29,6 +29,8 @@ DataTypePtr tryGetLeastSupertype(const DataTypes & types);
 
 using TypeIndexSet = std::unordered_set<TypeIndex>;
 
+void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids);
+
 template <LeastSupertypeOnError on_error = LeastSupertypeOnError::Throw>
 DataTypePtr getLeastSupertype(const TypeIndexSet & types);
 
From 446465307d78ddaa99b8c625f0d6a8584ebccd67 Mon Sep 17 00:00:00 2001
From: chen768959 <934103231@qq.com>
Date: Fri, 4 Aug 2023 09:26:42 +0800
Subject: [PATCH 0132/1687] add test. fix style.

---
 src/DataTypes/getLeastSupertype.cpp                        | 4 +++-
 .../0_stateless/02832_integer_type_inference.reference     | 1 +
 tests/queries/0_stateless/02832_integer_type_inference.sql | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02832_integer_type_inference.reference
 create mode 100644 tests/queries/0_stateless/02832_integer_type_inference.sql

diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp
index 3a7a1560af1..07a3de4760b 100644
--- a/src/DataTypes/getLeastSupertype.cpp
+++ b/src/DataTypes/getLeastSupertype.cpp
@@ -683,6 +683,7 @@ void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids)
 
     auto optimize_type_id = [&is_unsigned](const DataTypePtr & type, bool try_change_unsigned)
     {
+        TypeIndex type_id = type->getTypeId();
         switch (type_id)
         {
             case TypeIndex::UInt8:
@@ -717,7 +718,8 @@ void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids)
     // optimize type_ids, Example:
     // if only_signed. UInt64(possible: Int64), Int64 = Int64, Int64
     // if only_unsigned.
Int64(possible: UInt64), UInt64 = UInt64, UInt64 - if (!(only_unsigned && only_signed) && (both || only_unsigned || only_signed)) { + if (!(only_unsigned && only_signed) && (both || only_unsigned || only_signed)) + { type_ids.clear(); for (const auto & type : types) { diff --git a/tests/queries/0_stateless/02832_integer_type_inference.reference b/tests/queries/0_stateless/02832_integer_type_inference.reference new file mode 100644 index 00000000000..2d072437c90 --- /dev/null +++ b/tests/queries/0_stateless/02832_integer_type_inference.reference @@ -0,0 +1 @@ +[-4741124612489978151,-3236599669630092879,5607475129431807682] diff --git a/tests/queries/0_stateless/02832_integer_type_inference.sql b/tests/queries/0_stateless/02832_integer_type_inference.sql new file mode 100644 index 00000000000..9734bc2966f --- /dev/null +++ b/tests/queries/0_stateless/02832_integer_type_inference.sql @@ -0,0 +1 @@ +select [-4741124612489978151, -3236599669630092879, 5607475129431807682]; From 57d69a10e360fd4cccb5acd0759aa9f44a21f7f6 Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Fri, 4 Aug 2023 09:55:22 +0800 Subject: [PATCH 0133/1687] add switch break --- src/DataTypes/getLeastSupertype.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 07a3de4760b..5a24a59a60e 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -692,12 +692,14 @@ void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids) case TypeIndex::UInt64: if (try_change_unsigned) return type_id; + break ; case TypeIndex::Int8: case TypeIndex::Int16: case TypeIndex::Int32: case TypeIndex::Int64: if (!try_change_unsigned) return type_id; + break ; default: return type_id; } From 5cb856d167a37c46ac31d441e5a688b45ddb0434 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 4 Aug 2023 11:53:22 +0000 Subject: [PATCH 0134/1687] Try batching multiple flush requests in Keeper --- src/Coordination/Changelog.cpp | 78 +++++++++++++++++++------ src/Coordination/KeeperStateManager.cpp | 3 + 2 files changed, 63 insertions(+), 18 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 94062140bac..7f972f67846 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -1014,8 +1014,60 @@ void Changelog::writeThread() { WriteOperation write_operation; bool batch_append_ok = true; - while (write_operations.pop(write_operation)) + size_t pending_appends = 0; + bool try_batch_flush = false; + + /// turn into setting + static constexpr size_t max_flush_batch_size = 1000; + + const auto flush_logs = [&](const auto & flush) { + LOG_INFO(log, "Flushing {} logs", pending_appends); + + { + std::lock_guard writer_lock(writer_mutex); + current_writer->flush(); + } + + { + std::lock_guard lock{durable_idx_mutex}; + last_durable_idx = flush.index; + } + + pending_appends = 0; + }; + + const auto notify_append_completion = [&] + { + durable_idx_cv.notify_all(); + + // we need to call completion callback in another thread because it takes a global lock for the NuRaft server + // NuRaft will in some places wait for flush to be done while having the same global lock leading to deadlock + // -> future write operations are blocked by flush that cannot be completed because it cannot take NuRaft lock + // -> NuRaft won't leave lock until its flush is done + if (!append_completion_queue.push(batch_append_ok)) + LOG_WARNING(log, "Changelog is shut down"); + }; + + 
while (true) + { + if (try_batch_flush) + { + try_batch_flush = false; + if (!write_operations.tryPop(write_operation)) + { + chassert(batch_append_ok); + const auto & flush = std::get(write_operation); + flush_logs(flush); + notify_append_completion(); + continue; + } + } + else if (!write_operations.pop(write_operation)) + { + break; + } + assert(initialized); if (auto * append_log = std::get_if(&write_operation)) @@ -1027,6 +1079,7 @@ void Changelog::writeThread() assert(current_writer); batch_append_ok = current_writer->appendRecord(buildRecord(append_log->index, append_log->log_entry)); + ++pending_appends; } else { @@ -1034,30 +1087,19 @@ void Changelog::writeThread() if (batch_append_ok) { + /// we can try batching more logs for flush + if (pending_appends < max_flush_batch_size) { - std::lock_guard writer_lock(writer_mutex); - current_writer->flush(); - } - - { - std::lock_guard lock{durable_idx_mutex}; - last_durable_idx = flush.index; + try_batch_flush = true; + continue; } + flush_logs(flush); } else { *flush.failed = true; } - - durable_idx_cv.notify_all(); - - // we need to call completion callback in another thread because it takes a global lock for the NuRaft server - // NuRaft will in some places wait for flush to be done while having the same global lock leading to deadlock - // -> future write operations are blocked by flush that cannot be completed because it cannot take NuRaft lock - // -> NuRaft won't leave lock until its flush is done - if (!append_completion_queue.push(batch_append_ok)) - LOG_WARNING(log, "Changelog is shut down"); - + notify_append_completion(); batch_append_ok = true; } } diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index cf1bad8c5fa..cc13c755629 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -184,6 +184,9 @@ KeeperStateManager::parseServersConfiguration(const Poco::Util::AbstractConfigur total_servers++; } + /// this will only apply to fresh clusters + result.cluster_config->set_async_replication(true); + if (!result.config && !allow_without_us) throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section", my_server_id); From ab78f9a94301441d077c6fb902618cdff3a3d443 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 4 Aug 2023 19:08:35 +0200 Subject: [PATCH 0135/1687] Fix test --- .../0_stateless/02841_valid_json_and_xml_on_http_exception.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh index 60ce7eb3b6f..26b3ef64d61 100755 --- a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh +++ b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_URL='$CLICKHOUSE_URL&http_write_exception_in_output_format=1&allow_experimental_analyzer=0' +CH_URL="$CLICKHOUSE_URL&http_write_exception_in_output_format=1&allow_experimental_analyzer=0" echo "One block" for parallel in 0 1 From 6f08b5411f13b529476965aa85f6f91dde5a61e4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 1 Oct 2022 16:32:04 +0200 Subject: [PATCH 0136/1687] Check what will happen if we build ClickHouse with Musl --- PreLoad.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PreLoad.cmake b/PreLoad.cmake index b456c724cc6..5d003a709f1 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -78,7 +78,7 @@ if (OS MATCHES "Linux" AND ("$ENV{CC}" MATCHES ".*clang.*" OR CMAKE_C_COMPILER MATCHES ".*clang.*")) if (ARCH MATCHES "amd64|x86_64") - set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-x86_64.cmake" CACHE INTERNAL "") + set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-x86_64-musl.cmake" CACHE INTERNAL "") elseif (ARCH MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)") set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-aarch64.cmake" CACHE INTERNAL "") elseif (ARCH MATCHES "^(ppc64le.*|PPC64LE.*)") From 39883f937391932e225e6ed5ab5e7b86fb5d12f4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jul 2023 06:57:47 +0200 Subject: [PATCH 0137/1687] Add missing targets --- contrib/corrosion-cmake/CMakeLists.txt | 20 ++++++++++---------- docker/packager/binary/Dockerfile | 6 +++++- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt index ea8f191564d..96ec0490ba3 100644 --- a/contrib/corrosion-cmake/CMakeLists.txt +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -17,20 +17,20 @@ endif() message(STATUS "Checking Rust toolchain for current target") -if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") +if((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) + set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl") +elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") -endif() - -if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) + set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl") +elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") -endif() - -if((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) set(Rust_CARGO_TARGET "x86_64-apple-darwin") -endif() - -if((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) set(Rust_CARGO_TARGET "x86_64-unknown-freebsd") +elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64") + set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu") endif() if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le") diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 99e748c41d4..cfeea971d2f 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -52,11 +52,15 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \ rustup toolchain install nightly-2023-07-04 && \ rustup default nightly-2023-07-04 && \ rustup component add 
rust-src && \ + rustup target add x86_64-unknown-linux-gnu && \ rustup target add aarch64-unknown-linux-gnu && \ rustup target add x86_64-apple-darwin && \ rustup target add x86_64-unknown-freebsd && \ rustup target add aarch64-apple-darwin && \ - rustup target add powerpc64le-unknown-linux-gnu + rustup target add powerpc64le-unknown-linux-gnu && \ + rustup target add x86_64-unknown-linux-musl && \ + rustup target add aarch64-unknown-linux-musl && \ + rustup target add riscv64gc-unknown-linux-gnu # Create vendor cache for cargo. # From 08b397a79797a0773d9f10d1d890c73a61931e66 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jul 2023 07:00:14 +0200 Subject: [PATCH 0138/1687] Add a comment --- contrib/corrosion-cmake/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt index 96ec0490ba3..8adc2c0b23a 100644 --- a/contrib/corrosion-cmake/CMakeLists.txt +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -17,6 +17,8 @@ endif() message(STATUS "Checking Rust toolchain for current target") +# See https://doc.rust-lang.org/nightly/rustc/platform-support.html + if((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl") elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") From 44b49fe8db9460d3563235bafd897411f6bd4519 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jul 2023 06:57:47 +0200 Subject: [PATCH 0139/1687] Add missing targets --- contrib/corrosion-cmake/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt index 8adc2c0b23a..302ab2c9260 100644 --- a/contrib/corrosion-cmake/CMakeLists.txt +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -18,7 +18,6 @@ endif() message(STATUS "Checking Rust toolchain for current target") # See https://doc.rust-lang.org/nightly/rustc/platform-support.html - if((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl") elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") From 396f5f0eddb4975d99dab1490f277d627b766cdf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jul 2023 07:00:14 +0200 Subject: [PATCH 0140/1687] Add a comment --- contrib/corrosion-cmake/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt index 302ab2c9260..8adc2c0b23a 100644 --- a/contrib/corrosion-cmake/CMakeLists.txt +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -18,6 +18,7 @@ endif() message(STATUS "Checking Rust toolchain for current target") # See https://doc.rust-lang.org/nightly/rustc/platform-support.html + if((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl") elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") From 41e8345169616ea052e09dcd5f54aea8c5253c62 Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Sat, 5 Aug 2023 17:35:25 +0800 Subject: [PATCH 0141/1687] Add the optimize_type_ids parameter to control whether optimization is enabled. 
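
The parameter defaults to false, so only call sites that opt in get the
narrowing; in this patch that is FieldToDataType, i.e. inferring the type of
literal arrays. The decision rule itself can be sketched outside of
ClickHouse. The standalone program below is illustrative only (invented
names, no ClickHouse types) and assumes each literal is tracked as a sign
flag plus a 64-bit magnitude:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // A value classifies the same way the only_signed / only_unsigned /
    // both flags do: a non-negative magnitude that also fits into Int64
    // plays the role of a UInt64 type with an Int64 "possible" twin.
    enum class Supertype { Int64, UInt64, None };

    struct Literal
    {
        bool negative;      // sign of the literal
        uint64_t magnitude; // absolute value
    };

    static Supertype commonIntegerType(const std::vector<Literal> & literals)
    {
        bool needs_signed = false;   // a negative value rules out UInt64
        bool needs_unsigned = false; // a value above INT64_MAX rules out Int64
        for (const auto & l : literals)
        {
            if (l.negative)
                needs_signed = true;
            else if (l.magnitude > static_cast<uint64_t>(INT64_MAX))
                needs_unsigned = true;
        }
        if (needs_signed && needs_unsigned)
            return Supertype::None;  // no lossless 64-bit integer supertype
        return needs_unsigned ? Supertype::UInt64 : Supertype::Int64;
    }

    int main()
    {
        // Mirrors 02832_integer_type_inference: a negative literal plus a
        // positive one that still fits Int64 narrows to Int64.
        assert(commonIntegerType({{true, 4741124612489978151ULL},
                                  {false, 5607475129431807682ULL}})
               == Supertype::Int64);
        return 0;
    }

This is the reason the 02832_integer_type_inference test can expect
Array(Int64) for an array mixing negative literals with positive ones,
instead of being left with the unresolvable {Int64, UInt64} pair.
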
--- src/DataTypes/FieldToDataType.cpp | 2 +- src/DataTypes/getLeastSupertype.cpp | 7 ++++--- src/DataTypes/getLeastSupertype.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp index 0a48c43ceb2..9b8e476067e 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -155,7 +155,7 @@ DataTypePtr FieldToDataType::operator() (const Array & x) const for (const Field & elem : x) element_types.emplace_back(applyVisitor(*this, elem)); - return std::make_shared(getLeastSupertype(element_types)); + return std::make_shared(getLeastSupertype(element_types, true)); } template diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 5a24a59a60e..f3b396ba895 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -201,7 +201,7 @@ DataTypePtr getNumericType(const TypeIndexSet & types) } template -DataTypePtr getLeastSupertype(const DataTypes & types) +DataTypePtr getLeastSupertype(const DataTypes & types, bool optimize_type_ids) { /// Trivial cases @@ -592,7 +592,8 @@ DataTypePtr getLeastSupertype(const DataTypes & types) /// For numeric types, the most complicated part. { - optimizeTypeIds(types, type_ids); + if (optimize_type_ids) + optimizeTypeIds(types, type_ids); auto numeric_type = getNumericType(type_ids); if (numeric_type) return numeric_type; @@ -798,7 +799,7 @@ DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types) return getLeastSupertype(types); } -template DataTypePtr getLeastSupertype(const DataTypes & types); +template DataTypePtr getLeastSupertype(const DataTypes & types, bool optimize_type_ids); template DataTypePtr getLeastSupertype(const TypeIndexSet & types); } diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 0a3fa3c2536..ba43b3966f9 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -18,7 +18,7 @@ enum class LeastSupertypeOnError * Examples: there is no least common supertype for Array(UInt8), Int8. */ template -DataTypePtr getLeastSupertype(const DataTypes & types); +DataTypePtr getLeastSupertype(const DataTypes & types, bool optimize_type_ids = false); /// Same as above but return String type instead of throwing exception. /// All types can be casted to String, because they can be serialized to String. From ad49798f4550338c314d0620e283aee800fa7664 Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Sat, 5 Aug 2023 20:17:28 +0800 Subject: [PATCH 0142/1687] fix optimizeTypeIds, Some scenarios do not require optimization: if UInt64(possible: Int64), UInt64(possible: Int64) = UInt64, UInt64 if Int64(possible: UInt32), Int64(possible: UInt32) = Int64, Int64 --- src/DataTypes/getLeastSupertype.cpp | 32 ++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index f3b396ba895..c534f3c6edc 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -641,6 +641,8 @@ void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids) bool only_unsigned = false; bool only_signed = false; bool both = false; + bool has_unsigned = false; + bool has_signed = false; // Determine the distribution of maximum signed and unsigned, Example: // Int64, Int64 = only_signed. 
@@ -673,6 +675,11 @@ void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids) } } + if (type_is_unsigned) + has_unsigned = true; + else + has_signed = true; + if (type_is_both) both = true; else if (type_is_unsigned) @@ -718,22 +725,23 @@ void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids) return type_id; }; - // optimize type_ids, Example: - // if only_signed. UInt64(possible: Int64), Int64 = Int64, Int64 - // if only_unsigned. Int64(possible: UInt64), UInt64 = UInt64, UInt64 - if (!(only_unsigned && only_signed) && (both || only_unsigned || only_signed)) + // optimize type_ids + if (both) { - type_ids.clear(); - for (const auto & type : types) + // Example: UInt64(possible: Int64), Int64 = Int64, Int64 + if (only_unsigned && !only_signed) { - if (only_unsigned) - { + type_ids.clear(); + for (const auto & type : types) type_ids.insert(optimize_type_id(type, true)); - } - else if (both || only_signed) - { + } + // Example: Int64(possible: UInt32), UInt64 = UInt32, UInt64 + // Int64(possible: UInt32), UInt64(possible: Int64) = Int64, Int64 + else if ((only_signed && !only_unsigned) || (has_unsigned && has_signed && !only_signed && !only_unsigned)) + { + type_ids.clear(); + for (const auto & type : types) type_ids.insert(optimize_type_id(type, false)); - } } } } From 37d382aba42710c4fdd23acd2d2b40e97d68aa03 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 4 Aug 2023 23:17:51 +0200 Subject: [PATCH 0143/1687] impl --- src/Bridge/IBridge.cpp | 4 ++-- src/Core/Defines.h | 2 +- src/Disks/IO/ReadBufferFromWebServer.cpp | 2 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 3 ++- src/Server/HTTPHandler.cpp | 2 +- src/Server/InterserverIOHTTPHandler.cpp | 2 +- src/Server/PrometheusRequestHandler.cpp | 2 +- src/Server/ReplicasStatusHandler.cpp | 2 +- src/Server/StaticRequestHandler.cpp | 2 +- src/Server/WebUIRequestHandler.cpp | 2 +- 10 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index dd3c25e583b..c913a6e80c0 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -92,7 +92,7 @@ void IBridge::defineOptions(Poco::Util::OptionSet & options) Poco::Util::Option("max-server-connections", "", "max connections to server, default 1024").argument("max-server-connections").binding("max-server-connections")); options.addOption( - Poco::Util::Option("keep-alive-timeout", "", "keepalive timeout, default 10").argument("keep-alive-timeout").binding("keep-alive-timeout")); + Poco::Util::Option("keep-alive-timeout", "", "keepalive timeout, default 30").argument("keep-alive-timeout").binding("keep-alive-timeout")); options.addOption( Poco::Util::Option("http-max-field-value-size", "", "max http field value size, default 1048576").argument("http-max-field-value-size").binding("http-max-field-value-size")); @@ -164,7 +164,7 @@ void IBridge::initialize(Application & self) http_timeout = config().getUInt64("http-timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT); max_server_connections = config().getUInt("max-server-connections", 1024); - keep_alive_timeout = config().getUInt64("keep-alive-timeout", 10); + keep_alive_timeout = config().getUInt64("keep-alive-timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); http_max_field_value_size = config().getUInt64("http-max-field-value-size", 128 * 1024); struct rlimit limit; diff --git a/src/Core/Defines.h b/src/Core/Defines.h index efe14b93a3d..32649c5fecb 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -47,7 +47,7 @@ /// the number is unmotivated #define 
DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT 15 -#define DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT 10 +#define DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT 30 #define DBMS_DEFAULT_PATH "/var/lib/clickhouse/" diff --git a/src/Disks/IO/ReadBufferFromWebServer.cpp b/src/Disks/IO/ReadBufferFromWebServer.cpp index 1f4818c8cb9..46d8c41ff78 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.cpp +++ b/src/Disks/IO/ReadBufferFromWebServer.cpp @@ -55,7 +55,7 @@ std::unique_ptr ReadBufferFromWebServer::initialize() const auto & settings = context->getSettingsRef(); const auto & config = context->getConfigRef(); - Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", 20), 0}; + Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}; auto res = std::make_unique( uri, diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 0bd35c07a4b..c03ba48ef3e 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -132,7 +132,8 @@ std::unique_ptr getClient( client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 3000); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100); client_configuration.endpointOverride = uri.endpoint; - client_configuration.http_keep_alive_timeout_ms = config.getUInt(config_prefix + ".http_keep_alive_timeout_ms", 10000); + client_configuration.http_keep_alive_timeout_ms + = config.getUInt(config_prefix + ".http_keep_alive_timeout_ms", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT * 1000); client_configuration.http_connection_pool_size = config.getUInt(config_prefix + ".http_connection_pool_size", 1000); client_configuration.wait_on_pool_size_limit = false; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index ff5690a3b07..add9363da4d 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -596,7 +596,7 @@ void HTTPHandler::processQuery( size_t buffer_size_http = DBMS_DEFAULT_BUFFER_SIZE; size_t buffer_size_memory = (buffer_size_total > buffer_size_http) ? 
buffer_size_total : 0; - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10); + unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); used_output.out = std::make_shared( response, diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 9741592868a..5f6da208778 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -88,7 +88,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe Output used_output; const auto & config = server.config(); - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10); + unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); used_output.out = std::make_shared( response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 79025624206..357c39e30d2 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -18,7 +18,7 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe try { const auto & config = server.config(); - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10); + unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); setResponseDefaultHeaders(response, keep_alive_timeout); diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 8c0ab0c1a3b..ad54b24f31d 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -79,7 +79,7 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe } const auto & config = getContext()->getConfigRef(); - setResponseDefaultHeaders(response, config.getUInt("keep_alive_timeout", 10)); + setResponseDefaultHeaders(response, config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT)); if (!ok) { diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index 13a01ba8139..a33fbfbbf95 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -90,7 +90,7 @@ static inline void trySendExceptionToClient( void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", 10); + auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); const auto & out = responseWriteBuffer(request, response, keep_alive_timeout); try diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 6fa1d65de42..fdba03baa73 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -30,7 +30,7 @@ WebUIRequestHandler::WebUIRequestHandler(IServer & server_) void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", 10); + auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); response.setContentType("text/html; charset=UTF-8"); From 75a8512017027d92a2ef7311e89d84ff627629c5 Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Sun, 6 Aug 2023 09:52:53 +0800 Subject: [PATCH 0144/1687] modify 
comments --- src/DataTypes/getLeastSupertype.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index c534f3c6edc..c04130a58e4 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -728,14 +728,14 @@ void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids) // optimize type_ids if (both) { - // Example: UInt64(possible: Int64), Int64 = Int64, Int64 + // Example: Int64(possible: UInt32), UInt64 = UInt32, UInt64 if (only_unsigned && !only_signed) { type_ids.clear(); for (const auto & type : types) type_ids.insert(optimize_type_id(type, true)); } - // Example: Int64(possible: UInt32), UInt64 = UInt32, UInt64 + // Example: UInt64(possible: Int64), Int64 = Int64, Int64 // Int64(possible: UInt32), UInt64(possible: Int64) = Int64, Int64 else if ((only_signed && !only_unsigned) || (has_unsigned && has_signed && !only_signed && !only_unsigned)) { From 1540c02ccabaf4921ca8327d5e3779bce3da03d4 Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Mon, 7 Aug 2023 17:56:51 +0800 Subject: [PATCH 0145/1687] temporarily disable the new Integer type inference feature to verify if it is causing the zk validation failure issue. --- src/DataTypes/FieldToDataType.cpp | 2 +- .../queries/0_stateless/02832_integer_type_inference.reference | 2 +- tests/queries/0_stateless/02832_integer_type_inference.sql | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp index 9b8e476067e..5947eff7ea2 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -155,7 +155,7 @@ DataTypePtr FieldToDataType::operator() (const Array & x) const for (const Field & elem : x) element_types.emplace_back(applyVisitor(*this, elem)); - return std::make_shared(getLeastSupertype(element_types, true)); + return std::make_shared(getLeastSupertype(element_types, false)); } template diff --git a/tests/queries/0_stateless/02832_integer_type_inference.reference b/tests/queries/0_stateless/02832_integer_type_inference.reference index 2d072437c90..e1bfb315d40 100644 --- a/tests/queries/0_stateless/02832_integer_type_inference.reference +++ b/tests/queries/0_stateless/02832_integer_type_inference.reference @@ -1 +1 @@ -[-4741124612489978151,-3236599669630092879,5607475129431807682] +[-4741124612489978151,-3236599669630092879] diff --git a/tests/queries/0_stateless/02832_integer_type_inference.sql b/tests/queries/0_stateless/02832_integer_type_inference.sql index 9734bc2966f..9c558b3ae67 100644 --- a/tests/queries/0_stateless/02832_integer_type_inference.sql +++ b/tests/queries/0_stateless/02832_integer_type_inference.sql @@ -1 +1 @@ -select [-4741124612489978151, -3236599669630092879, 5607475129431807682]; +select [-4741124612489978151, -3236599669630092879]; From f500b0da14d5207f4ce826ebc8a8556b8c6e3cc0 Mon Sep 17 00:00:00 2001 From: Dani Pozo Date: Mon, 7 Aug 2023 12:23:57 +0200 Subject: [PATCH 0146/1687] Document limitation of BACKUP TO Disk('s3_backup') --- docs/en/operations/backup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 63bf3cfeb5c..3915f1b09a8 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -356,7 +356,7 @@ RESTORE TABLE data AS data_restored FROM Disk('s3_plain', 'cloud_backup'); :::note But keep in mind that: - This disk should not be used for 
`MergeTree` itself, only for `BACKUP`/`RESTORE` -- It has excessive API calls +- If your tables are backed by S3 storage, it doesn't use `CopyObject` calls to copy parts to the destination bucket, instead it downloads and uploads them, which is very inefficient. Prefer to use `BACKUP ... TO S3()` syntax for this usecase. ::: ## Alternatives From 4b3e399f7d450ed50b01741543e2888d5f28234c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 7 Aug 2023 09:05:21 +0000 Subject: [PATCH 0147/1687] Wait for read requests --- src/Coordination/KeeperDispatcher.cpp | 92 +++++++++++++++---------- src/Coordination/KeeperDispatcher.h | 4 +- src/Coordination/KeeperStateMachine.cpp | 7 +- src/Coordination/KeeperStateMachine.h | 2 +- src/Coordination/KeeperStateManager.cpp | 2 - 5 files changed, 65 insertions(+), 42 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 99c28674273..4608e918042 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -73,7 +74,6 @@ void KeeperDispatcher::requestThread() auto coordination_settings = configuration_and_settings->coordination_settings; uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds(); - uint64_t max_batch_size = coordination_settings->max_requests_batch_size; uint64_t max_batch_bytes_size = coordination_settings->max_requests_batch_bytes_size; /// The code below do a very simple thing: batch all write (quorum) requests into vector until @@ -136,12 +136,9 @@ void KeeperDispatcher::requestThread() return false; }; - /// TODO: Deprecate max_requests_quick_batch_size and use only max_requests_batch_size and max_requests_batch_bytes_size - size_t max_quick_batch_size = coordination_settings->max_requests_quick_batch_size; - while (!shutdown_called && !has_read_request && - !has_reconfig_request && - current_batch.size() < max_quick_batch_size && current_batch_bytes_size < max_batch_bytes_size && - try_get_request()) + size_t max_batch_size = coordination_settings->max_requests_batch_size; + while (!shutdown_called && current_batch.size() < max_batch_size && !has_reconfig_request + && current_batch_bytes_size < max_batch_bytes_size && try_get_request()) ; const auto prev_result_done = [&] @@ -152,10 +149,8 @@ void KeeperDispatcher::requestThread() }; /// Waiting until previous append will be successful, or batch is big enough - while (!shutdown_called && !has_read_request && - !has_reconfig_request && !prev_result_done() && - current_batch.size() <= max_batch_size - && current_batch_bytes_size < max_batch_bytes_size) + while (!shutdown_called && !has_reconfig_request && !prev_result_done() && current_batch.size() <= max_batch_size + && current_batch_bytes_size < max_batch_bytes_size) { try_get_request(); } @@ -166,9 +161,10 @@ void KeeperDispatcher::requestThread() if (shutdown_called) break; + nuraft::ptr result_buf = nullptr; /// Forcefully process all previous pending requests if (prev_result) - forceWaitAndProcessResult(prev_result, prev_batch); + result_buf = forceWaitAndProcessResult(prev_result, prev_batch); /// Process collected write requests batch if (!current_batch.empty()) @@ -177,13 +173,7 @@ void KeeperDispatcher::requestThread() auto result = server->putRequestBatch(current_batch); - if (result) - { - /// If we will execute read or reconfig next, we have to process result now - if (has_read_request || has_reconfig_request) - 
forceWaitAndProcessResult(result, current_batch); - } - else + if (!result) { addErrorResponses(current_batch, Coordination::Error::ZCONNECTIONLOSS); current_batch.clear(); @@ -194,6 +184,28 @@ void KeeperDispatcher::requestThread() prev_result = result; } + /// If we will execute read or reconfig next, we have to process result now + if (has_read_request || has_reconfig_request) + { + if (prev_result) + result_buf = forceWaitAndProcessResult(prev_result, current_batch); + + if (result_buf) + { + nuraft::buffer_serializer bs(result_buf); + auto log_idx = bs.get_u64(); + + while (true) + { + auto current_last_committed_idx = last_committed_log_idx.load(std::memory_order_relaxed); + if (current_last_committed_idx >= log_idx) + break; + + last_committed_log_idx.wait(current_last_committed_idx); + } + } + } + if (has_reconfig_request) server->getKeeperStateMachine()->reconfigure(request); @@ -360,28 +372,33 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf snapshots_queue, keeper_context, snapshot_s3, - [this](const KeeperStorage::RequestForSession & request_for_session) + [this](uint64_t log_idx, const KeeperStorage::RequestForSession & request_for_session) { - /// check if we have queue of read requests depending on this request to be committed - std::lock_guard lock(read_request_queue_mutex); - if (auto it = read_request_queue.find(request_for_session.session_id); it != read_request_queue.end()) { - auto & xid_to_request_queue = it->second; - - if (auto request_queue_it = xid_to_request_queue.find(request_for_session.request->xid); - request_queue_it != xid_to_request_queue.end()) + /// check if we have queue of read requests depending on this request to be committed + std::lock_guard lock(read_request_queue_mutex); + if (auto it = read_request_queue.find(request_for_session.session_id); it != read_request_queue.end()) { - for (const auto & read_request : request_queue_it->second) - { - if (server->isLeaderAlive()) - server->putLocalReadRequest(read_request); - else - addErrorResponses({read_request}, Coordination::Error::ZCONNECTIONLOSS); - } + auto & xid_to_request_queue = it->second; - xid_to_request_queue.erase(request_queue_it); + if (auto request_queue_it = xid_to_request_queue.find(request_for_session.request->xid); + request_queue_it != xid_to_request_queue.end()) + { + for (const auto & read_request : request_queue_it->second) + { + if (server->isLeaderAlive()) + server->putLocalReadRequest(read_request); + else + addErrorResponses({read_request}, Coordination::Error::ZCONNECTIONLOSS); + } + + xid_to_request_queue.erase(request_queue_it); + } } } + + last_committed_log_idx.store(log_idx, std::memory_order_relaxed); + last_committed_log_idx.notify_all(); }); try @@ -636,7 +653,7 @@ void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSession } } -void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions) +nuraft::ptr KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions) { if (!result->has_result()) result->get(); @@ -647,8 +664,11 @@ void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, Keep else if (result->get_result_code() != nuraft::cmd_result_code::OK) addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS); + auto result_buf = result->get(); + result = nullptr; requests_for_sessions.clear(); + return result_buf; } int64_t 
KeeperDispatcher::getSessionID(int64_t session_timeout_ms) diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index ae592ae3fa1..31a8f80f252 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -102,11 +102,13 @@ private: /// Forcefully wait for result and sets errors if something when wrong. /// Clears both arguments - void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions); + nuraft::ptr forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions); public: std::mutex read_request_queue_mutex; + std::atomic last_committed_log_idx = 0; + /// queue of read requests that can be processed after a request with specific session ID and XID is committed std::unordered_map> read_request_queue; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 8f2e3c3ac0e..5a8cf88ea73 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -167,7 +167,10 @@ nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nur request_for_session->zxid = log_idx; preprocess(*request_for_session); - return nullptr; + auto result = nuraft::buffer::alloc(8); + nuraft::buffer_serializer ss(result); + ss.put_u64(log_idx); + return result; } std::shared_ptr KeeperStateMachine::parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version) @@ -433,7 +436,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n last_committed_idx = log_idx; if (commit_callback) - commit_callback(*request_for_session); + commit_callback(log_idx, *request_for_session); return nullptr; } diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 116fa9257a0..aad5d3aafd4 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -20,7 +20,7 @@ using SnapshotsQueue = ConcurrentBoundedQueue; class KeeperStateMachine : public nuraft::state_machine { public: - using CommitCallback = std::function; + using CommitCallback = std::function; KeeperStateMachine( ResponsesQueue & responses_queue_, diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index cc13c755629..902c2ef595f 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -184,9 +184,7 @@ KeeperStateManager::parseServersConfiguration(const Poco::Util::AbstractConfigur total_servers++; } - /// this will only apply to fresh clusters result.cluster_config->set_async_replication(true); - if (!result.config && !allow_without_us) throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section", my_server_id); From 8734ba1da540a43e4010cfa64f8b981627cedbec Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 7 Aug 2023 13:16:38 +0000 Subject: [PATCH 0148/1687] to check if tests are green or not w/feature disabled --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c58deeaf2a8..18ec88528ee 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -46,7 +46,7 @@ class IColumn; M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. 
Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \ M(MaxThreads, max_final_threads, 0, "The maximum number of threads to read from table with FINAL.", 0) \ M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \ - M(Bool, use_concurrency_control, true, "Respect the server's concurrency control (see the `concurrent_threads_soft_limit_num` and `concurrent_threads_soft_limit_ratio_to_cores` global server settings). If disabled, it allows using a larger number of threads even if the server is overloaded (not recommended for normal usage, and needed mostly for tests).", 0) \ + M(Bool, use_concurrency_control, false, "Respect the server's concurrency control (see the `concurrent_threads_soft_limit_num` and `concurrent_threads_soft_limit_ratio_to_cores` global server settings). If disabled, it allows using a larger number of threads even if the server is overloaded (not recommended for normal usage, and needed mostly for tests).", 0) \ M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \ M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \ M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \ From 67ee1a2385e3dc3235bee24f7d20ff181dc3aa36 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 7 Aug 2023 15:00:25 +0000 Subject: [PATCH 0149/1687] fix tests --- .../test_compression_nested_columns/test.py | 2 +- .../configs/long_names.xml | 5 +++++ .../configs/wide_parts_only.xml | 1 - .../test_default_compression_codec/test.py | 12 ++++++++++-- tests/integration/test_filesystem_layout/test.py | 2 +- 5 files changed, 17 insertions(+), 5 deletions(-) create mode 100644 tests/integration/test_default_compression_codec/configs/long_names.xml diff --git a/tests/integration/test_compression_nested_columns/test.py b/tests/integration/test_compression_nested_columns/test.py index 55d88174287..3541a9f6061 100644 --- a/tests/integration/test_compression_nested_columns/test.py +++ b/tests/integration/test_compression_nested_columns/test.py @@ -48,7 +48,7 @@ def test_nested_compression_codec(start_cluster): column_array Array(Array(UInt64)) CODEC(T64, LZ4), column_bad LowCardinality(Int64) CODEC(Delta) ) ENGINE = ReplicatedMergeTree('/t', '{}') ORDER BY tuple() PARTITION BY key - SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, replace_long_file_name_to_hash = 0; """.format( i ), diff --git a/tests/integration/test_default_compression_codec/configs/long_names.xml b/tests/integration/test_default_compression_codec/configs/long_names.xml new file mode 100644 index 00000000000..1dc241dbf05 --- /dev/null +++ b/tests/integration/test_default_compression_codec/configs/long_names.xml @@ -0,0 +1,5 @@ + + + 0 + + diff --git a/tests/integration/test_default_compression_codec/configs/wide_parts_only.xml b/tests/integration/test_default_compression_codec/configs/wide_parts_only.xml index 4d1a3357799..10b9edef36d 100644 --- a/tests/integration/test_default_compression_codec/configs/wide_parts_only.xml +++ b/tests/integration/test_default_compression_codec/configs/wide_parts_only.xml @@ -2,6 +2,5 @@ 0 0 - 0 diff --git 
a/tests/integration/test_default_compression_codec/test.py b/tests/integration/test_default_compression_codec/test.py index c7c30f5eea4..abaf160e26a 100644 --- a/tests/integration/test_default_compression_codec/test.py +++ b/tests/integration/test_default_compression_codec/test.py @@ -9,12 +9,20 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", - main_configs=["configs/default_compression.xml", "configs/wide_parts_only.xml"], + main_configs=[ + "configs/default_compression.xml", + "configs/wide_parts_only.xml", + "configs/long_names.xml", + ], with_zookeeper=True, ) node2 = cluster.add_instance( "node2", - main_configs=["configs/default_compression.xml", "configs/wide_parts_only.xml"], + main_configs=[ + "configs/default_compression.xml", + "configs/wide_parts_only.xml", + "configs/long_names.xml", + ], with_zookeeper=True, ) node3 = cluster.add_instance( diff --git a/tests/integration/test_filesystem_layout/test.py b/tests/integration/test_filesystem_layout/test.py index 81f3b67cb75..4e719aa0fe9 100644 --- a/tests/integration/test_filesystem_layout/test.py +++ b/tests/integration/test_filesystem_layout/test.py @@ -48,7 +48,7 @@ def test_file_path_escaping(started_cluster): node.query( """ CREATE TABLE `test 2`.`T.a_b,l-e!` UUID '12345678-1000-4000-8000-000000000001' (`~Id` UInt32) - ENGINE = MergeTree() PARTITION BY `~Id` ORDER BY `~Id` SETTINGS min_bytes_for_wide_part = 0; + ENGINE = MergeTree() PARTITION BY `~Id` ORDER BY `~Id` SETTINGS min_bytes_for_wide_part = 0, replace_long_file_name_to_hash = 0; """ ) node.query("""INSERT INTO `test 2`.`T.a_b,l-e!` VALUES (1);""") From 080b4badbd59aafa24d00349cb4338a8b2e9d0c2 Mon Sep 17 00:00:00 2001 From: alexX512 Date: Mon, 7 Aug 2023 15:58:14 +0000 Subject: [PATCH 0150/1687] Support partial result for aggregating transform during query execution --- src/Interpreters/Aggregator.cpp | 23 +++++ src/Interpreters/Aggregator.h | 2 + src/Processors/IProcessor.cpp | 5 ++ src/Processors/IProcessor.h | 12 +-- src/Processors/LimitTransform.h | 3 +- .../AggregatingPartialResultTransform.cpp | 42 +++++++++ .../AggregatingPartialResultTransform.h | 26 ++++++ .../Transforms/AggregatingTransform.cpp | 12 +++ .../Transforms/AggregatingTransform.h | 7 ++ .../Transforms/ExpressionTransform.h | 4 +- .../Transforms/LimitsCheckingTransform.h | 3 +- .../MergeSortingPartialResultTransform.cpp | 1 - .../MergeSortingPartialResultTransform.h | 1 + .../Transforms/MergeSortingTransform.cpp | 11 --- .../Transforms/MergeSortingTransform.h | 7 +- .../Transforms/PartialResultTransform.cpp | 6 +- .../Transforms/PartialResultTransform.h | 1 + src/QueryPipeline/Pipe.cpp | 4 +- ...rting_result_during_query_execution.python | 25 ++++-- ...ating_result_during_query_execution.python | 88 +++++++++++++++++++ ...ng_result_during_query_execution.reference | 53 +++++++++++ ...gregating_result_during_query_execution.sh | 8 ++ .../queries/0_stateless/helpers/tcp_client.py | 26 +++++- 23 files changed, 332 insertions(+), 38 deletions(-) create mode 100644 src/Processors/Transforms/AggregatingPartialResultTransform.cpp create mode 100644 src/Processors/Transforms/AggregatingPartialResultTransform.h create mode 100644 tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python create mode 100644 tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.reference create mode 100755 tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.sh diff --git a/src/Interpreters/Aggregator.cpp 
b/src/Interpreters/Aggregator.cpp index 91cd574708a..e310c2a8315 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2273,6 +2273,29 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va return block; } +Block Aggregator::prepareBlockAndFillWithoutKeySnapshot(AggregatedDataVariants & data_variants) const +{ + size_t rows = 1; + bool final = true; + + auto && out_cols + = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), data_variants.aggregates_pools, final, rows); + auto && [key_columns, raw_key_columns, aggregate_columns, final_aggregate_columns, aggregate_columns_data] = out_cols; + + AggregatedDataWithoutKey & data = data_variants.without_key; + + /// Always single-thread. It's safe to pass current arena from 'aggregates_pool'. + for (size_t insert_i = 0; insert_i < params.aggregates_size; ++insert_i) + aggregate_functions[insert_i]->insertResultInto( + data + offsets_of_aggregate_states[insert_i], + *final_aggregate_columns[insert_i], + data_variants.aggregates_pool); + + Block block = finalizeBlock(params, getHeader(final), std::move(out_cols), final, rows); + + return block; +} + template Aggregator::ConvertToBlockRes Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 4f2c86606c5..6bfaa76f9b3 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1210,6 +1210,7 @@ private: friend class ConvertingAggregatedToChunksSource; friend class ConvertingAggregatedToChunksWithMergingSource; friend class AggregatingInOrderTransform; + friend class AggregatingPartialResultTransform; /// Data structure of source blocks. Block header; @@ -1391,6 +1392,7 @@ private: std::atomic * is_cancelled = nullptr) const; Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const; + Block prepareBlockAndFillWithoutKeySnapshot(AggregatedDataVariants & data_variants) const; BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const; template diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index 8b160153733..2f294a32531 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -40,5 +40,10 @@ std::string IProcessor::statusToName(Status status) UNREACHABLE(); } +ProcessorPtr IProcessor::getPartialResultProcessorPtr(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) +{ + return current_processor->getPartialResultProcessor(current_processor, partial_result_limit, partial_result_duration_ms); +} + } diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 140e5c13f27..a156da7bad2 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -164,6 +164,8 @@ public: static std::string statusToName(Status status); + static ProcessorPtr getPartialResultProcessorPtr(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms); + /** Method 'prepare' is responsible for all cheap ("instantaneous": O(1) of data volume, no wait) calculations. 
* * It may access input and output ports, @@ -238,11 +240,6 @@ public: virtual bool isPartialResultProcessor() const { return false; } virtual bool supportPartialResultProcessor() const { return false; }
- virtual ProcessorPtr getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/)
- {
- throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getPartialResultProcessor' is not implemented for {} processor", getName());
- }
- /// In case if query was cancelled executor will wait till all processors finish their jobs. /// Generally, there is no reason to check this flag. However, it may be reasonable for long operations (e.g. i/o). bool isCancelled() const { return is_cancelled.load(std::memory_order_acquire); } @@ -377,6 +374,11 @@ public: protected: virtual void onCancel() {}
+ virtual ProcessorPtr getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/)
+ {
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getPartialResultProcessor' is not implemented for {} processor", getName());
+ }
+ private: /// For: /// - elapsed_us diff --git a/src/Processors/LimitTransform.h b/src/Processors/LimitTransform.h index b0ec7600406..ebdcbe49670 100644 --- a/src/Processors/LimitTransform.h +++ b/src/Processors/LimitTransform.h @@ -55,7 +55,6 @@ private: ColumnRawPtrs extractSortColumns(const Columns & columns) const; bool sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, UInt64 current_chunk_row_num) const;
- bool supportPartialResultProcessor() const override { return true; } ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override;
 public: @@ -76,6 +75,8 @@ public: void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit_at_least.swap(counter); } void setInputPortHasCounter(size_t pos) { ports_data[pos].input_port_has_counter = true; }
+
+ bool supportPartialResultProcessor() const override { return true; }
 }; } diff --git a/src/Processors/Transforms/AggregatingPartialResultTransform.cpp b/src/Processors/Transforms/AggregatingPartialResultTransform.cpp new file mode 100644 index 00000000000..fbfb9be572b --- /dev/null +++ b/src/Processors/Transforms/AggregatingPartialResultTransform.cpp @@ -0,0 +1,42 @@ +#include
+
+namespace DB
+{
+
+AggregatingPartialResultTransform::AggregatingPartialResultTransform(
+ const Block & input_header, const Block & output_header, AggregatingTransformPtr aggregating_transform_,
+ UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_)
+ : PartialResultTransform(input_header, output_header, partial_result_limit_, partial_result_duration_ms_)
+ , aggregating_transform(std::move(aggregating_transform_))
+ {}
+
+PartialResultTransform::ShaphotResult AggregatingPartialResultTransform::getRealProcessorSnapshot()
+{
+ std::lock_guard lock(aggregating_transform->snapshot_mutex);
+
+ auto & params = aggregating_transform->params;
+ /// Currently not supported cases
+ /// TODO: check that insert results from prepareBlockAndFillWithoutKey return values without changing the aggregator state
+ if (params->params.keys_size != 0 /// has at least one key for aggregation
+ || params->aggregator.hasTemporaryData() /// use external storage for aggregation
+ || aggregating_transform->many_data->variants.size() > 1) /// use more than one stream for aggregation
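+ /// Snapshots are implemented only for single-threaded, in-memory
+ /// aggregation without keys; producing a consistent intermediate result
+ /// for the cases above is not supported yet, so the snapshot loop stops.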
+ return {{}, SnaphotStatus::Stopped}; + + if (aggregating_transform->is_generate_initialized) + return {{}, SnaphotStatus::Stopped}; + + if (aggregating_transform->variants.empty()) + return {{}, SnaphotStatus::NotReady}; + + auto & aggregator = params->aggregator; + + auto prepared_data = aggregator.prepareVariantsToMerge(aggregating_transform->many_data->variants); + AggregatedDataVariantsPtr & first = prepared_data.at(0); + + aggregator.mergeWithoutKeyDataImpl(prepared_data); + auto block = aggregator.prepareBlockAndFillWithoutKeySnapshot(*first); + + return {convertToChunk(block), SnaphotStatus::Ready}; +} + +} diff --git a/src/Processors/Transforms/AggregatingPartialResultTransform.h b/src/Processors/Transforms/AggregatingPartialResultTransform.h new file mode 100644 index 00000000000..e5e28dc7f7a --- /dev/null +++ b/src/Processors/Transforms/AggregatingPartialResultTransform.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class AggregatingPartialResultTransform : public PartialResultTransform +{ +public: + using AggregatingTransformPtr = std::shared_ptr; + + AggregatingPartialResultTransform( + const Block & input_header, const Block & output_header, AggregatingTransformPtr aggregating_transform_, + UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); + + String getName() const override { return "AggregatingPartialResultTransform"; } + + ShaphotResult getRealProcessorSnapshot() override; + +private: + AggregatingTransformPtr aggregating_transform; +}; + +} diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 4bd000797a6..b4d2785bed2 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -657,6 +658,8 @@ void AggregatingTransform::consume(Chunk chunk) src_rows += num_rows; src_bytes += chunk.bytes(); + std::lock_guard lock(snapshot_mutex); + if (params->params.only_merge) { auto block = getInputs().front().getHeader().cloneWithColumns(chunk.detachColumns()); @@ -676,6 +679,7 @@ void AggregatingTransform::initGenerate() if (is_generate_initialized) return; + std::lock_guard lock(snapshot_mutex); is_generate_initialized = true; /// If there was no data, and we aggregate without keys, and we must return single row with the result of empty aggregation. 
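
The snapshot_mutex taken in consume() and initGenerate() above is the same
mutex that AggregatingPartialResultTransform::getRealProcessorSnapshot
acquires, so a snapshot observes either a fully applied input chunk or none
of it, and snapshots stop once final generation has begun. A minimal
standalone sketch of that pattern (illustrative only, invented names, none
of the ClickHouse machinery) is:

    #include <mutex>
    #include <optional>

    // The pipeline thread accumulates under a lock, a partial-result
    // thread copies the state under the same lock, and snapshots end
    // once the final result is being produced.
    class RunningSum
    {
    public:
        void consume(long long value)   // like AggregatingTransform::consume
        {
            std::lock_guard lock(mutex);
            sum += value;
        }

        void startFinalize()            // like initGenerate
        {
            std::lock_guard lock(mutex);
            finalizing = true;
        }

        std::optional<long long> snapshot() const // like getRealProcessorSnapshot
        {
            std::lock_guard lock(mutex);
            if (finalizing)
                return std::nullopt;    // corresponds to SnaphotStatus::Stopped
            return sum;                 // corresponds to SnaphotStatus::Ready
        }

    private:
        mutable std::mutex mutex;
        long long sum = 0;
        bool finalizing = false;
    };

    int main()
    {
        RunningSum state;
        state.consume(40);
        state.consume(2);
        auto partial = state.snapshot();  // contains 42 while still consuming
        state.startFinalize();
        auto stopped = state.snapshot();  // empty: final result in progress
        return (partial.value_or(0) == 42 && !stopped) ? 0 : 1;
    }
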
@@ -806,4 +810,12 @@ void AggregatingTransform::initGenerate() } } +ProcessorPtr AggregatingTransform::getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) +{ + const auto & input_header = inputs.front().getHeader(); + const auto & output_header = outputs.front().getHeader(); + auto aggregating_processor = std::dynamic_pointer_cast(current_processor); + return std::make_shared(input_header, output_header, std::move(aggregating_processor), partial_result_limit, partial_result_duration_ms); +} + } diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 38baa4d0394..649b90ae74a 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -170,9 +170,13 @@ public: void work() override; Processors expandPipeline() override; + bool supportPartialResultProcessor() const override { return true; } + protected: void consume(Chunk chunk); + ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; + private: /// To read the data that was flushed into the temporary data file. Processors processors; @@ -212,6 +216,9 @@ private: bool is_consume_started = false; + friend class AggregatingPartialResultTransform; + std::mutex snapshot_mutex; + void initGenerate(); }; diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index bf8425b2300..a7b5ef4f51b 100644 --- a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -26,13 +26,15 @@ public: static Block transformHeader(Block header, const ActionsDAG & expression); + bool supportPartialResultProcessor() const override { return true; } + protected: void transform(Chunk & chunk) override; - bool supportPartialResultProcessor() const override { return true; } ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; private: + ExpressionActionsPtr expression; }; diff --git a/src/Processors/Transforms/LimitsCheckingTransform.h b/src/Processors/Transforms/LimitsCheckingTransform.h index 6e8d5547cc5..e8635384129 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.h +++ b/src/Processors/Transforms/LimitsCheckingTransform.h @@ -33,10 +33,11 @@ public: void setQuota(const std::shared_ptr & quota_) { quota = quota_; } + bool supportPartialResultProcessor() const override { return true; } + protected: void transform(Chunk & chunk) override; - bool supportPartialResultProcessor() const override { return true; } ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; private: diff --git a/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp b/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp index 2c4d6bea3c5..e4a2af2cdd8 100644 --- a/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp +++ b/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp @@ -1,4 +1,3 @@ -#include #include namespace DB diff --git a/src/Processors/Transforms/MergeSortingPartialResultTransform.h b/src/Processors/Transforms/MergeSortingPartialResultTransform.h index f08f74dad6b..1eaebf9d902 100644 --- 
a/src/Processors/Transforms/MergeSortingPartialResultTransform.h +++ b/src/Processors/Transforms/MergeSortingPartialResultTransform.h @@ -1,5 +1,6 @@ #pragma once +#include #include namespace DB diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index d21cb179b0a..af4a51b81ee 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -285,17 +285,6 @@ void MergeSortingTransform::remerge() ProcessorPtr MergeSortingTransform::getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) { - if (getName() != current_processor->getName() || current_processor.get() != this) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "To create partial result processor variable current_processor should use " \ - "the same class and pointer as in the original processor with class {} and pointer {}. " \ - "But current_processor has another class {} or pointer {} then original.", - getName(), - static_cast(this), - current_processor->getName(), - static_cast(current_processor.get())); - const auto & header = inputs.front().getHeader(); auto merge_sorting_processor = std::dynamic_pointer_cast(current_processor); return std::make_shared(header, std::move(merge_sorting_processor), partial_result_limit, partial_result_duration_ms); diff --git a/src/Processors/Transforms/MergeSortingTransform.h b/src/Processors/Transforms/MergeSortingTransform.h index bf4bfec72fb..e8a1f7a5853 100644 --- a/src/Processors/Transforms/MergeSortingTransform.h +++ b/src/Processors/Transforms/MergeSortingTransform.h @@ -33,6 +33,8 @@ public: String getName() const override { return "MergeSortingTransform"; } + bool supportPartialResultProcessor() const override { return true; } + protected: void consume(Chunk chunk) override; void serialize() override; @@ -40,7 +42,6 @@ protected: Processors expandPipeline() override; - bool supportPartialResultProcessor() const override { return true; } ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; private: @@ -61,10 +62,10 @@ private: /// Merge all accumulated blocks to keep no more than limit rows. 
void remerge(); + ProcessorPtr external_merging_sorted; + friend class MergeSortingPartialResultTransform; std::mutex snapshot_mutex; - - ProcessorPtr external_merging_sorted; }; } diff --git a/src/Processors/Transforms/PartialResultTransform.cpp b/src/Processors/Transforms/PartialResultTransform.cpp index 5e9704d38a2..97ff79dee54 100644 --- a/src/Processors/Transforms/PartialResultTransform.cpp +++ b/src/Processors/Transforms/PartialResultTransform.cpp @@ -3,8 +3,12 @@ namespace DB { + PartialResultTransform::PartialResultTransform(const Block & header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) - : IProcessor({header}, {header}) + : PartialResultTransform(header, header, partial_result_limit_, partial_result_duration_ms_) {} + +PartialResultTransform::PartialResultTransform(const Block & input_header, const Block & output_header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) + : IProcessor({input_header}, {output_header}) , input(inputs.front()) , output(outputs.front()) , partial_result_limit(partial_result_limit_) diff --git a/src/Processors/Transforms/PartialResultTransform.h b/src/Processors/Transforms/PartialResultTransform.h index 7d4b9026ce1..2bcf494400d 100644 --- a/src/Processors/Transforms/PartialResultTransform.h +++ b/src/Processors/Transforms/PartialResultTransform.h @@ -9,6 +9,7 @@ class PartialResultTransform : public IProcessor { public: PartialResultTransform(const Block & header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); + PartialResultTransform(const Block & input_header, const Block & output_header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); String getName() const override { return "PartialResultTransform"; } diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index adf06dde53b..15f6bea70a7 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -636,7 +636,7 @@ void Pipe::addPartialResultSimpleTransform(const ProcessorPtr & transform, size_ return; } - auto partial_result_transform = transform->getPartialResultProcessor(transform, partial_result_limit, partial_result_duration_ms); + auto partial_result_transform = IProcessor::getPartialResultProcessorPtr(transform, partial_result_limit, partial_result_duration_ms); connectPartialResultPort(partial_result_port, partial_result_transform->getInputs().front()); @@ -661,7 +661,7 @@ void Pipe::addPartialResultTransform(const ProcessorPtr & transform) return; } - auto partial_result_transform = transform->getPartialResultProcessor(transform, partial_result_limit, partial_result_duration_ms); + auto partial_result_transform = IProcessor::getPartialResultProcessorPtr(transform, partial_result_limit, partial_result_duration_ms); auto & inputs = partial_result_transform->getInputs(); if (inputs.size() != partial_result_ports.size()) diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python index 14f5a912672..ce80b0f29a3 100755 --- a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python +++ b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python @@ -8,13 +8,13 @@ import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(CURDIR, "helpers")) -from tcp_client import TCPClient, assertPacket +from tcp_client import TCPClient def main(): with TCPClient() as client: client.sendQuery( - 
f"SELECT number FROM numbers_mt(1e7+1) ORDER BY -number LIMIT 15 SETTINGS max_threads = 1, partial_result_update_duration_ms=1, max_rows_in_partial_result=10" + "SELECT number FROM numbers_mt(1e7+1) ORDER BY -number LIMIT 15 SETTINGS max_threads = 1, partial_result_update_duration_ms = 1, max_rows_in_partial_result = 10" ) # external tables @@ -23,13 +23,13 @@ def main(): # Partial result _, partial_result = client.readDataWithoutProgress()[0] - - assert_message = ( - "There should be at least one block of data with partial result" - ) - assert len(partial_result) > 0, assert_message + assert len(partial_result) > 0, "Expected at least one block with a non-empty partial result before getting the full result" while True: + assert all( + a >= b for a, b in zip(partial_result, partial_result[1:]) + ), "Partial result always should be sorted for this test" + _, new_partial_result = client.readDataWithoutProgress( need_print_info=False )[0] @@ -37,15 +37,22 @@ def main(): break data_size = len(partial_result) - assert_message = f"New block contains more info about the full data so sorted results should not be less then in the previous iteration. New result {new_partial_result}. Previous result {partial_result}" + assert all( partial_result[i] <= new_partial_result[i] for i in range(data_size) - ), assert_message + ), f"New partial result values should always be greater then old one because a new block contains more information about the full data. New result {new_partial_result}. Previous result {partial_result}" + partial_result = new_partial_result # Full result _, full_result = client.readDataWithoutProgress()[0] + + data_size = len(partial_result) + assert all( + partial_result[i] <= full_result[i] for i in range(data_size) + ), f"Full result values should always be greater then partial result values. Full result {full_result}. 
Partial result {partial_result}" + for result in full_result: print(result) diff --git a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python new file mode 100644 index 00000000000..6fd778c77e7 --- /dev/null +++ b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + + +import os +import sys + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + +from tcp_client import TCPClient, assertPacket + +def run_query_without_errors(query, support_partial_result, invariants=None): + if invariants is None: + invariants = {} + + with TCPClient() as client: + client.sendQuery(query) + + # external tables + client.sendEmptyBlock() + client.readHeader() + + # Partial result + partial_results = client.readDataWithoutProgress() + if support_partial_result: + assert len(partial_results[0][1]) > 0, "Expected at least one block with a non-empty partial result before getting the full result" + while True: + new_partial_results = client.readDataWithoutProgress(need_print_info=False) + if len(new_partial_results[0][1]) == 0: + break + + for new_result, old_result in zip(new_partial_results, partial_results): + assert new_result[0] == old_result[0], "Keys in blocks should be in the same order" + key = new_result[0] + if key in invariants: + old_value = old_result[1] + new_value = new_result[1] + assert invariants[key](old_value, new_value), f"Problem with the invariant between old and new versions of a partial result for key: {key}. Old value {old_value}, new value {new_value}" + else: + assert len(partial_results[0][1]) == 0, "Expected no non-empty partial result blocks before getting the full result" + + # Full result + full_results = client.readDataWithoutProgress() + if support_partial_result: + for full_result, partial_result in zip(full_results, partial_results): + assert full_result[0] == partial_result[0], "Keys in blocks should be in the same order" + key = full_result[0] + if key in invariants: + full_value = full_result[1] + partial_value = partial_result[1] + assert invariants[key](partial_value, full_value), f"Problem with the invariant between full and partial result for key: {key}. Partial value {partial_value}. 
Full value {full_value}" + + for key, value in full_results: + if isinstance(value[0], int): + print(key, value) + + +def supported_scenarios(): + query = "select median(number), stddevSamp(number), stddevPop(number), max(number), min(number), any(number), count(number), avg(number), sum(number) from numbers_mt(1e7+1) settings max_threads = 1, partial_result_update_duration_ms = 1" + invariants = { + "median(number)": lambda old_value, new_value: old_value <= new_value, + "max(number)": lambda old_value, new_value: old_value <= new_value, + "min(number)": lambda old_value, new_value: old_value >= new_value, + "count(number)": lambda old_value, new_value: old_value <= new_value, + "avg(number)": lambda old_value, new_value: old_value <= new_value, + "sum(number)": lambda old_value, new_value: old_value <= new_value, + } + run_query_without_errors(query, support_partial_result=True, invariants=invariants) + + +def unsupported_scenarios(): + # Currently aggregator for partial result supports only single thread aggregation without key + # Update test when multithreading or aggregation with GROUP BY will be supported for partial result updates + multithread_query = "select sum(number) from numbers_mt(1e7+1) settings max_threads = 2, partial_result_update_duration_ms = 1" + run_query_without_errors(multithread_query, support_partial_result=False) + + group_with_key_query = "select mod2, sum(number) from numbers_mt(1e7+1) group by number % 2 as mod2 settings max_threads = 1, partial_result_update_duration_ms = 1" + run_query_without_errors(group_with_key_query, support_partial_result=False) + + +def main(): + supported_scenarios() + unsupported_scenarios() + + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.reference b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.reference new file mode 100644 index 00000000000..a97ee3f8d79 --- /dev/null +++ b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.reference @@ -0,0 +1,53 @@ +Rows 0 Columns 9 +Column median(number) type Float64 +Column stddevSamp(number) type Float64 +Column stddevPop(number) type Float64 +Column max(number) type UInt64 +Column min(number) type UInt64 +Column any(number) type UInt64 +Column count(number) type UInt64 +Column avg(number) type Float64 +Column sum(number) type UInt64 +Rows 1 Columns 9 +Column median(number) type Float64 +Column stddevSamp(number) type Float64 +Column stddevPop(number) type Float64 +Column max(number) type UInt64 +Column min(number) type UInt64 +Column any(number) type UInt64 +Column count(number) type UInt64 +Column avg(number) type Float64 +Column sum(number) type UInt64 +Rows 1 Columns 9 +Column median(number) type Float64 +Column stddevSamp(number) type Float64 +Column stddevPop(number) type Float64 +Column max(number) type UInt64 +Column min(number) type UInt64 +Column any(number) type UInt64 +Column count(number) type UInt64 +Column avg(number) type Float64 +Column sum(number) type UInt64 +max(number) [10000000] +min(number) [0] +any(number) [0] +count(number) [10000001] +sum(number) [50000005000000] +Rows 0 Columns 1 +Column sum(number) type UInt64 +Rows 0 Columns 1 +Column sum(number) type UInt64 +Rows 1 Columns 1 +Column sum(number) type UInt64 +sum(number) [50000005000000] +Rows 0 Columns 2 +Column mod2 type UInt8 +Column sum(number) type UInt64 +Rows 0 Columns 2 +Column mod2 type UInt8 +Column sum(number) type UInt64 +Rows 2 Columns 2 +Column 
mod2 type UInt8 +Column sum(number) type UInt64 +mod2 [0, 1] +sum(number) [25000005000000, 25000000000000] diff --git a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.sh b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.sh new file mode 100755 index 00000000000..e70a3c53ec4 --- /dev/null +++ b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test +python3 "$CURDIR"/02834_partial_aggregating_result_during_query_execution.python diff --git a/tests/queries/0_stateless/helpers/tcp_client.py b/tests/queries/0_stateless/helpers/tcp_client.py index 2f0680b4eae..bab449d71fe 100644 --- a/tests/queries/0_stateless/helpers/tcp_client.py +++ b/tests/queries/0_stateless/helpers/tcp_client.py @@ -1,6 +1,7 @@ import socket import os import uuid +import struct CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) @@ -105,6 +106,15 @@ class TCPClient(object): def readUInt64(self): return self.readUInt(8) + def readFloat16(self): + return struct.unpack("e", self.readStrict(2)) + + def readFloat32(self): + return struct.unpack("f", self.readStrict(4)) + + def readFloat64(self): + return struct.unpack("d", self.readStrict(8)) + def readVarUInt(self): x = 0 for i in range(9): @@ -250,12 +260,22 @@ class TCPClient(object): print("Column {} type {}".format(col_name, type_name)) def readRow(self, row_type, rows): - if row_type == "UInt64": - row = [self.readUInt64() for _ in range(rows)] + supported_row_types = { + "UInt8": self.readUInt8, + "UInt16": self.readUInt16, + "UInt32": self.readUInt32, + "UInt64": self.readUInt64, + "Float16": self.readFloat16, + "Float32": self.readFloat32, + "Float64": self.readFloat64, + } + if row_type in supported_row_types: + read_type = supported_row_types[row_type] + row = [read_type() for _ in range(rows)] return row else: raise RuntimeError( - "Currently python version of tcp client doesn't support the following type of row: {}".format( + "Current python version of tcp client doesn't support the following type of row: {}".format( row_type ) ) From 181e214a8a34a464794efaffcac9ec2611823872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Mon, 7 Aug 2023 16:20:36 +0000 Subject: [PATCH 0151/1687] Added Codec-file and shit-coded CompressionMethodByte --- src/Compression/CompressionCodecGCD.cpp | 251 ++++++++++++++++++++++++ src/Compression/CompressionInfo.h | 1 + 2 files changed, 252 insertions(+) create mode 100644 src/Compression/CompressionCodecGCD.cpp diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp new file mode 100644 index 00000000000..c5de17e092a --- /dev/null +++ b/src/Compression/CompressionCodecGCD.cpp @@ -0,0 +1,251 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "Common/Exception.h" +#include "base/types.h" +#include "config.h" + +#include + +namespace DB +{ + +class CompressionCodecGCD : public ICompressionCodec +{ +public: + explicit CompressionCodecGCD(UInt8 gcd_bytes_size_); + + uint8_t getMethodByte() const override; + + void updateHash(SipHash & hash) const override; + 
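+    /// getMethodByte() supplies the one-byte codec id (CompressionMethodByte::GCD,
+    /// registered below) that is written into each compressed block header so a
+    /// reader can dispatch back to this codec; updateHash() mixes the codec
+    /// description AST into a SipHash, which the implementation does via
+    /// getCodecDesc()->updateTreeHash(hash).
+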
+protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; + void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override { return uncompressed_size + 2; } + + bool isCompression() const override { return false; } + bool isGenericCompression() const override { return false; } + +private: + const UInt8 gcd_bytes_size; +}; + + +namespace ErrorCodes +{ + extern const int CANNOT_COMPRESS; + extern const int CANNOT_DECOMPRESS; + extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; + extern const int ILLEGAL_CODEC_PARAMETER; + extern const int BAD_ARGUMENTS; +} + +CompressionCodecGCD::CompressionCodecGCD(UInt8 gcd_bytes_size_) + : gcd_bytes_size(gcd_bytes_size_) +{ + setCodecDescription("GCD", {std::make_shared(static_cast(gcd_bytes_size))}); +} + +uint8_t CompressionCodecGCD::getMethodByte() const +{ + return static_cast(CompressionMethodByte::GCD); +} + +void CompressionCodecGCD::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); +} + +namespace +{ + +template +void compressDataForType(const char * source, UInt32 source_size, char * dest) +{ + if (source_size % sizeof(T) != 0) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot GCD compress, data size {} is not aligned to {}", source_size, sizeof(T)); + + const char * const source_end = source + source_size; + + T gcd{}; + const auto * cur_source = source; + while (cur_source < source_end) + { + if (cur_source == source) + { + gcd = unalignedLoad(cur_source); + } + else + { + gcd = std::gcd(gcd, unalignedLoad(cur_source)); + } + } + + unalignedStore(dest, gcd); + dest += sizeof(T); + + cur_source = source; + while (cur_source < source_end) + { + unalignedStore(dest, unalignedLoad(cur_source) / gcd); + cur_source += sizeof(T); + dest += sizeof(T); + } +} + +template +void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size) +{ + const char * const output_end = dest + output_size; + + if (source_size % sizeof(T) != 0) + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot GCD decompress, data size {} is not aligned to {}", source_size, sizeof(T)); + + if (source_size < sizeof(T)) { + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot GCD decompress, data size {} is less than {}", source_size, sizeof(T)); + } + + const char * const source_end = source + source_size; + const T gcd = unalignedLoad(source); + source += sizeof(T); + while (source < source_end) + { + if (dest + sizeof(T) > output_end) [[unlikely]] + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data"); + unalignedStore(dest, unalignedLoad(source) * gcd); + + source += sizeof(T); + dest += sizeof(T); + } + +} + +} + +UInt32 CompressionCodecGCD::doCompressData(const char * source, UInt32 source_size, char * dest) const +{ + UInt8 bytes_to_skip = source_size % gcd_bytes_size; + dest[0] = gcd_bytes_size; + dest[1] = bytes_to_skip; /// unused (backward compatibility) + memcpy(&dest[2], source, bytes_to_skip); + size_t start_pos = 2 + bytes_to_skip; + switch (gcd_bytes_size) + { + case 1: + compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + break; + case 2: + compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + break; + case 4: + compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + break; + case 8: 
+ compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + break; + } + return 1 + 1 + source_size; +} + +void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const +{ + if (source_size < 2) + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header"); + + if (uncompressed_size == 0) + return; + + UInt8 bytes_size = source[0]; + + if (!(bytes_size == 1 || bytes_size == 2 || bytes_size == 4 || bytes_size == 8)) + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header"); + + UInt8 bytes_to_skip = uncompressed_size % bytes_size; + UInt32 output_size = uncompressed_size - bytes_to_skip; + + if (static_cast(2 + bytes_to_skip) > source_size) + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header"); + + memcpy(dest, &source[2], bytes_to_skip); + UInt32 source_size_no_header = source_size - bytes_to_skip - 2; + switch (bytes_size) + { + case 1: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; + case 2: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; + case 4: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; + case 8: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; + } +} + +namespace +{ + +UInt8 getGCDBytesSize(const IDataType * column_type) +{ + if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is not applicable for {} because the data type is not of fixed size", + column_type->getName()); + + size_t max_size = column_type->getSizeOfValueInMemory(); + if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8) + return static_cast(max_size); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is only applicable for data types of size 1, 2, 4, 8 bytes. Given type {}", + column_type->getName()); +} + +} + +void registerCodecGCD(CompressionCodecFactory & factory) +{ + UInt8 method_code = static_cast(CompressionMethodByte::GCD); + auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr + { + /// Default bytes size is 1. 
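+    /// (1 is only the fallback: an explicit codec argument such as CODEC(GCD(4))
+    /// takes precedence below, otherwise the byte size is derived from the
+    /// column type's in-memory value size via getGCDBytesSize.)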
+ UInt8 gcd_bytes_size = 1; + + if (arguments && !arguments->children.empty()) + { + if (arguments->children.size() > 1) + throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "GCD codec must have 1 parameter, given {}", arguments->children.size()); + + const auto children = arguments->children; + const auto * literal = children[0]->as(); + if (!literal || literal->value.getType() != Field::Types::Which::UInt64) + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD codec argument must be unsigned integer"); + + size_t user_bytes_size = literal->value.safeGet(); + if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8) + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD value for GCD codec can be 1, 2, 4 or 8, given {}", user_bytes_size); + gcd_bytes_size = static_cast(user_bytes_size); + } + else if (column_type) + { + gcd_bytes_size = getGCDBytesSize(column_type); + } + + return std::make_shared(gcd_bytes_size); + }; + factory.registerCompressionCodecWithType("GCD", method_code, codec_builder); +} + +CompressionCodecPtr getCompressionCodecGCD(UInt8 gcd_bytes_size) +{ + return std::make_shared(gcd_bytes_size); +} + +} diff --git a/src/Compression/CompressionInfo.h b/src/Compression/CompressionInfo.h index 985d74bbb74..6edcef9680d 100644 --- a/src/Compression/CompressionInfo.h +++ b/src/Compression/CompressionInfo.h @@ -47,6 +47,7 @@ enum class CompressionMethodByte : uint8_t AES_256_GCM_SIV = 0x97, FPC = 0x98, DeflateQpl = 0x99, + GCD = 0x69, // TODO: Edit this shit }; } From 90d0c4810867d3bfea805a5da6a792f2f19d9a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Mon, 7 Aug 2023 16:43:46 +0000 Subject: [PATCH 0152/1687] Edited codec-files --- src/Compression/CompressionFactory.cpp | 2 ++ src/Compression/CompressionInfo.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 61f4e3a988c..79382165051 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -179,6 +179,7 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory); void registerCodecGorilla(CompressionCodecFactory & factory); void registerCodecEncrypted(CompressionCodecFactory & factory); void registerCodecFPC(CompressionCodecFactory & factory); +void registerCodecGCD(CompressionCodecFactory & factory); #endif CompressionCodecFactory::CompressionCodecFactory() @@ -195,6 +196,7 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecGorilla(*this); registerCodecEncrypted(*this); registerCodecFPC(*this); + registerCodecGCD(*this); #ifdef ENABLE_QPL_COMPRESSION registerCodecDeflateQpl(*this); #endif diff --git a/src/Compression/CompressionInfo.h b/src/Compression/CompressionInfo.h index 6edcef9680d..6c5b478294b 100644 --- a/src/Compression/CompressionInfo.h +++ b/src/Compression/CompressionInfo.h @@ -47,7 +47,7 @@ enum class CompressionMethodByte : uint8_t AES_256_GCM_SIV = 0x97, FPC = 0x98, DeflateQpl = 0x99, - GCD = 0x69, // TODO: Edit this shit + GCD = 0x69, // TODO: IDK why }; } From 51372d841dfa0f2060201160b6d59b890ccec4d7 Mon Sep 17 00:00:00 2001 From: alexX512 Date: Mon, 7 Aug 2023 16:52:38 +0000 Subject: [PATCH 0153/1687] Style fix --- .../Transforms/MergeSortingTransform.cpp | 5 --- ...rting_result_during_query_execution.python | 10 ++--- ...ating_result_during_query_execution.python | 41 ++++++++++++++----- 3 
files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index af4a51b81ee..e801e5e16d5 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -28,11 +28,6 @@ namespace ProfileEvents namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - class BufferingToFileTransform : public IAccumulatingTransform { public: diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python index ce80b0f29a3..38a9d82d8b6 100755 --- a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python +++ b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python @@ -23,7 +23,9 @@ def main(): # Partial result _, partial_result = client.readDataWithoutProgress()[0] - assert len(partial_result) > 0, "Expected at least one block with a non-empty partial result before getting the full result" + assert ( + len(partial_result) > 0 + ), "Expected at least one block with a non-empty partial result before getting the full result" while True: assert all( @@ -37,12 +39,10 @@ def main(): break data_size = len(partial_result) - assert all( partial_result[i] <= new_partial_result[i] for i in range(data_size) ), f"New partial result values should always be greater then old one because a new block contains more information about the full data. New result {new_partial_result}. Previous result {partial_result}" - partial_result = new_partial_result # Full result @@ -50,8 +50,8 @@ def main(): data_size = len(partial_result) assert all( - partial_result[i] <= full_result[i] for i in range(data_size) - ), f"Full result values should always be greater then partial result values. Full result {full_result}. Partial result {partial_result}" + partial_result[i] <= full_result[i] for i in range(data_size) + ), f"Full result values should always be greater then partial result values. Full result {full_result}. 
Partial result {partial_result}"
 
         for result in full_result:
             print(result)
diff --git a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python
index 6fd778c77e7..e4fe9dcfad0 100644
--- a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python
+++ b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python
@@ -8,7 +8,12 @@ import sys
 CURDIR = os.path.dirname(os.path.realpath(__file__))
 sys.path.insert(0, os.path.join(CURDIR, "helpers"))
 
-from tcp_client import TCPClient, assertPacket
+from tcp_client import TCPClient
+
+
+def get_keys(result):
+    return [key for key, _ in result]
+
 
 def run_query_without_errors(query, support_partial_result, invariants=None):
     if invariants is None:
@@ -24,32 +29,46 @@ def run_query_without_errors(query, support_partial_result, invariants=None):
         # Partial result
         partial_results = client.readDataWithoutProgress()
         if support_partial_result:
-            assert len(partial_results[0][1]) > 0, "Expected at least one block with a non-empty partial result before getting the full result"
+            assert (
+                len(partial_results[0][1]) > 0
+            ), "Expected at least one block with a non-empty partial result before getting the full result"
             while True:
-                new_partial_results = client.readDataWithoutProgress(need_print_info=False)
+                new_partial_results = client.readDataWithoutProgress(
+                    need_print_info=False
+                )
                 if len(new_partial_results[0][1]) == 0:
                     break
 
                 for new_result, old_result in zip(new_partial_results, partial_results):
-                    assert new_result[0] == old_result[0], "Keys in blocks should be in the same order"
+                    assert (
+                        new_result[0] == old_result[0]
+                    ), "Keys in blocks should be in the same order"
+
                     key = new_result[0]
                     if key in invariants:
                         old_value = old_result[1]
                         new_value = new_result[1]
                         assert invariants[key](old_value, new_value), f"Problem with the invariant between old and new versions of a partial result for key: {key}. Old value {old_value}, new value {new_value}"
         else:
-            assert len(partial_results[0][1]) == 0, "Expected no non-empty partial result blocks before getting the full result"
+            assert (
+                len(partial_results[0][1]) == 0
+            ), "Expected no non-empty partial result blocks before getting the full result"
 
         # Full result
         full_results = client.readDataWithoutProgress()
         if support_partial_result:
             for full_result, partial_result in zip(full_results, partial_results):
-                assert full_result[0] == partial_result[0], "Keys in blocks should be in the same order"
-                key = full_result[0]
-                if key in invariants:
-                    full_value = full_result[1]
-                    partial_value = partial_result[1]
-                    assert invariants[key](partial_value, full_value), f"Problem with the invariant between full and partial result for key: {key}. Partial value {partial_value}. Full value {full_value}"
+                assert (
+                    full_result[0] == partial_result[0]
+                ), f"Keys in blocks should be in the same order. Full results keys {get_keys(full_results)}. Partial results keys {get_keys(partial_results)}"
+
+                key = full_result[0]
+                if key in invariants:
+                    full_value = full_result[1]
+                    partial_value = partial_result[1]
+                    assert (
+                        invariants[key](partial_value, full_value)
+                    ), f"Problem with the invariant between full and partial result for key: {key}. Partial value {partial_value}. 
Full value {full_value}" for key, value in full_results: if isinstance(value[0], int): From d5c1106790d927f7ae90f3d6cf85a8504292c586 Mon Sep 17 00:00:00 2001 From: alexX512 Date: Mon, 7 Aug 2023 17:10:54 +0000 Subject: [PATCH 0154/1687] Style fix --- ...rtial_aggregating_result_during_query_execution.python | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python index e4fe9dcfad0..854c36748a2 100644 --- a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python +++ b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python @@ -48,7 +48,9 @@ def run_query_without_errors(query, support_partial_result, invariants=None): if key in invariants: old_value = old_result[1] new_value = new_result[1] - assert invariants[key](old_value, new_value), f"Problem with the invariant between old and new versions of a partial result for key: {key}. Old value {old_value}, new value {new_value}" + assert ( + invariants[key](old_value, new_value) + ), f"Problem with the invariant between old and new versions of a partial result for key: {key}. Old value {old_value}, new value {new_value}" else: assert ( len(partial_results[0][1]) == 0 @@ -66,8 +68,8 @@ def run_query_without_errors(query, support_partial_result, invariants=None): if key in invariants: full_value = full_result[1] partial_value = partial_result[1] - assert ( - invariants[key](partial_value, full_value) + assert invariants[key]( + partial_value, full_value ), f"Problem with the invariant between full and partial result for key: {key}. Partial value {partial_value}. Full value {full_value}" for key, value in full_results: From bbbf7f4c161f7bd2e6455bad164b2681f18dd5ab Mon Sep 17 00:00:00 2001 From: alexX512 Date: Mon, 7 Aug 2023 17:26:34 +0000 Subject: [PATCH 0155/1687] Style fix --- ...4_partial_aggregating_result_during_query_execution.python | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python index 854c36748a2..25e9e6b73c9 100644 --- a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python +++ b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python @@ -48,8 +48,8 @@ def run_query_without_errors(query, support_partial_result, invariants=None): if key in invariants: old_value = old_result[1] new_value = new_result[1] - assert ( - invariants[key](old_value, new_value) + assert invariants[key]( + old_value, new_value ), f"Problem with the invariant between old and new versions of a partial result for key: {key}. Old value {old_value}, new value {new_value}" else: assert ( From 12d262ed7ad779e6f2ed9961ee26719eded8d91f Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Tue, 8 Aug 2023 11:48:08 +0800 Subject: [PATCH 0156/1687] getPossiblePtr no longer has a default value, optimize the optimizeTypeIds method. 
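The intuition behind the diff below: a non-negative literal can be typed as either UIntN or IntN, while a negative literal is signed only, so the supertype of an array has to reconcile those possibilities across all elements. A rough model of that decision in plain C++ (a hedged sketch, not the real getLeastSupertype, which also walks widths from 8 to 256 bits):

#include <cstdint>
#include <iostream>
#include <vector>

/// Sign-and-magnitude view of a parsed integer literal such as -100 or
/// 5607475129431807682, before any concrete type has been chosen.
struct Literal
{
    bool negative = false;
    unsigned long long magnitude = 0;
};

const char * leastIntegerSupertype(const std::vector<Literal> & literals)
{
    bool any_negative = false;
    bool any_unsigned_only = false; /// magnitude does not fit into Int64
    for (const auto & l : literals)
    {
        if (l.negative)
            any_negative = true;
        else if (l.magnitude > static_cast<unsigned long long>(INT64_MAX))
            any_unsigned_only = true;
    }
    if (any_negative && any_unsigned_only)
        return "no common 64-bit type"; /// would need widening or an error
    return any_negative ? "Int64" : "UInt64";
}

int main()
{
    /// Mirrors the new test: [100, -100, 5607475129431807682] -> Int64
    std::cout << leastIntegerSupertype(
                     {{false, 100}, {true, 100}, {false, 5607475129431807682ULL}})
              << '\n';
}

The new 02832 test pins exactly this behaviour: mixing -100 with 5607475129431807683 still infers a plain Int64 array.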
--- src/DataTypes/FieldToDataType.cpp | 2 +- src/DataTypes/IDataType.h | 2 +- src/DataTypes/getLeastSupertype.cpp | 67 +++++-------------- .../02832_integer_type_inference.reference | 3 +- .../02832_integer_type_inference.sql | 3 +- 5 files changed, 24 insertions(+), 53 deletions(-) diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp index 5947eff7ea2..9b8e476067e 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -155,7 +155,7 @@ DataTypePtr FieldToDataType::operator() (const Array & x) const for (const Field & elem : x) element_types.emplace_back(applyVisitor(*this, elem)); - return std::make_shared(getLeastSupertype(element_types, false)); + return std::make_shared(getLeastSupertype(element_types, true)); } template diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 330836f9d92..52962462d7e 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -73,7 +73,7 @@ public: DataTypePtr getPtr() const { return shared_from_this(); } - virtual DataTypes getPossiblePtr() const { return { shared_from_this() }; } + virtual DataTypes getPossiblePtr() const { return {}; } /// Name of data type family (example: FixedString, Array). virtual const char * getFamilyName() const = 0; diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index c04130a58e4..d8be5fab91e 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -620,24 +620,6 @@ void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids) } }; - auto maximize = [](size_t & what, size_t value, bool & only_unsigned, bool & only_signed, bool & both) - { - if (value > what) - { - what = value; - only_unsigned = false; - only_signed = false; - both = false; - return true; - }else if (value == what) - { - return true; - } - - return false; - }; - - size_t max_bits_of_integer = 0; bool only_unsigned = false; bool only_signed = false; bool both = false; @@ -652,41 +634,28 @@ void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids) for (const auto & type : types) { TypeIndex type_id = type->getTypeId(); - bool is_max_bits = false; - if (type_id == TypeIndex::UInt8 || type_id == TypeIndex::Int8) - is_max_bits = maximize(max_bits_of_integer, 8, only_unsigned, only_signed, both); - else if (type_id == TypeIndex::UInt16 || type_id == TypeIndex::Int16) - is_max_bits = maximize(max_bits_of_integer, 16, only_unsigned, only_signed, both); - else if (type_id == TypeIndex::UInt32 || type_id == TypeIndex::Int32) - is_max_bits = maximize(max_bits_of_integer, 32, only_unsigned, only_signed, both); - else if (type_id == TypeIndex::UInt64 || type_id == TypeIndex::Int64) - is_max_bits = maximize(max_bits_of_integer, 64, only_unsigned, only_signed, both); - - if (is_max_bits) + bool type_is_unsigned = is_unsigned(type_id); + bool type_is_both = false; + for (const auto & possible_type : type->getPossiblePtr()) { - bool type_is_unsigned = is_unsigned(type_id); - bool type_is_both = false; - for (const auto & possible_type : type->getPossiblePtr()) + if (type_is_unsigned != is_unsigned(possible_type->getTypeId())) { - if (type_is_unsigned != is_unsigned(possible_type->getTypeId())) - { - type_is_both = true; - break; - } + type_is_both = true; + break; } - - if (type_is_unsigned) - has_unsigned = true; - else - has_signed = true; - - if (type_is_both) - both = true; - else if (type_is_unsigned) - only_unsigned = true; - else - only_signed = true; } + + if 
(type_is_unsigned) + has_unsigned = true; + else + has_signed = true; + + if (type_is_both) + both = true; + else if (type_is_unsigned) + only_unsigned = true; + else + only_signed = true; } auto optimize_type_id = [&is_unsigned](const DataTypePtr & type, bool try_change_unsigned) diff --git a/tests/queries/0_stateless/02832_integer_type_inference.reference b/tests/queries/0_stateless/02832_integer_type_inference.reference index e1bfb315d40..5a01bd4cd11 100644 --- a/tests/queries/0_stateless/02832_integer_type_inference.reference +++ b/tests/queries/0_stateless/02832_integer_type_inference.reference @@ -1 +1,2 @@ -[-4741124612489978151,-3236599669630092879] +[-4741124612489978151,-3236599669630092879,5607475129431807682] +[100,-100,5607475129431807682,5607475129431807683] diff --git a/tests/queries/0_stateless/02832_integer_type_inference.sql b/tests/queries/0_stateless/02832_integer_type_inference.sql index 9c558b3ae67..221e929d705 100644 --- a/tests/queries/0_stateless/02832_integer_type_inference.sql +++ b/tests/queries/0_stateless/02832_integer_type_inference.sql @@ -1 +1,2 @@ -select [-4741124612489978151, -3236599669630092879]; +select [-4741124612489978151, -3236599669630092879, 5607475129431807682]; +select [100, -100, 5607475129431807682, 5607475129431807683]; From 8fa1f69fdf7143d0fbf27ff93853e6f64844f23c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 8 Aug 2023 11:16:37 +0000 Subject: [PATCH 0157/1687] fix part columns modification time --- src/Storages/MergeTree/MergeTreeDataPartWide.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index b3ef45b46e5..1b0de863289 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -271,7 +271,8 @@ std::optional MergeTreeDataPartWide::getColumnModificationTime(const Str { try { - return getDataPartStorage().getFileLastModified(column_name + DATA_FILE_EXTENSION).epochTime(); + auto name_on_disk = checksums.getFileNameOrHash(column_name); + return getDataPartStorage().getFileLastModified(name_on_disk + DATA_FILE_EXTENSION).epochTime(); } catch (const fs::filesystem_error &) { From 1dbe007ee58337b7ae8894f8fbf1f8d9078bec27 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 8 Aug 2023 13:12:07 +0000 Subject: [PATCH 0158/1687] Add coordination setting for async replication --- src/Coordination/Changelog.cpp | 11 +++++------ src/Coordination/Changelog.h | 8 ++++++++ src/Coordination/CoordinationSettings.h | 6 ++++-- src/Coordination/KeeperLogStore.cpp | 5 ++--- src/Coordination/KeeperLogStore.h | 2 +- src/Coordination/KeeperServer.cpp | 13 +++++++++---- src/Coordination/KeeperStateManager.cpp | 18 +++++++++++++----- src/Coordination/KeeperStateManager.h | 4 ++-- 8 files changed, 44 insertions(+), 23 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 7f972f67846..2af68173588 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -586,13 +586,15 @@ private: std::unique_ptr read_buf; }; -Changelog::Changelog(Poco::Logger * log_, LogFileSettings log_file_settings, KeeperContextPtr keeper_context_) +Changelog::Changelog( + Poco::Logger * log_, LogFileSettings log_file_settings, FlushSettings flush_settings_, KeeperContextPtr keeper_context_) : changelogs_detached_dir("detached") , rotate_interval(log_file_settings.rotate_interval) , log(log_) , 
write_operations(std::numeric_limits::max()) , append_completion_queue(std::numeric_limits::max()) , keeper_context(std::move(keeper_context_)) + , flush_settings(flush_settings_) { if (auto latest_log_disk = getLatestLogDisk(); log_file_settings.force_sync && dynamic_cast(latest_log_disk.get()) == nullptr) @@ -1017,12 +1019,9 @@ void Changelog::writeThread() size_t pending_appends = 0; bool try_batch_flush = false; - /// turn into setting - static constexpr size_t max_flush_batch_size = 1000; - const auto flush_logs = [&](const auto & flush) { - LOG_INFO(log, "Flushing {} logs", pending_appends); + LOG_TRACE(log, "Flushing {} logs", pending_appends); { std::lock_guard writer_lock(writer_mutex); @@ -1088,7 +1087,7 @@ void Changelog::writeThread() if (batch_append_ok) { /// we can try batching more logs for flush - if (pending_appends < max_flush_batch_size) + if (pending_appends < flush_settings.max_flush_batch_size) { try_batch_flush = true; continue; diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 9789a6b03dd..dddcb9aa218 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -82,6 +82,11 @@ struct LogFileSettings uint64_t overallocate_size = 0; }; +struct FlushSettings +{ + uint64_t max_flush_batch_size = 1000; +}; + /// Simplest changelog with files rotation. /// No compression, no metadata, just entries with headers one by one. /// Able to read broken files/entries and discard them. Not thread safe. @@ -91,6 +96,7 @@ public: Changelog( Poco::Logger * log_, LogFileSettings log_file_settings, + FlushSettings flush_settings, KeeperContextPtr keeper_context_); Changelog(Changelog &&) = delete; @@ -229,6 +235,8 @@ private: KeeperContextPtr keeper_context; + const FlushSettings flush_settings; + bool initialized = false; }; diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index 220265b6cb0..80a7c245de3 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -38,8 +38,9 @@ struct Settings; M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \ M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \ M(UInt64, max_request_queue_size, 100000, "Maximum number of request that can be in queue for processing", 0) \ - M(UInt64, max_requests_batch_size, 100, "Max size of batch of requests that can be sent to RAFT", 0) \ + M(UInt64, max_requests_batch_size, 1000, "Max size of batch of requests that can be sent to RAFT", 0) \ M(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \ + M(UInt64, max_flush_batch_size, 1000, "Max size of batch of requests that can be flushed together", 0) \ M(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \ M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \ M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \ @@ -48,7 +49,8 @@ struct Settings; M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \ M(UInt64, max_log_file_size, 50 * 1024 * 1024, "Max size of the Raft log file. If possible, each created log file will preallocate this amount of bytes on disk. 
Set to 0 to disable the limit", 0) \ M(UInt64, log_file_overallocate_size, 50 * 1024 * 1024, "If max_log_file_size is not set to 0, this value will be added to it for preallocating bytes on disk. If a log record is larger than this value, it could lead to uncaught out-of-space issues so a larger value is preferred", 0) \ - M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) + M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \ + M(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. Settings is disabled by default to not break backwards compatibility.", 0) DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) diff --git a/src/Coordination/KeeperLogStore.cpp b/src/Coordination/KeeperLogStore.cpp index a9153475dbc..b55b083cb5c 100644 --- a/src/Coordination/KeeperLogStore.cpp +++ b/src/Coordination/KeeperLogStore.cpp @@ -6,9 +6,8 @@ namespace DB { -KeeperLogStore::KeeperLogStore(LogFileSettings log_file_settings, KeeperContextPtr keeper_context) - : log(&Poco::Logger::get("KeeperLogStore")) - , changelog(log, log_file_settings, keeper_context) +KeeperLogStore::KeeperLogStore(LogFileSettings log_file_settings, FlushSettings flush_settings, KeeperContextPtr keeper_context) + : log(&Poco::Logger::get("KeeperLogStore")), changelog(log, log_file_settings, flush_settings, keeper_context) { if (log_file_settings.force_sync) LOG_INFO(log, "force_sync enabled"); diff --git a/src/Coordination/KeeperLogStore.h b/src/Coordination/KeeperLogStore.h index 6e71d8c55cf..de9205241bd 100644 --- a/src/Coordination/KeeperLogStore.h +++ b/src/Coordination/KeeperLogStore.h @@ -14,7 +14,7 @@ namespace DB class KeeperLogStore : public nuraft::log_store { public: - KeeperLogStore(LogFileSettings log_file_settings, KeeperContextPtr keeper_context); + KeeperLogStore(LogFileSettings log_file_settings, FlushSettings flush_settings, KeeperContextPtr keeper_context); /// Read log storage from filesystem starting from last_commited_log_index void init(uint64_t last_commited_log_index, uint64_t logs_to_keep); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 88b9f1cedb4..06d396e46ce 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -208,28 +208,33 @@ void KeeperServer::loadLatestConfig() { auto latest_snapshot_config = state_machine->getClusterConfig(); auto latest_log_store_config = state_manager->getLatestConfigFromLogStore(); + auto async_replication = coordination_settings->async_replication; if (latest_snapshot_config && latest_log_store_config) { if (latest_snapshot_config->get_log_idx() > latest_log_store_config->get_log_idx()) { LOG_INFO(log, "Will use config from snapshot with log index {}", latest_snapshot_config->get_log_idx()); + latest_snapshot_config->set_async_replication(async_replication); state_manager->save_config(*latest_snapshot_config); } else { - LOG_INFO(log, "Will use config from log store with log index {}", latest_snapshot_config->get_log_idx()); + LOG_INFO(log, "Will use config from log store with log index {}", latest_log_store_config->get_log_idx()); + 
latest_log_store_config->set_async_replication(async_replication); state_manager->save_config(*latest_log_store_config); } } else if (latest_snapshot_config) { LOG_INFO(log, "No config in log store, will use config from snapshot with log index {}", latest_snapshot_config->get_log_idx()); + latest_snapshot_config->set_async_replication(async_replication); state_manager->save_config(*latest_snapshot_config); } else if (latest_log_store_config) { LOG_INFO(log, "No config in snapshot, will use config from log store with log index {}", latest_log_store_config->get_log_idx()); + latest_log_store_config->set_async_replication(async_replication); state_manager->save_config(*latest_log_store_config); } else @@ -413,7 +418,7 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo loadLatestConfig(); - last_local_config = state_manager->parseServersConfiguration(config, true).cluster_config; + last_local_config = state_manager->parseServersConfiguration(config, true, coordination_settings->async_replication).cluster_config; launchRaftServer(config, enable_ipv6); @@ -837,12 +842,12 @@ bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction & action) ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) { - auto diff = state_manager->getRaftConfigurationDiff(config); + auto diff = state_manager->getRaftConfigurationDiff(config, coordination_settings); if (!diff.empty()) { std::lock_guard lock{server_write_mutex}; - last_local_config = state_manager->parseServersConfiguration(config, true).cluster_config; + last_local_config = state_manager->parseServersConfiguration(config, true, coordination_settings->async_replication).cluster_config; } return diff; diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index 902c2ef595f..f52d0749b10 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "Coordination/CoordinationSettings.h" namespace DB { @@ -74,7 +75,7 @@ std::unordered_map getClientPorts(const Poco::Util::Abstrac /// 4. No duplicate IDs /// 5. 
Our ID present in hostnames list KeeperStateManager::KeeperConfigurationWrapper -KeeperStateManager::parseServersConfiguration(const Poco::Util::AbstractConfiguration & config, bool allow_without_us) const +KeeperStateManager::parseServersConfiguration(const Poco::Util::AbstractConfiguration & config, bool allow_without_us, bool enable_async_replication) const { const bool hostname_checks_enabled = config.getBool(config_prefix + ".hostname_checks_enabled", true); @@ -184,7 +185,8 @@ KeeperStateManager::parseServersConfiguration(const Poco::Util::AbstractConfigur total_servers++; } - result.cluster_config->set_async_replication(true); + result.cluster_config->set_async_replication(enable_async_replication); + if (!result.config && !allow_without_us) throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section", my_server_id); @@ -221,6 +223,7 @@ KeeperStateManager::KeeperStateManager(int server_id_, const std::string & host, , secure(false) , log_store(nuraft::cs_new( LogFileSettings{.force_sync = false, .compress_logs = false, .rotate_interval = 5000}, + FlushSettings{}, keeper_context_)) , server_state_file_name("state") , keeper_context(keeper_context_) @@ -243,7 +246,7 @@ KeeperStateManager::KeeperStateManager( : my_server_id(my_server_id_) , secure(config.getBool(config_prefix_ + ".raft_configuration.secure", false)) , config_prefix(config_prefix_) - , configuration_wrapper(parseServersConfiguration(config, false)) + , configuration_wrapper(parseServersConfiguration(config, false, coordination_settings->async_replication)) , log_store(nuraft::cs_new( LogFileSettings { @@ -253,6 +256,10 @@ KeeperStateManager::KeeperStateManager( .max_size = coordination_settings->max_log_file_size, .overallocate_size = coordination_settings->log_file_overallocate_size }, + FlushSettings + { + .max_flush_batch_size = coordination_settings->max_flush_batch_size, + }, keeper_context_)) , server_state_file_name(server_state_file_name_) , keeper_context(keeper_context_) @@ -452,9 +459,10 @@ nuraft::ptr KeeperStateManager::read_state() return nullptr; } -ClusterUpdateActions KeeperStateManager::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) const +ClusterUpdateActions KeeperStateManager::getRaftConfigurationDiff( + const Poco::Util::AbstractConfiguration & config, const CoordinationSettingsPtr & coordination_settings) const { - auto new_configuration_wrapper = parseServersConfiguration(config, true); + auto new_configuration_wrapper = parseServersConfiguration(config, true, coordination_settings->async_replication); std::unordered_map new_ids, old_ids; for (const auto & new_server : new_configuration_wrapper.cluster_config->get_servers()) diff --git a/src/Coordination/KeeperStateManager.h b/src/Coordination/KeeperStateManager.h index 5abeea604b5..e402143c179 100644 --- a/src/Coordination/KeeperStateManager.h +++ b/src/Coordination/KeeperStateManager.h @@ -93,7 +93,7 @@ public: ClusterConfigPtr getLatestConfigFromLogStore() const; // TODO (myrrc) This should be removed once "reconfig" is stabilized - ClusterUpdateActions getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) const; + ClusterUpdateActions getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config, const CoordinationSettingsPtr & coordination_settings) const; private: const String & getOldServerStatePath(); @@ -131,7 +131,7 @@ private: public: /// Parse configuration from xml config. 
- KeeperConfigurationWrapper parseServersConfiguration(const Poco::Util::AbstractConfiguration & config, bool allow_without_us) const; + KeeperConfigurationWrapper parseServersConfiguration(const Poco::Util::AbstractConfiguration & config, bool allow_without_us, bool enable_async_replication) const; }; } From 0a4c3f1dac3fa3a7be335e8a9f6b44a5a145d579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Tue, 8 Aug 2023 15:16:31 +0000 Subject: [PATCH 0159/1687] Fix --- src/Compression/CompressionCodecGCD.cpp | 177 +++++++++++++++++++++--- src/Compression/CompressionFactory.cpp | 4 +- 2 files changed, 161 insertions(+), 20 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index c5de17e092a..74b633708c2 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -7,18 +7,55 @@ #include #include #include "Common/Exception.h" +#include "base/Decimal_fwd.h" #include "base/types.h" #include "config.h" +#include #include namespace DB { +enum class GCDTypes : Int8 +{ + UInt8_type = 1, + UInt16_type = 2, + UInt32_type = 4, + UInt64_type = 8, + UInt128_type = 16, + UInt256_type = 32, + Int8_type = -1, + Int16_type = -2, + Int32_type = -4, + Int64_type = -8, + Int128_type = -16, + Int256_type = -32, +}; + +template +T gcd_func(T a, T b) +{ + if (a < 0) + { + a = -a; + } + if (b < 0) { + b = -b; + } + while (b != 0) + { + T c = a % b; + a = b; + b = c; + } + return a; +} + class CompressionCodecGCD : public ICompressionCodec { public: - explicit CompressionCodecGCD(UInt8 gcd_bytes_size_); + explicit CompressionCodecGCD(Int8 gcd_bytes_size_); uint8_t getMethodByte() const override; @@ -33,7 +70,7 @@ protected: bool isGenericCompression() const override { return false; } private: - const UInt8 gcd_bytes_size; + const Int8 gcd_bytes_size; }; @@ -46,10 +83,10 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -CompressionCodecGCD::CompressionCodecGCD(UInt8 gcd_bytes_size_) +CompressionCodecGCD::CompressionCodecGCD(Int8 gcd_bytes_size_) : gcd_bytes_size(gcd_bytes_size_) { - setCodecDescription("GCD", {std::make_shared(static_cast(gcd_bytes_size))}); + setCodecDescription("GCD", {std::make_shared(static_cast(gcd_bytes_size))}); } uint8_t CompressionCodecGCD::getMethodByte() const @@ -83,7 +120,7 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest) } else { - gcd = std::gcd(gcd, unalignedLoad(cur_source)); + gcd = gcd_func(gcd, unalignedLoad(cur_source)); } } @@ -130,7 +167,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 CompressionCodecGCD::doCompressData(const char * source, UInt32 source_size, char * dest) const { - UInt8 bytes_to_skip = source_size % gcd_bytes_size; + UInt8 bytes_to_skip = source_size % abs(gcd_bytes_size); dest[0] = gcd_bytes_size; dest[1] = bytes_to_skip; /// unused (backward compatibility) memcpy(&dest[2], source, bytes_to_skip); @@ -149,6 +186,30 @@ UInt32 CompressionCodecGCD::doCompressData(const char * source, UInt32 source_si case 8: compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); break; + case 16: + compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + break; + case 32: + compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + break; + case -1: + compressDataForType(&source[bytes_to_skip], source_size 
- bytes_to_skip, &dest[start_pos]); + break; + case -2: + compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + break; + case -4: + compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + break; + case -8: + compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + break; + case -16: + compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + break; + case -32: + compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + break; } return 1 + 1 + source_size; } @@ -161,12 +222,13 @@ void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_si if (uncompressed_size == 0) return; - UInt8 bytes_size = source[0]; + Int8 bytes_size = source[0]; - if (!(bytes_size == 1 || bytes_size == 2 || bytes_size == 4 || bytes_size == 8)) + if (!(bytes_size == 1 || bytes_size == 2 || bytes_size == 4 || bytes_size == 8 || bytes_size == 16 || bytes_size == 32 || + bytes_size == -1 || bytes_size == -2 || bytes_size == -4 || bytes_size == -8 || bytes_size == -16 || bytes_size == -32)) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress. File has wrong header"); - UInt8 bytes_to_skip = uncompressed_size % bytes_size; + UInt8 bytes_to_skip = uncompressed_size % abs(bytes_size); UInt32 output_size = uncompressed_size - bytes_to_skip; if (static_cast(2 + bytes_to_skip) > source_size) @@ -188,24 +250,102 @@ void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_si case 8: decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); break; + case 16: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; + case 32: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; + case -1: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; + case -2: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; + case -4: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; + case -8: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; + case -16: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; + case -32: + decompressDataForType(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip], output_size); + break; } } namespace { -UInt8 getGCDBytesSize(const IDataType * column_type) +Int8 getGCDBytesSize(const IDataType * column_type) { if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is not applicable for {} because the data type is not of fixed size", column_type->getName()); size_t max_size = column_type->getSizeOfValueInMemory(); - if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8) - return static_cast(max_size); - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is only applicable for data types of size 1, 2, 4, 8 bytes. 
Given type {}", + if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8 || max_size == 16 || max_size == 32) + { + if (column_type->getName() == "UInt8") + { + return static_cast(GCDTypes::UInt8_type); + } + else if (column_type->getName() == "UInt16") + { + return static_cast(GCDTypes::UInt16_type); + } + else if (column_type->getName() == "UInt32") + { + return static_cast(GCDTypes::UInt32_type); + } + else if (column_type->getName() == "UInt64") + { + return static_cast(GCDTypes::UInt64_type); + } + else if (column_type->getName() == "UInt128") + { + return static_cast(GCDTypes::UInt128_type); + } + else if (column_type->getName() == "UInt256") + { + return static_cast(GCDTypes::UInt256_type); + } + else if (column_type->getName() == "Int8") + { + return static_cast(GCDTypes::Int8_type); + } + else if (column_type->getName() == "Int16") + { + return static_cast(GCDTypes::Int16_type); + } + else if (column_type->getName() == "Int32") + { + return static_cast(GCDTypes::Int32_type); + } + else if (column_type->getName() == "Int64") + { + return static_cast(GCDTypes::Int64_type); + } + else if (column_type->getName() == "Int128") + { + return static_cast(GCDTypes::Int128_type); + } + else if (column_type->getName() == "Int256") + { + return static_cast(GCDTypes::Int256_type); + } else { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is only applicable for data types of size 1, 2, 4, 8, 16, 32 bytes. Given type {}", column_type->getName()); + } + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is only applicable for data types of size 1, 2, 4, 8, 16, 32 bytes. Given type {}", + column_type->getName()); + } } } @@ -216,7 +356,7 @@ void registerCodecGCD(CompressionCodecFactory & factory) auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr { /// Default bytes size is 1. 
- UInt8 gcd_bytes_size = 1; + Int8 gcd_bytes_size = 1; if (arguments && !arguments->children.empty()) { @@ -228,10 +368,11 @@ void registerCodecGCD(CompressionCodecFactory & factory) if (!literal || literal->value.getType() != Field::Types::Which::UInt64) throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD codec argument must be unsigned integer"); - size_t user_bytes_size = literal->value.safeGet(); - if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8) + Int64 user_bytes_size = literal->value.safeGet(); + if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8 && user_bytes_size != 16 && user_bytes_size != 32 && + user_bytes_size != -1 && user_bytes_size != -2 && user_bytes_size != -4 && user_bytes_size != -8 && user_bytes_size != -16 && user_bytes_size != -32) throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD value for GCD codec can be 1, 2, 4 or 8, given {}", user_bytes_size); - gcd_bytes_size = static_cast(user_bytes_size); + gcd_bytes_size = static_cast(user_bytes_size); } else if (column_type) { diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 79382165051..4a1425dbbed 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -169,6 +169,7 @@ void registerCodecLZ4HC(CompressionCodecFactory & factory); void registerCodecZSTD(CompressionCodecFactory & factory); void registerCodecMultiple(CompressionCodecFactory & factory); void registerCodecDeflateQpl(CompressionCodecFactory & factory); +void registerCodecGCD(CompressionCodecFactory & factory); /// Keeper use only general-purpose codecs, so we don't need these special codecs /// in standalone build @@ -179,7 +180,6 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory); void registerCodecGorilla(CompressionCodecFactory & factory); void registerCodecEncrypted(CompressionCodecFactory & factory); void registerCodecFPC(CompressionCodecFactory & factory); -void registerCodecGCD(CompressionCodecFactory & factory); #endif CompressionCodecFactory::CompressionCodecFactory() @@ -189,6 +189,7 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecZSTD(*this); registerCodecLZ4HC(*this); registerCodecMultiple(*this); + registerCodecGCD(*this); #ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD registerCodecDelta(*this); registerCodecT64(*this); @@ -196,7 +197,6 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecGorilla(*this); registerCodecEncrypted(*this); registerCodecFPC(*this); - registerCodecGCD(*this); #ifdef ENABLE_QPL_COMPRESSION registerCodecDeflateQpl(*this); #endif From bdc3cdbb6758865ddd62067b6d7f6e09456ba454 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Tue, 8 Aug 2023 17:11:32 +0000 Subject: [PATCH 0160/1687] Style --- src/Compression/CompressionCodecGCD.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 74b633708c2..314ae0932dc 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -40,7 +40,8 @@ T gcd_func(T a, T b) { a = -a; } - if (b < 0) { + if (b < 0) + { b = -b; } while (b != 0) @@ -144,9 +145,8 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, if (source_size % sizeof(T) != 0) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, 
"Cannot GCD decompress, data size {} is not aligned to {}", source_size, sizeof(T)); - if (source_size < sizeof(T)) { + if (source_size < sizeof(T)) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot GCD decompress, data size {} is less than {}", source_size, sizeof(T)); - } const char * const source_end = source + source_size; const T gcd = unalignedLoad(source); From f533eeaf9514a35b65be242252726563ef816dc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Tue, 8 Aug 2023 17:26:01 +0000 Subject: [PATCH 0161/1687] Moved registerCodecGCD to ifndef --- src/Compression/CompressionFactory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 4a1425dbbed..79382165051 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -169,7 +169,6 @@ void registerCodecLZ4HC(CompressionCodecFactory & factory); void registerCodecZSTD(CompressionCodecFactory & factory); void registerCodecMultiple(CompressionCodecFactory & factory); void registerCodecDeflateQpl(CompressionCodecFactory & factory); -void registerCodecGCD(CompressionCodecFactory & factory); /// Keeper use only general-purpose codecs, so we don't need these special codecs /// in standalone build @@ -180,6 +179,7 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory); void registerCodecGorilla(CompressionCodecFactory & factory); void registerCodecEncrypted(CompressionCodecFactory & factory); void registerCodecFPC(CompressionCodecFactory & factory); +void registerCodecGCD(CompressionCodecFactory & factory); #endif CompressionCodecFactory::CompressionCodecFactory() @@ -189,7 +189,6 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecZSTD(*this); registerCodecLZ4HC(*this); registerCodecMultiple(*this); - registerCodecGCD(*this); #ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD registerCodecDelta(*this); registerCodecT64(*this); @@ -197,6 +196,7 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecGorilla(*this); registerCodecEncrypted(*this); registerCodecFPC(*this); + registerCodecGCD(*this); #ifdef ENABLE_QPL_COMPRESSION registerCodecDeflateQpl(*this); #endif From f52d4b5ccfa33c770f1ec7598c9368662a430a05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Tue, 8 Aug 2023 17:40:16 +0000 Subject: [PATCH 0162/1687] Style --- src/Compression/CompressionCodecGCD.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 314ae0932dc..2fb7d548cca 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -65,7 +65,7 @@ public: protected: UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; - UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override { return uncompressed_size + 2; } + UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; bool isCompression() const override { return false; } bool isGenericCompression() const override { return false; } @@ -84,6 +84,11 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + UInt32 
CompressionCodecGCD::getMaxCompressedDataSize(UInt32 uncompressed_size) const override + { + return uncompressed_size + 2; +} + CompressionCodecGCD::CompressionCodecGCD(Int8 gcd_bytes_size_) : gcd_bytes_size(gcd_bytes_size_) { @@ -336,7 +341,9 @@ Int8 getGCDBytesSize(const IDataType * column_type) else if (column_type->getName() == "Int256") { return static_cast(GCDTypes::Int256_type); - } else { + } + else + { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec GCD is only applicable for data types of size 1, 2, 4, 8, 16, 32 bytes. Given type {}", column_type->getName()); } From d8904ffa694500676734c039fb04ddda3b7e00d0 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Tue, 8 Aug 2023 23:48:23 +0200 Subject: [PATCH 0163/1687] Simplified prepared statements handling for MySQL interface --- src/Core/MySQL/PacketsGeneric.h | 3 + src/Core/MySQL/PacketsPreparedStatements.cpp | 40 +++++++++++ src/Core/MySQL/PacketsPreparedStatements.h | 35 +++++++++ src/Server/MySQLHandler.cpp | 76 +++++++++++++++++++- src/Server/MySQLHandler.h | 10 +++ 5 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 src/Core/MySQL/PacketsPreparedStatements.cpp create mode 100644 src/Core/MySQL/PacketsPreparedStatements.h diff --git a/src/Core/MySQL/PacketsGeneric.h b/src/Core/MySQL/PacketsGeneric.h index cf990bbf15c..969716dfc7a 100644 --- a/src/Core/MySQL/PacketsGeneric.h +++ b/src/Core/MySQL/PacketsGeneric.h @@ -54,6 +54,9 @@ enum Command COM_CHANGE_USER = 0x11, COM_BINLOG_DUMP = 0x12, COM_REGISTER_SLAVE = 0x15, + COM_STMT_PREPARE = 0x16, + COM_STMT_EXECUTE = 0x17, + COM_STMT_CLOSE = 0x19, COM_RESET_CONNECTION = 0x1f, COM_DAEMON = 0x1d, COM_BINLOG_DUMP_GTID = 0x1e diff --git a/src/Core/MySQL/PacketsPreparedStatements.cpp b/src/Core/MySQL/PacketsPreparedStatements.cpp new file mode 100644 index 00000000000..eddded585d5 --- /dev/null +++ b/src/Core/MySQL/PacketsPreparedStatements.cpp @@ -0,0 +1,40 @@ +#include +#include +#include +#include + +namespace DB +{ +namespace MySQLProtocol +{ + namespace PreparedStatements + { + size_t PrepareStatementResponseOK::getPayloadSize() const + { + return 13; + } + + void PrepareStatementResponseOK::writePayloadImpl(WriteBuffer & buffer) const + { + buffer.write(reinterpret_cast(&status), 1); + buffer.write(reinterpret_cast(&statement_id), 4); + buffer.write(reinterpret_cast(&num_columns), 2); + buffer.write(reinterpret_cast(&num_params), 2); + buffer.write(reinterpret_cast(&reserved_1), 1); + buffer.write(reinterpret_cast(&warnings_count), 2); + buffer.write(0x0); // RESULTSET_METADATA_NONE + } + + void PrepareStatementResponseOK::readPayloadImpl([[maybe_unused]] ReadBuffer & payload) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PrepareStatementResponseOK::readPayloadImpl is not implemented"); + } + + PrepareStatementResponseOK::PrepareStatementResponseOK( + uint32_t statement_id_, uint16_t num_columns_, uint16_t num_params_, uint16_t warnings_count_) + : statement_id(statement_id_), num_columns(num_columns_), num_params(num_params_), warnings_count(warnings_count_) + { + } + } +} +} diff --git a/src/Core/MySQL/PacketsPreparedStatements.h b/src/Core/MySQL/PacketsPreparedStatements.h new file mode 100644 index 00000000000..7f180d4d142 --- /dev/null +++ b/src/Core/MySQL/PacketsPreparedStatements.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + +namespace DB +{ +namespace MySQLProtocol +{ + namespace PreparedStatements + { + // https://dev.mysql.com/doc/dev/mysql-server/latest/page_protocol_com_stmt_prepare.html#sect_protocol_com_stmt_prepare_response_ok + class 
PrepareStatementResponseOK : public IMySQLWritePacket, public IMySQLReadPacket + { + public: + uint8_t status = 0x00; + uint32_t statement_id; + uint16_t num_columns; + uint16_t num_params; + uint8_t reserved_1 = 0; + uint16_t warnings_count; + + protected: + size_t getPayloadSize() const override; + + void readPayloadImpl(ReadBuffer & payload) override; + + void writePayloadImpl(WriteBuffer & buffer) const override; + + public: + PrepareStatementResponseOK(uint32_t statement_id_, uint16_t num_columns_, uint16_t num_params_, uint16_t warnings_count_); + }; + } +} +} diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index f98b86e6cf8..a0018d0dc27 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ using namespace MySQLProtocol; using namespace MySQLProtocol::Generic; using namespace MySQLProtocol::ProtocolText; using namespace MySQLProtocol::ConnectionPhase; +using namespace MySQLProtocol::PreparedStatements; #if USE_SSL using Poco::Net::SecureStreamSocket; @@ -181,6 +183,15 @@ void MySQLHandler::run() case COM_PING: comPing(); break; + case COM_STMT_PREPARE: + comStmtPrepare(payload); + break; + case COM_STMT_EXECUTE: + comStmtExecute(payload); + break; + case COM_STMT_CLOSE: + comStmtClose(payload); + break; default: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Command {} is not implemented.", command); } @@ -254,7 +265,8 @@ void MySQLHandler::authenticate(const String & user_name, const String & auth_pl { try { - // For compatibility with JavaScript MySQL client, Native41 authentication plugin is used when possible (if password is specified using double SHA1). Otherwise SHA256 plugin is used. + // For compatibility with JavaScript MySQL client, Native41 authentication plugin is used when possible + // (if password is specified using double SHA1). Otherwise, SHA256 plugin is used. 
if (session->getAuthenticationTypeOrLogInFailure(user_name) == DB::AuthenticationType::SHA256_PASSWORD) { authPluginSSL(); @@ -371,6 +383,68 @@ void MySQLHandler::comQuery(ReadBuffer & payload) } } +void MySQLHandler::comStmtPrepare(DB::ReadBuffer & payload) +{ + String query; + readStringUntilEOF(query, payload); + + uint32_t statement_id = current_prepared_statement_id; + if (current_prepared_statement_id == std::numeric_limits::max()) + { + current_prepared_statement_id = 0; + } + else + { + current_prepared_statement_id++; + } + + // Key collisions should not happen here, as we remove the elements from the map with COM_STMT_CLOSE, + // and we have quite a big range of available identifiers with 32-bit unsigned integer + if (prepared_statements_map.contains(statement_id)) [[unlikely]] + { + LOG_ERROR( + log, + "Failed to store a new statement `{}` with id {}; it is already taken by `{}`", + query, + statement_id, + prepared_statements_map.at(statement_id)); + packet_endpoint->sendPacket(ERRPacket(), true); + return; + } + prepared_statements_map.emplace(statement_id, query); + + packet_endpoint->sendPacket(PrepareStatementResponseOK(statement_id, 0, 0, 0), true); +} + +void MySQLHandler::comStmtExecute(ReadBuffer & payload) +{ + uint32_t statement_id; + payload.readStrict(reinterpret_cast(&statement_id), 4); + + if (!prepared_statements_map.contains(statement_id)) [[unlikely]] + { + LOG_ERROR(log, "Could not find prepared statement with id {}", statement_id); + packet_endpoint->sendPacket(ERRPacket(), true); + return; + } + + // Temporary workaround as we work only with queries that do not bind any parameters atm + ReadBufferFromString com_query_payload(prepared_statements_map.at(statement_id)); + MySQLHandler::comQuery(com_query_payload); +}; + +void MySQLHandler::comStmtClose([[maybe_unused]] ReadBuffer & payload) { + uint32_t statement_id; + payload.readStrict(reinterpret_cast(&statement_id), 4); + + if (prepared_statements_map.contains(statement_id)) { + prepared_statements_map.erase(statement_id); + } + + // https://dev.mysql.com/doc/dev/mysql-server/latest/page_protocol_com_stmt_close.html + // No response packet is sent back to the client. 
+}; + void MySQLHandler::authPluginSSL() { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 3366e8792c9..6b8cc56a46e 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -56,6 +56,12 @@ protected: void authenticate(const String & user_name, const String & auth_plugin_name, const String & auth_response); + void comStmtPrepare(ReadBuffer & payload); + + void comStmtExecute(ReadBuffer & payload); + + void comStmtClose(ReadBuffer & payload); + virtual void authPluginSSL(); virtual void finishHandshakeSSL(size_t packet_size, char * buf, size_t pos, std::function read_bytes, MySQLProtocol::ConnectionPhase::HandshakeResponse & packet); @@ -76,6 +82,10 @@ protected: using Replacements = std::unordered_map; Replacements replacements; + uint32_t current_prepared_statement_id = 0; + using PreparedStatementsMap = std::unordered_map; + PreparedStatementsMap prepared_statements_map; + std::unique_ptr auth_plugin; std::shared_ptr in; std::shared_ptr out; From ef3e0f7b836864d1ddcc9d114d319f639654e661 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 9 Aug 2023 06:25:47 +0000 Subject: [PATCH 0164/1687] Use async replication in tests --- tests/config/config.d/keeper_port.xml | 2 ++ .../helpers/0_common_enable_keeper_async_replication.xml | 7 +++++++ tests/integration/helpers/cluster.py | 2 ++ tests/integration/helpers/keeper_config1.xml | 2 ++ tests/integration/helpers/keeper_config2.xml | 2 ++ tests/integration/helpers/keeper_config3.xml | 2 ++ tests/jepsen.clickhouse/resources/keeper_config.xml | 1 + tests/jepsen.clickhouse/resources/keeper_config_solo.xml | 1 + 8 files changed, 19 insertions(+) create mode 100644 tests/integration/helpers/0_common_enable_keeper_async_replication.xml diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index 7db174c5419..beac507304f 100644 --- a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -19,6 +19,8 @@ 0 0 0 + + 1 diff --git a/tests/integration/helpers/0_common_enable_keeper_async_replication.xml b/tests/integration/helpers/0_common_enable_keeper_async_replication.xml new file mode 100644 index 00000000000..4ecada09444 --- /dev/null +++ b/tests/integration/helpers/0_common_enable_keeper_async_replication.xml @@ -0,0 +1,7 @@ + + + + 1 + + + \ No newline at end of file diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index eff44de842a..1f3cd473e29 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -4258,6 +4258,8 @@ class ClickHouseInstance: if len(self.custom_dictionaries_paths): write_embedded_config("0_common_enable_dictionaries.xml", self.config_d_dir) + write_embedded_config("0_common_enable_keeper_async_replication.xml", self.config_d_dir) + logging.debug("Generate and write macros file") macros = self.macros.copy() macros["instance"] = self.name diff --git a/tests/integration/helpers/keeper_config1.xml b/tests/integration/helpers/keeper_config1.xml index f40ed9ac6fa..7702aecba9c 100644 --- a/tests/integration/helpers/keeper_config1.xml +++ b/tests/integration/helpers/keeper_config1.xml @@ -20,6 +20,8 @@ false 2000 4000 + + 1 diff --git a/tests/integration/helpers/keeper_config2.xml b/tests/integration/helpers/keeper_config2.xml index d5bdb92a79d..2a1a1c1003c 100644 --- a/tests/integration/helpers/keeper_config2.xml +++ b/tests/integration/helpers/keeper_config2.xml @@ -20,6 +20,8 @@ false 2000 4000 + + 1 
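A note on the prepared-statement handling added in PATCH 0163 above: the support is deliberately minimal. COM_STMT_PREPARE allocates a 32-bit id from a wrapping counter and stores the raw query text, COM_STMT_EXECUTE looks the text up and replays it through comQuery (no parameters are bound yet), and COM_STMT_CLOSE erases the entry without sending a response. A self-contained sketch of that id-allocation-plus-lookup scheme — StatementRegistry is a hypothetical name standing in for the handler's member state, not code from the patch:

    #include <cstdint>
    #include <limits>
    #include <optional>
    #include <string>
    #include <unordered_map>

    // Toy model of the registry kept by MySQLHandler: a wrapping 32-bit id
    // counter plus a map from id to the original query text.
    class StatementRegistry
    {
    public:
        /// Returns the id on success, std::nullopt on an (unlikely) collision,
        /// mirroring the ERRPacket path in comStmtPrepare.
        std::optional<uint32_t> prepare(std::string query)
        {
            uint32_t id = next_id;
            next_id = (next_id == std::numeric_limits<uint32_t>::max()) ? 0 : next_id + 1;
            if (statements.contains(id))
                return std::nullopt; // id still taken: client never sent COM_STMT_CLOSE
            statements.emplace(id, std::move(query));
            return id;
        }

        const std::string * find(uint32_t id) const
        {
            auto it = statements.find(id);
            return it == statements.end() ? nullptr : &it->second;
        }

        void close(uint32_t id) { statements.erase(id); } // no response packet, per protocol

    private:
        uint32_t next_id = 0;
        std::unordered_map<uint32_t, std::string> statements;
    };

Because nothing is bound, execution is just a replay of the stored text, which is why comStmtExecute can feed a ReadBufferFromString straight back into comQuery.
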
diff --git a/tests/integration/helpers/keeper_config3.xml b/tests/integration/helpers/keeper_config3.xml index aa69b554660..035da1bbd22 100644 --- a/tests/integration/helpers/keeper_config3.xml +++ b/tests/integration/helpers/keeper_config3.xml @@ -20,6 +20,8 @@ false 2000 4000 + + 1 diff --git a/tests/jepsen.clickhouse/resources/keeper_config.xml b/tests/jepsen.clickhouse/resources/keeper_config.xml index 1972ef6b917..6bc4ad89839 100644 --- a/tests/jepsen.clickhouse/resources/keeper_config.xml +++ b/tests/jepsen.clickhouse/resources/keeper_config.xml @@ -47,6 +47,7 @@ {snapshot_distance} {stale_log_gap} {reserved_log_items} + 1 diff --git a/tests/jepsen.clickhouse/resources/keeper_config_solo.xml b/tests/jepsen.clickhouse/resources/keeper_config_solo.xml index b20592545cd..0054cad8f85 100644 --- a/tests/jepsen.clickhouse/resources/keeper_config_solo.xml +++ b/tests/jepsen.clickhouse/resources/keeper_config_solo.xml @@ -21,6 +21,7 @@ 1000 2000 4000 + 1 From 5af6a89ee6ae2cf74fb617717a29ed1ba806f5b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Wed, 9 Aug 2023 10:51:55 +0000 Subject: [PATCH 0165/1687] Style --- src/Compression/CompressionCodecGCD.cpp | 6 +++--- src/Compression/CompressionInfo.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 2fb7d548cca..6e9aa28fe13 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -66,7 +66,7 @@ protected: UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; - + bool isCompression() const override { return false; } bool isGenericCompression() const override { return false; } @@ -84,8 +84,8 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } - UInt32 CompressionCodecGCD::getMaxCompressedDataSize(UInt32 uncompressed_size) const override - { + UInt32 CompressionCodecGCD::getMaxCompressedDataSize(UInt32 uncompressed_size) const +{ return uncompressed_size + 2; } diff --git a/src/Compression/CompressionInfo.h b/src/Compression/CompressionInfo.h index 6c5b478294b..ed63fc5e73a 100644 --- a/src/Compression/CompressionInfo.h +++ b/src/Compression/CompressionInfo.h @@ -47,7 +47,7 @@ enum class CompressionMethodByte : uint8_t AES_256_GCM_SIV = 0x97, FPC = 0x98, DeflateQpl = 0x99, - GCD = 0x69, // TODO: IDK why + GCD = 0x69, }; } From d5004900d3e3bbf5c9bf6057a69840783b9c1a3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Wed, 9 Aug 2023 11:18:19 +0000 Subject: [PATCH 0166/1687] Fixed bugs --- src/Compression/CompressionCodecGCD.cpp | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 6e9aa28fe13..fced7270f8d 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -36,20 +36,15 @@ enum class GCDTypes : Int8 template T gcd_func(T a, T b) { - if (a < 0) - { - a = -a; - } - if (b < 0) - { - b = -b; - } while (b != 0) { T c = a % b; a = b; b = c; } + if (a == T(-1)) { + return -a; + } return a; } @@ -165,7 +160,6 @@ void 
decompressDataForType(const char * source, UInt32 source_size, char * dest, source += sizeof(T); dest += sizeof(T); } - } } @@ -372,13 +366,13 @@ void registerCodecGCD(CompressionCodecFactory & factory) const auto children = arguments->children; const auto * literal = children[0]->as(); - if (!literal || literal->value.getType() != Field::Types::Which::UInt64) - throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD codec argument must be unsigned integer"); + if (!literal || (literal->value.getType() != Field::Types::Which::Int64 && literal->value.getType() != Field::Types::Which::UInt64)) + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD codec argument must be integer"); Int64 user_bytes_size = literal->value.safeGet(); if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8 && user_bytes_size != 16 && user_bytes_size != 32 && user_bytes_size != -1 && user_bytes_size != -2 && user_bytes_size != -4 && user_bytes_size != -8 && user_bytes_size != -16 && user_bytes_size != -32) - throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD value for GCD codec can be 1, 2, 4 or 8, given {}", user_bytes_size); + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD value for GCD codec can be +-1, +-2, +-4, +-8, +-16 or +-32, given {}", user_bytes_size); gcd_bytes_size = static_cast(user_bytes_size); } else if (column_type) From 086d0741a5ae14279571422493b8d8014460100c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Wed, 9 Aug 2023 11:30:38 +0000 Subject: [PATCH 0167/1687] Style --- src/Compression/CompressionCodecGCD.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index fced7270f8d..b3d8c359c45 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -42,7 +42,8 @@ T gcd_func(T a, T b) a = b; b = c; } - if (a == T(-1)) { + if (a == T(-1)) + { return -a; } return a; @@ -79,7 +80,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } - UInt32 CompressionCodecGCD::getMaxCompressedDataSize(UInt32 uncompressed_size) const +UInt32 CompressionCodecGCD::getMaxCompressedDataSize(UInt32 uncompressed_size) const { return uncompressed_size + 2; } From 7fdb414793e533444808d111fd29f8fda904e15e Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Wed, 9 Aug 2023 15:18:05 +0200 Subject: [PATCH 0168/1687] One more [[unlikely]] --- src/Server/MySQLHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index a0018d0dc27..868575b701f 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -389,7 +389,7 @@ void MySQLHandler::comStmtPrepare(DB::ReadBuffer & payload) readStringUntilEOF(query, payload); uint32_t statement_id = current_prepared_statement_id; - if (current_prepared_statement_id == std::numeric_limits::max()) + if (current_prepared_statement_id == std::numeric_limits::max()) [[unlikely]] { current_prepared_statement_id = 0; } From efb041c4d5482a9acc26aa718d5c9b47e1ad95ab Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Wed, 9 Aug 2023 23:03:50 +0800 Subject: [PATCH 0169/1687] Optimize the code and streamline the functionality by retaining only the feature of automatically recognizing UInt64 as Int64. 
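 Aug 2023 23:03:50 +0800

In outline: FieldToDataType now attaches an opposite-sign companion type only to UInt64 values that also fit into Int64, and optimizeTypeIds in getLeastSupertype swaps UInt64 for that companion whenever a signed integer takes part in the computation. A reduced sketch of the decision rule, using a toy TypeIndex/TypeInfo model rather than the real DataTypePtr machinery (an illustration of the rule, not the patch's code):

    #include <optional>
    #include <set>
    #include <vector>

    enum class TypeIndex { Int8, Int16, Int32, Int64, UInt64 };

    struct TypeInfo
    {
        TypeIndex id;
        // Set only for UInt64 values <= INT64_MAX, which Int64 can also represent.
        std::optional<TypeIndex> opposite_sign;
    };

    // Mirrors optimizeTypeIds: replace UInt64 by its signed companion, but only
    // if every UInt64 present has one and at least one signed integer participates.
    void optimizeTypeIds(const std::vector<TypeInfo> & types, std::set<TypeIndex> & ids)
    {
        bool has_signed = false;
        bool any_uint64 = false;
        bool all_uint64_swappable = true;
        std::set<TypeIndex> companions;

        for (const auto & t : types)
        {
            has_signed |= (t.id == TypeIndex::Int8 || t.id == TypeIndex::Int16
                        || t.id == TypeIndex::Int32 || t.id == TypeIndex::Int64);
            if (t.id == TypeIndex::UInt64)
            {
                any_uint64 = true;
                if (t.opposite_sign)
                    companions.insert(*t.opposite_sign);
                else
                    all_uint64_swappable = false; // value above INT64_MAX: keep UInt64
            }
        }

        if (any_uint64 && all_uint64_swappable && has_signed)
        {
            ids.erase(TypeIndex::UInt64);
            ids.insert(companions.begin(), companions.end());
        }
    }

With this rule a literal such as 42, initially typed UInt64, can unify with Int64 columns, while a value above INT64_MAX carries no companion and the supertype search fails as before.
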
--- src/Common/ErrorCodes.cpp | 3 + src/DataTypes/DataTypesNumber.h | 19 +++-- src/DataTypes/FieldToDataType.cpp | 23 +----- src/DataTypes/IDataType.cpp | 6 ++ src/DataTypes/IDataType.h | 11 ++- src/DataTypes/getLeastSupertype.cpp | 113 ++++++---------------------- 6 files changed, 61 insertions(+), 114 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index ae8d5f8796d..3df92c941e8 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -582,6 +582,9 @@ M(697, CANNOT_RESTORE_TO_NONENCRYPTED_DISK) \ M(698, INVALID_REDIS_STORAGE_TYPE) \ M(699, INVALID_REDIS_TABLE_STRUCTURE) \ + M(700, USER_SESSION_LIMIT_EXCEEDED) \ + M(701, CLUSTER_DOESNT_EXIST) \ + M(702, OPPOSITE_SIGN_DATA_TYPE_NOT_FOUND) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/DataTypes/DataTypesNumber.h b/src/DataTypes/DataTypesNumber.h index 8511955e193..6be07db1062 100644 --- a/src/DataTypes/DataTypesNumber.h +++ b/src/DataTypes/DataTypesNumber.h @@ -18,9 +18,10 @@ class DataTypeNumber final : public DataTypeNumberBase public: DataTypeNumber() = default; - explicit DataTypeNumber(DataTypes data_types) + explicit DataTypeNumber(DataTypePtr opposite_sign_data_type_) : DataTypeNumberBase() - , possible_data_types(std::move(data_types)) + , opposite_sign_data_type(std::move(opposite_sign_data_type_)) + , has_opposite_sign_data_type(true) { } @@ -39,15 +40,23 @@ public: return std::make_shared(); } + bool hasOppositeSignDataType() const override { return has_opposite_sign_data_type; } + DataTypePtr oppositeSignDataType() const override + { + if (!has_opposite_sign_data_type) + IDataType::oppositeSignDataType(); + + return opposite_sign_data_type; + } + SerializationPtr doGetDefaultSerialization() const override { return std::make_shared>(); } - DataTypes getPossiblePtr() const override { return possible_data_types; } - private: - DataTypes possible_data_types; + DataTypePtr opposite_sign_data_type; + bool has_opposite_sign_data_type = false; }; using DataTypeUInt8 = DataTypeNumber; diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp index 9b8e476067e..2f759e79661 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -33,34 +33,19 @@ DataTypePtr FieldToDataType::operator() (const Null &) const template DataTypePtr FieldToDataType::operator() (const UInt64 & x) const { - if (x <= std::numeric_limits::max()) return std::make_shared(DataTypes{ std::make_shared() }); if (x <= std::numeric_limits::max()) return std::make_shared(); - if (x <= std::numeric_limits::max()) return std::make_shared(DataTypes{ std::make_shared() }); if (x <= std::numeric_limits::max()) return std::make_shared(); - if (x <= std::numeric_limits::max()) return std::make_shared(DataTypes{ std::make_shared() }); if (x <= std::numeric_limits::max()) return std::make_shared(); - if (x <= std::numeric_limits::max()) return std::make_shared(DataTypes{ std::make_shared() }); + if (x <= std::numeric_limits::max()) return std::make_shared(std::make_shared()); return std::make_shared(); } template DataTypePtr FieldToDataType::operator() (const Int64 & x) const { - if (x >= 0) - { - if (x <= std::numeric_limits::max()) return std::make_shared(); - if (x <= std::numeric_limits::max()) return std::make_shared(DataTypes{ std::make_shared() }); - if (x <= std::numeric_limits::max()) return std::make_shared(); - if (x <= std::numeric_limits::max()) return std::make_shared(DataTypes{ std::make_shared() }); - if (x <= 
std::numeric_limits::max()) return std::make_shared(); - if (x <= std::numeric_limits::max()) return std::make_shared(DataTypes{ std::make_shared() }); - } - else - { - if (x >= std::numeric_limits::min()) return std::make_shared(); - if (x >= std::numeric_limits::min()) return std::make_shared(); - if (x >= std::numeric_limits::min()) return std::make_shared(); - } + if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) return std::make_shared(); + if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) return std::make_shared(); + if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) return std::make_shared(); return std::make_shared(); } diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 4ffe82039b2..221254e575e 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int DATA_TYPE_CANNOT_BE_PROMOTED; + extern const int OPPOSITE_SIGN_DATA_TYPE_NOT_FOUND; extern const int ILLEGAL_COLUMN; } @@ -71,6 +72,11 @@ DataTypePtr IDataType::promoteNumericType() const throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_PROMOTED, "Data type {} can't be promoted.", getName()); } +DataTypePtr IDataType::oppositeSignDataType() const +{ + throw Exception(ErrorCodes::OPPOSITE_SIGN_DATA_TYPE_NOT_FOUND, "Opposite sign data type not found for {}.", getName()); +} + size_t IDataType::getSizeOfValueInMemory() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Value of type {} in memory is not of fixed size.", getName()); diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 52962462d7e..867ed6df5e3 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -73,8 +73,6 @@ public: DataTypePtr getPtr() const { return shared_from_this(); } - virtual DataTypes getPossiblePtr() const { return {}; } - /// Name of data type family (example: FixedString, Array). virtual const char * getFamilyName() const = 0; /// Name of corresponding data type in MySQL (exampe: Bigint, Blob, etc) @@ -160,6 +158,15 @@ public: */ virtual DataTypePtr promoteNumericType() const; + /** The data type has an opposite sign DataTypePtr type. + * Data types that can have an opposite sign are typically signed or unsigned types. + */ + virtual bool hasOppositeSignDataType() const { return false; } + + /** Return the opposite sign data type of the current data type. Throw an exception if `hasOppositeSignDataType() == false`. + */ + virtual DataTypePtr oppositeSignDataType() const; + /** Directly insert default value into a column. Default implementation use method IColumn::insertDefault. * This should be overridden if data type default value differs from column default value (example: Enum data types). 
*/ diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index d8be5fab91e..4c399e99caa 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -605,113 +605,50 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool optimize_type_ids) void optimizeTypeIds(const DataTypes & types, TypeIndexSet & type_ids) { - // Determine whether the type_id is UInt - auto is_unsigned = [](const TypeIndex & type_id) + auto is_signed_int = [](const TypeIndex & type_id) { switch (type_id) { - case TypeIndex::UInt8: - case TypeIndex::UInt16: - case TypeIndex::UInt32: - case TypeIndex::UInt64: + case TypeIndex::Int8: + case TypeIndex::Int16: + case TypeIndex::Int32: + case TypeIndex::Int64: return true; default: return false; } }; - bool only_unsigned = false; - bool only_signed = false; - bool both = false; - bool has_unsigned = false; - bool has_signed = false; + bool has_signed_int = false; + bool has_uint64_and_has_opposite = false; + TypeIndexSet opposite_type_ids; - // Determine the distribution of maximum signed and unsigned, Example: - // Int64, Int64 = only_signed. - // UInt64, UInt64 = only_unsigned. - // UInt64(possible: Int64), Int64(possible: UInt64) = both. - // UInt64(possible: Int64), Int64 = both, only_signed. + // Determine whether UInt64 in type_ids needs to change its sign. for (const auto & type : types) { - TypeIndex type_id = type->getTypeId(); - bool type_is_unsigned = is_unsigned(type_id); - bool type_is_both = false; - for (const auto & possible_type : type->getPossiblePtr()) + auto type_id = type->getTypeId(); + + if (!has_signed_int) + has_signed_int = is_signed_int(type_id); + + if (type_id == TypeIndex::UInt64) { - if (type_is_unsigned != is_unsigned(possible_type->getTypeId())) + if (!type->hasOppositeSignDataType()) { - type_is_both = true; - break; + has_uint64_and_has_opposite = false; + break ; + }else + { + has_uint64_and_has_opposite = true; + opposite_type_ids.insert(type->oppositeSignDataType()->getTypeId()); } } - - if (type_is_unsigned) - has_unsigned = true; - else - has_signed = true; - - if (type_is_both) - both = true; - else if (type_is_unsigned) - only_unsigned = true; - else - only_signed = true; } - auto optimize_type_id = [&is_unsigned](const DataTypePtr & type, bool try_change_unsigned) + if (has_uint64_and_has_opposite && has_signed_int) { - TypeIndex type_id = type->getTypeId(); - switch (type_id) - { - case TypeIndex::UInt8: - case TypeIndex::UInt16: - case TypeIndex::UInt32: - case TypeIndex::UInt64: - if (try_change_unsigned) - return type_id; - break ; - case TypeIndex::Int8: - case TypeIndex::Int16: - case TypeIndex::Int32: - case TypeIndex::Int64: - if (!try_change_unsigned) - return type_id; - break ; - default: - return type_id; - } - - for (const auto & other_type : type->getPossiblePtr()) - { - TypeIndex other_type_id = other_type->getTypeId(); - if ((try_change_unsigned && is_unsigned(other_type_id)) - || (!try_change_unsigned && !is_unsigned(other_type_id))) - { - return other_type_id; - } - } - - return type_id; - }; - - // optimize type_ids - if (both) - { - // Example: Int64(possible: UInt32), UInt64 = UInt32, UInt64 - if (only_unsigned && !only_signed) - { - type_ids.clear(); - for (const auto & type : types) - type_ids.insert(optimize_type_id(type, true)); - } - // Example: UInt64(possible: Int64), Int64 = Int64, Int64 - // Int64(possible: UInt32), UInt64(possible: Int64) = Int64, Int64 - else if ((only_signed && !only_unsigned) || (has_unsigned && has_signed && 
!only_signed && !only_unsigned)) - { - type_ids.clear(); - for (const auto & type : types) - type_ids.insert(optimize_type_id(type, false)); - } + type_ids.erase(TypeIndex::UInt64); + type_ids.insert(opposite_type_ids.begin(), opposite_type_ids.end()); } } From ef857f3646e9c6839a8236b74b09269de1cd8be3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Wed, 9 Aug 2023 21:32:37 +0000 Subject: [PATCH 0170/1687] Fix --- src/Compression/CompressionCodecGCD.cpp | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index b3d8c359c45..668123e2106 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -355,28 +355,12 @@ Int8 getGCDBytesSize(const IDataType * column_type) void registerCodecGCD(CompressionCodecFactory & factory) { UInt8 method_code = static_cast(CompressionMethodByte::GCD); - auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr + auto codec_builder = [&](const ASTPtr &, const IDataType * column_type) -> CompressionCodecPtr { /// Default bytes size is 1. Int8 gcd_bytes_size = 1; - if (arguments && !arguments->children.empty()) - { - if (arguments->children.size() > 1) - throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "GCD codec must have 1 parameter, given {}", arguments->children.size()); - - const auto children = arguments->children; - const auto * literal = children[0]->as(); - if (!literal || (literal->value.getType() != Field::Types::Which::Int64 && literal->value.getType() != Field::Types::Which::UInt64)) - throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD codec argument must be integer"); - - Int64 user_bytes_size = literal->value.safeGet(); - if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8 && user_bytes_size != 16 && user_bytes_size != 32 && - user_bytes_size != -1 && user_bytes_size != -2 && user_bytes_size != -4 && user_bytes_size != -8 && user_bytes_size != -16 && user_bytes_size != -32) - throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD value for GCD codec can be +-1, +-2, +-4, +-8, +-16 or +-32, given {}", user_bytes_size); - gcd_bytes_size = static_cast(user_bytes_size); - } - else if (column_type) + if (column_type) { gcd_bytes_size = getGCDBytesSize(column_type); } From b52655dd5a82e31b14b8276ddd5d1cc491627a68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Wed, 9 Aug 2023 21:48:40 +0000 Subject: [PATCH 0171/1687] Added exception for types --- src/Compression/CompressionCodecGCD.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 668123e2106..963f695e045 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -364,6 +364,10 @@ void registerCodecGCD(CompressionCodecFactory & factory) { gcd_bytes_size = getGCDBytesSize(column_type); } + else + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Not specified type for codec GCD"); + } return std::make_shared(gcd_bytes_size); }; From 43a827137f5e4b693a70240b5109769ecf0ef683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Wed, 9 
Aug 2023 22:08:46 +0000 Subject: [PATCH 0172/1687] Deleted args --- src/Compression/CompressionCodecGCD.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 963f695e045..bfbc32b82bb 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -355,8 +355,12 @@ Int8 getGCDBytesSize(const IDataType * column_type) void registerCodecGCD(CompressionCodecFactory & factory) { UInt8 method_code = static_cast(CompressionMethodByte::GCD); - auto codec_builder = [&](const ASTPtr &, const IDataType * column_type) -> CompressionCodecPtr + auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr { + + if (arguments && !arguments->children.empty() && !arguments->children.empty()) + throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "Delta codec must have 0 parameters, given {}", arguments->children.size()); + /// Default bytes size is 1. Int8 gcd_bytes_size = 1; @@ -364,10 +368,6 @@ void registerCodecGCD(CompressionCodecFactory & factory) { gcd_bytes_size = getGCDBytesSize(column_type); } - else - { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Not specified type for codec GCD"); - } return std::make_shared(gcd_bytes_size); }; From da2a041d6891058bee5bbed621a312526be8ef21 Mon Sep 17 00:00:00 2001 From: chen768959 <934103231@qq.com> Date: Thu, 10 Aug 2023 11:23:43 +0800 Subject: [PATCH 0173/1687] Temporarily modify ErrorCodes to avoid conflicts. --- src/Common/ErrorCodes.cpp | 3 --- src/DataTypes/IDataType.cpp | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 3df92c941e8..ae8d5f8796d 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -582,9 +582,6 @@ M(697, CANNOT_RESTORE_TO_NONENCRYPTED_DISK) \ M(698, INVALID_REDIS_STORAGE_TYPE) \ M(699, INVALID_REDIS_TABLE_STRUCTURE) \ - M(700, USER_SESSION_LIMIT_EXCEEDED) \ - M(701, CLUSTER_DOESNT_EXIST) \ - M(702, OPPOSITE_SIGN_DATA_TYPE_NOT_FOUND) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 221254e575e..092b9ed8e0e 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -23,7 +23,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int DATA_TYPE_CANNOT_BE_PROMOTED; - extern const int OPPOSITE_SIGN_DATA_TYPE_NOT_FOUND; extern const int ILLEGAL_COLUMN; } @@ -74,7 +73,8 @@ DataTypePtr IDataType::promoteNumericType() const DataTypePtr IDataType::oppositeSignDataType() const { - throw Exception(ErrorCodes::OPPOSITE_SIGN_DATA_TYPE_NOT_FOUND, "Opposite sign data type not found for {}.", getName()); + // TODO: Should use ErrorCodes::OPPOSITE_SIGN_DATA_TYPE_NOT_FOUND. 
+ throw Exception(702, "Opposite sign data type not found for {}.", getName()); } size_t IDataType::getSizeOfValueInMemory() const From c8f0ee94ca8187ad75660d2146003a843f08c9d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Thu, 10 Aug 2023 07:38:04 +0000 Subject: [PATCH 0174/1687] Edited ErrorCodes --- src/Compression/CompressionCodecGCD.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index bfbc32b82bb..6a786e04646 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -75,7 +75,6 @@ namespace ErrorCodes { extern const int CANNOT_COMPRESS; extern const int CANNOT_DECOMPRESS; - extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; extern const int ILLEGAL_CODEC_PARAMETER; extern const int BAD_ARGUMENTS; } @@ -359,7 +358,7 @@ void registerCodecGCD(CompressionCodecFactory & factory) { if (arguments && !arguments->children.empty() && !arguments->children.empty()) - throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "Delta codec must have 0 parameters, given {}", arguments->children.size()); + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "Delta codec must have 0 parameters, given {}", arguments->children.size()); /// Default bytes size is 1. Int8 gcd_bytes_size = 1; From cb687e25690f6e2f16cfa7248573e770e8a5f69b Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Thu, 10 Aug 2023 12:47:30 +0300 Subject: [PATCH 0175/1687] Remove two redundant headers from Context.cpp --- src/Interpreters/Context.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a210a9efbc7..229ed637feb 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -75,8 +75,6 @@ #include #include #include -#include -#include #include #include #include From 3c299dc684e6d231157099c59820eb3474dcca6d Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Thu, 10 Aug 2023 12:56:57 +0300 Subject: [PATCH 0176/1687] Remove redundant forward declaration from IDataPartStorage.h --- src/Storages/MergeTree/IDataPartStorage.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 19af6085547..aa97b1531f1 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -55,8 +55,6 @@ struct MergeTreeDataPartChecksums; class IReservation; using ReservationPtr = std::unique_ptr; -class IStoragePolicy; - class IDisk; using DiskPtr = std::shared_ptr; From a89d71111303f00290b468fe07b4d3cdb709a2db Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Thu, 10 Aug 2023 13:29:20 +0300 Subject: [PATCH 0177/1687] Move BackupsWorker::Info to a separate file BackupOperationInfo.h --- src/Backups/BackupOperationInfo.h | 55 ++++++++++++++ src/Backups/BackupsWorker.cpp | 10 +-- src/Backups/BackupsWorker.h | 77 ++++---------------- src/Interpreters/InterpreterBackupQuery.cpp | 2 +- src/Storages/System/StorageSystemBackups.cpp | 2 +- 5 files changed, 77 insertions(+), 69 deletions(-) create mode 100644 src/Backups/BackupOperationInfo.h diff --git a/src/Backups/BackupOperationInfo.h b/src/Backups/BackupOperationInfo.h new file mode 100644 index 00000000000..4847dc42003 --- /dev/null +++ b/src/Backups/BackupOperationInfo.h @@ -0,0 +1,55 @@ +#pragma once + +#include + +namespace DB +{ + +/// Information 
about executing a BACKUP or RESTORE operation +struct BackupOperationInfo +{ + /// Operation ID, can be either passed via SETTINGS id=... or be randomly generated UUID. + using ID = String; + + /// Operation ID, can be either passed via SETTINGS id=... or be randomly generated UUID. + ID id; + + /// Operation name, a string like "Disk('backups', 'my_backup')" + String name; + + /// This operation is internal and should not be shown in system.backups + bool internal = false; + + /// Status of backup or restore operation. + BackupStatus status; + + /// The number of files stored in the backup. + size_t num_files = 0; + + /// The total size of files stored in the backup. + UInt64 total_size = 0; + + /// The number of entries in the backup, i.e. the number of files inside the folder if the backup is stored as a folder. + size_t num_entries = 0; + + /// The uncompressed size of the backup. + UInt64 uncompressed_size = 0; + + /// The compressed size of the backup. + UInt64 compressed_size = 0; + + /// Returns the number of files read during RESTORE from this backup. + size_t num_read_files = 0; + + // Returns the total size of files read during RESTORE from this backup. + UInt64 num_read_bytes = 0; + + /// Set only if there was an error. + std::exception_ptr exception; + String error_message; + + std::chrono::system_clock::time_point start_time; + std::chrono::system_clock::time_point end_time; +}; + +} diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 90e76ef9b46..326d023fb81 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -44,7 +44,7 @@ namespace ErrorCodes extern const int CONCURRENT_ACCESS_NOT_SUPPORTED; } -using OperationID = BackupsWorker::OperationID; +using OperationID = BackupOperationInfo::ID; namespace Stage = BackupCoordinationStage; namespace @@ -869,7 +869,7 @@ void BackupsWorker::restoreTablesData(const OperationID & restore_id, BackupPtr void BackupsWorker::addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status) { - Info info; + BackupOperationInfo info; info.id = id; info.name = name; info.internal = internal; @@ -964,7 +964,7 @@ void BackupsWorker::wait(const OperationID & id, bool rethrow_exception) }); } -BackupsWorker::Info BackupsWorker::getInfo(const OperationID & id) const +BackupOperationInfo BackupsWorker::getInfo(const OperationID & id) const { std::lock_guard lock{infos_mutex}; auto it = infos.find(id); @@ -973,9 +973,9 @@ BackupsWorker::Info BackupsWorker::getInfo(const OperationID & id) const return it->second; } -std::vector BackupsWorker::getAllInfos() const +std::vector BackupsWorker::getAllInfos() const { - std::vector res_infos; + std::vector res_infos; std::lock_guard lock{infos_mutex}; for (const auto & info : infos | boost::adaptors::map_values) { diff --git a/src/Backups/BackupsWorker.h b/src/Backups/BackupsWorker.h index ab4359ec257..d5b62620f3e 100644 --- a/src/Backups/BackupsWorker.h +++ b/src/Backups/BackupsWorker.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -36,69 +36,22 @@ public: /// Waits until all tasks have been completed. void shutdown(); - /// Backup's or restore's operation ID, can be either passed via SETTINGS id=... or be randomly generated UUID. - using OperationID = String; - /// Starts executing a BACKUP or RESTORE query. Returns ID of the operation. 
- OperationID start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context); + BackupOperationInfo::ID start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context); /// Waits until a BACKUP or RESTORE query started by start() is finished. /// The function returns immediately if the operation is already finished. - void wait(const OperationID & backup_or_restore_id, bool rethrow_exception = true); + void wait(const BackupOperationInfo::ID & backup_or_restore_id, bool rethrow_exception = true); - /// Information about executing a BACKUP or RESTORE query started by calling start(). - struct Info - { - /// Backup's or restore's operation ID, can be either passed via SETTINGS id=... or be randomly generated UUID. - OperationID id; - - /// Backup's name, a string like "Disk('backups', 'my_backup')" - String name; - - /// This operation is internal and should not be shown in system.backups - bool internal = false; - - /// Status of backup or restore operation. - BackupStatus status; - - /// The number of files stored in the backup. - size_t num_files = 0; - - /// The total size of files stored in the backup. - UInt64 total_size = 0; - - /// The number of entries in the backup, i.e. the number of files inside the folder if the backup is stored as a folder. - size_t num_entries = 0; - - /// The uncompressed size of the backup. - UInt64 uncompressed_size = 0; - - /// The compressed size of the backup. - UInt64 compressed_size = 0; - - /// Returns the number of files read during RESTORE from this backup. - size_t num_read_files = 0; - - // Returns the total size of files read during RESTORE from this backup. - UInt64 num_read_bytes = 0; - - /// Set only if there was an error. - std::exception_ptr exception; - String error_message; - - std::chrono::system_clock::time_point start_time; - std::chrono::system_clock::time_point end_time; - }; - - Info getInfo(const OperationID & id) const; - std::vector getAllInfos() const; + BackupOperationInfo getInfo(const BackupOperationInfo::ID & id) const; + std::vector getAllInfos() const; private: - OperationID startMakingBackup(const ASTPtr & query, const ContextPtr & context); + BackupOperationInfo::ID startMakingBackup(const ASTPtr & query, const ContextPtr & context); void doBackup( const std::shared_ptr & backup_query, - const OperationID & backup_id, + const BackupOperationInfo::ID & backup_id, const String & backup_name_for_logging, const BackupInfo & backup_info, BackupSettings backup_settings, @@ -111,13 +64,13 @@ private: void buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr backup_coordination); /// Write backup entries to an opened backup. 
- void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, const OperationID & backup_id, std::shared_ptr backup_coordination, bool internal); + void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, const BackupOperationInfo::ID & backup_id, std::shared_ptr backup_coordination, bool internal); - OperationID startRestoring(const ASTPtr & query, ContextMutablePtr context); + BackupOperationInfo::ID startRestoring(const ASTPtr & query, ContextMutablePtr context); void doRestore( const std::shared_ptr & restore_query, - const OperationID & restore_id, + const BackupOperationInfo::ID & restore_id, const String & backup_name_for_logging, const BackupInfo & backup_info, RestoreSettings restore_settings, @@ -126,18 +79,18 @@ private: bool called_async); /// Run data restoring tasks which insert data to tables. - void restoreTablesData(const OperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool); + void restoreTablesData(const BackupOperationInfo::ID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool); - void addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status); - void setStatus(const OperationID & id, BackupStatus status, bool throw_if_error = true); + void addInfo(const BackupOperationInfo::ID & id, const String & name, bool internal, BackupStatus status); + void setStatus(const BackupOperationInfo::ID & id, BackupStatus status, bool throw_if_error = true); void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); } - void setNumFilesAndSize(const OperationID & id, size_t num_files, UInt64 total_size, size_t num_entries, + void setNumFilesAndSize(const BackupOperationInfo::ID & id, size_t num_files, UInt64 total_size, size_t num_entries, UInt64 uncompressed_size, UInt64 compressed_size, size_t num_read_files, UInt64 num_read_bytes); std::unique_ptr backups_thread_pool; std::unique_ptr restores_thread_pool; - std::unordered_map infos; + std::unordered_map infos; std::condition_variable status_changed; std::atomic num_active_backups = 0; std::atomic num_active_restores = 0; diff --git a/src/Interpreters/InterpreterBackupQuery.cpp b/src/Interpreters/InterpreterBackupQuery.cpp index e238286a33c..be5fcedce27 100644 --- a/src/Interpreters/InterpreterBackupQuery.cpp +++ b/src/Interpreters/InterpreterBackupQuery.cpp @@ -17,7 +17,7 @@ namespace DB namespace { - Block getResultRow(const BackupsWorker::Info & info) + Block getResultRow(const BackupOperationInfo & info) { auto column_id = ColumnString::create(); auto column_status = ColumnInt8::create(); diff --git a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index 8e968f8f7c0..6fac9b04885 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -51,7 +51,7 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con auto & column_num_read_files = assert_cast(*res_columns[column_index++]); auto & column_num_read_bytes = assert_cast(*res_columns[column_index++]); - auto add_row = [&](const BackupsWorker::Info & info) + auto add_row = [&](const BackupOperationInfo & info) { column_id.insertData(info.id.data(), info.id.size()); column_name.insertData(info.name.data(), info.name.size()); From de655457beaa36be12f13f782ce7a07c6825783e Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 10 Aug 2023 11:37:43 +0000 Subject: [PATCH 0178/1687] fix race 
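
The race: threads is atomic, but `size_t thread_num = threads; ++threads;` is a separate load followed by a separate increment, so two concurrently spawned threads can observe the same value and claim the same thread number. `threads.fetch_add(1)` performs the read-modify-write as a single atomic operation and returns the previous value. A minimal standalone illustration of the difference (not the executor code itself):

    #include <atomic>
    #include <cstddef>

    std::atomic<size_t> threads{0};

    // Racy: another thread may run between the load and the increment,
    // handing out the same thread_num twice.
    size_t racy_claim()
    {
        size_t thread_num = threads; // atomic load
        ++threads;                   // separate atomic read-modify-write
        return thread_num;
    }

    // Correct: fetch_add increments and returns the previous value atomically,
    // so every caller gets a unique number.
    size_t atomic_claim()
    {
        return threads.fetch_add(1);
    }
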
--- src/Processors/Executors/PipelineExecutor.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 30325e283c3..4dd65def123 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -318,8 +318,7 @@ void PipelineExecutor::spawnThreads() { while (auto slot = slots->tryAcquire()) { - size_t thread_num = threads; - ++threads; + size_t thread_num = threads.fetch_add(1); /// Count of threads in use should be updated for proper finish() condition. /// NOTE: this will not decrease `use_threads` below initially granted count From e4a1780bd74ad9de0dbce1e97509c396911a500b Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 10 Aug 2023 15:29:27 +0300 Subject: [PATCH 0179/1687] update docs according to the PR + fix the same for HDFS --- docs/en/sql-reference/table-functions/file.md | 3 ++- docs/en/sql-reference/table-functions/hdfs.md | 2 +- docs/ru/sql-reference/table-functions/file.md | 2 +- docs/ru/sql-reference/table-functions/hdfs.md | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 8 ++++++-- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 4db9494502e..508e58ed3ca 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -141,7 +141,8 @@ Multiple path components can have globs. For being processed file must exist and - `*` — Substitutes any number of any characters except `/` including empty string. - `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`. In case at least one of strings contains `/`, `'permission denied'` errors may be ignored. + - `{N..M}` — Substitutes any number in range from N to M including both borders. - `**` - Fetches all files inside the folder recursively. diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 680ac54ee78..b4f63b35592 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -45,7 +45,7 @@ Multiple path components can have globs. For being processed file should exists - `*` — Substitutes any number of any characters except `/` including empty string. - `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`. In case at least one of strings contains `/`, `'permission denied'` errors may be ignored. - `{N..M}` — Substitutes any number in range from N to M including both borders. Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)). 
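The `'permission denied'` caveat documented above comes from how globs with `/` inside `{}` are expanded: the traversal may descend into directories the user cannot read, and failing the whole query for each of them would make such globs unusable. The `StorageHDFS` change below therefore treats ENOENT and EACCES from a directory listing as "skip this entry". A hypothetical standalone sketch of the same idea using plain POSIX `opendir` (the real code goes through libhdfs3 and inspects `errno` the same way):

```cpp
#include <cerrno>
#include <cstdio>
#include <dirent.h>
#include <string>

/// Recursively list `path`, skipping directories that disappeared (ENOENT)
/// or that we are not allowed to enter (EACCES); anything else is reported.
static void listRecursive(const std::string & path)
{
    DIR * dir = opendir(path.c_str());
    if (!dir)
    {
        if (errno == ENOENT || errno == EACCES)
            return; /// not visible to us: ignore, keep expanding the glob
        std::perror(path.c_str());
        return;
    }

    while (dirent * entry = readdir(dir))
    {
        std::string name = entry->d_name;
        if (name == "." || name == "..")
            continue;

        std::printf("%s/%s\n", path.c_str(), name.c_str());
        if (entry->d_type == DT_DIR) /// d_type is common but not guaranteed by POSIX
            listRecursive(path + "/" + name);
    }

    closedir(dir);
}

int main()
{
    listRecursive(".");
}
```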
diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 83ef115aacd..d7107ba5950 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U - `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов. - `?` — заменяет ровно один любой символ. -- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`. +- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`. В случае, если в какой-либо из строк содержится `/`, то ошибки доступа (permission denied) к существующим, но недоступным директориям/файлам могут быть проигнорированы. - `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). Конструкция с `{}` аналогична табличной функции [remote](remote.md). diff --git a/docs/ru/sql-reference/table-functions/hdfs.md b/docs/ru/sql-reference/table-functions/hdfs.md index b0d182eef14..350d483b592 100644 --- a/docs/ru/sql-reference/table-functions/hdfs.md +++ b/docs/ru/sql-reference/table-functions/hdfs.md @@ -43,7 +43,7 @@ LIMIT 2 - `*` — Заменяет любое количество любых символов кроме `/`, включая отсутствие символов. - `?` — Заменяет ровно один любой символ. -- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`. В случае, если в какой-либо из строк содержится `/`, то ошибки доступа (permission denied) к существующим, но недоступным директориям/файлам могут быть проигнорированы. - `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). Конструкция с `{}` аналогична табличной функции [remote](remote.md). diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index aa99917d533..da7b1e3fe73 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -92,9 +92,13 @@ namespace HDFSFileInfo ls; ls.file_info = hdfsListDirectory(fs.get(), path_for_ls.data(), &ls.length); - if (ls.file_info == nullptr && errno != ENOENT) // NOLINT + if (ls.file_info == nullptr && errno != ENOENT && errno != EACCES) // NOLINT { - // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. + // ignore: + // file not found (as in LSWithRegexpMatching) + // permission denied (there is no easy way to determine + // if we really need access or just scanning all dirs while doing recursive search), + // keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. 
throw Exception( ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", path_for_ls, String(hdfsGetLastError())); } From 9c34ef899e974904a7f0545a9ebf3dd5e058295e Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Thu, 10 Aug 2023 15:46:04 +0300 Subject: [PATCH 0180/1687] Add backup_log table --- programs/server/config.xml | 10 ++++ src/Backups/BackupsWorker.cpp | 11 +++- src/Backups/BackupsWorker.h | 2 + src/Common/SystemLogBase.cpp | 1 + src/Common/SystemLogBase.h | 3 +- src/Interpreters/BackupLog.cpp | 59 +++++++++++++++++++++ src/Interpreters/BackupLog.h | 38 +++++++++++++ src/Interpreters/Context.cpp | 10 ++++ src/Interpreters/Context.h | 2 + src/Interpreters/InterpreterSystemQuery.cpp | 4 +- src/Interpreters/SystemLog.cpp | 4 ++ src/Interpreters/SystemLog.h | 3 ++ 12 files changed, 144 insertions(+), 3 deletions(-) create mode 100644 src/Interpreters/BackupLog.cpp create mode 100644 src/Interpreters/BackupLog.h diff --git a/programs/server/config.xml b/programs/server/config.xml index 14b8954fc39..9e5acc5e326 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1218,6 +1218,16 @@ event_date + INTERVAL 3 DAY + + + + +``` + ## query_masking_rules {#query-masking-rules} Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs, diff --git a/docs/en/operations/system-tables/backup_log.md b/docs/en/operations/system-tables/backup_log.md new file mode 100644 index 00000000000..b87666e218d --- /dev/null +++ b/docs/en/operations/system-tables/backup_log.md @@ -0,0 +1,145 @@ +--- +slug: /en/operations/system-tables/backup_log +--- +# backup_log + +Contains logging entries with the information about `BACKUP` and `RESTORE` operations. + +Columns: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the entry. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the entry with microseconds precision. +- `id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the backup or restore operation. +- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the backup storage (the contents of the `FROM` or `TO` clause). +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Operation status. Possible values: + - `'CREATING_BACKUP'` + - `'BACKUP_CREATED'` + - `'BACKUP_FAILED'` + - `'RESTORING'` + - `'RESTORED'` + - `'RESTORE_FAILED'` +- `error` ([String](../../sql-reference/data-types/string.md)) — Error message of the failed operation (empty string for successful opetations). +- `start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of the operation. +- `end_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time of the operation. +- `num_files` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of files stored in the backup. +- `total_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total size of files stored in the backup. +- `num_entries` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of entries in the backup, i.e. the number of files inside the folder if the backup is stored as a folder, or the number of files inside the archive if the backup is stored as an archive. It is not the same as `num_files` if it's an incremental backup or if it contains empty files or duplicates. The following is always true: `num_entries <= num_files`. 
+- `uncompressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Uncompressed size of the backup. +- `compressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Compressed size of the backup. If the backup is not stored as an archive it equals to `uncompressed_size`. +- `files_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of files read during the restore operation. +- `bytes_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total size of files read during the restore operation. + +**Example** + +```sql +BACKUP TABLE test_db.my_table TO Disk('backups_disk', '1.zip') +``` +```response +┌─id───────────────────────────────────┬─status─────────┐ +│ e5b74ecb-f6f1-426a-80be-872f90043885 │ BACKUP_CREATED │ +└──────────────────────────────────────┴────────────────┘ +``` +```sql +SELECT * FROM system.backup_log WHERE id = 'e5b74ecb-f6f1-426a-80be-872f90043885' ORDER BY event_date, event_time_microseconds \G +``` +```response +Row 1: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:05:21.998566 +id: e5b74ecb-f6f1-426a-80be-872f90043885 +name: Disk('backups_disk', '1.zip') +status: CREATING_BACKUP +error: +start_time: 2023-08-19 11:05:21 +end_time: 1970-01-01 03:00:00 +num_files: 0 +total_size: 0 +num_entries: 0 +uncompressed_size: 0 +compressed_size: 0 +files_read: 0 +bytes_read: 0 + +Row 2: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:08:56.916192 +id: e5b74ecb-f6f1-426a-80be-872f90043885 +name: Disk('backups_disk', '1.zip') +status: BACKUP_CREATED +error: +start_time: 2023-08-19 11:05:21 +end_time: 2023-08-19 11:08:56 +num_files: 57 +total_size: 4290364870 +num_entries: 46 +uncompressed_size: 4290362365 +compressed_size: 3525068304 +files_read: 0 +bytes_read: 0 +``` +```sql +RESTORE TABLE test_db.my_table FROM Disk('backups_disk', '1.zip') +``` +```response +┌─id───────────────────────────────────┬─status───┐ +│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ RESTORED │ +└──────────────────────────────────────┴──────────┘ +``` +```sql +SELECT * FROM system.backup_log WHERE id = 'cdf1f731-52ef-42da-bc65-2e1bfcd4ce90' ORDER BY event_date, event_time_microseconds \G +``` +```response +Row 1: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:09:19.718077 +id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 +name: Disk('backups_disk', '1.zip') +status: RESTORING +error: +start_time: 2023-08-19 11:09:19 +end_time: 1970-01-01 03:00:00 +num_files: 0 +total_size: 0 +num_entries: 0 +uncompressed_size: 0 +compressed_size: 0 +files_read: 0 +bytes_read: 0 + +Row 2: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:09:29.334234 +id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 +name: Disk('backups_disk', '1.zip') +status: RESTORED +error: +start_time: 2023-08-19 11:09:19 +end_time: 2023-08-19 11:09:29 +num_files: 57 +total_size: 4290364870 +num_entries: 46 +uncompressed_size: 4290362365 +compressed_size: 4290362365 +files_read: 57 +bytes_read: 4290364870 +``` + +This is essentially the same information that is written in the system table `system.backups`: + +```sql +SELECT * FROM system.backups ORDER BY start_time +``` +```response +┌─id───────────────────────────────────┬─name──────────────────────────┬─status─────────┬─error─┬──────────start_time─┬────────────end_time─┬─num_files─┬─total_size─┬─num_entries─┬─uncompressed_size─┬─compressed_size─┬─files_read─┬─bytes_read─┐ +│ e5b74ecb-f6f1-426a-80be-872f90043885 │ Disk('backups_disk', 
'1.zip') │ BACKUP_CREATED │ │ 2023-08-19 11:05:21 │ 2023-08-19 11:08:56 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 3525068304 │ 0 │ 0 │ +│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ Disk('backups_disk', '1.zip') │ RESTORED │ │ 2023-08-19 11:09:19 │ 2023-08-19 11:09:29 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 4290362365 │ 57 │ 4290364870 │ +└──────────────────────────────────────┴───────────────────────────────┴────────────────┴───────┴─────────────────────┴─────────────────────┴───────────┴────────────┴─────────────┴───────────────────┴─────────────────┴────────────┴────────────┘ +``` + +**See Also** + +- [Backup and Restore](../../operations/backup.md) diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index a46f306f677..df42f80275e 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -23,7 +23,7 @@ System tables: Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start. -Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) and [text_log](../../operations/system-tables/text_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one. +Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md), [text_log](../../operations/system-tables/text_log.md) and [backup_log](../../operations/system-tables/backup_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one. System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or setting corresponding elements in `/etc/clickhouse-server/config.xml`. Elements can be customized are: @@ -32,7 +32,7 @@ System log tables can be customized by creating a config file with the same name - `partition_by`: specify [PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) expression. - `ttl`: specify table [TTL](../../sql-reference/statements/alter/ttl.md) expression. - `flush_interval_milliseconds`: interval of flushing data to disk. -- `engine`: provide full engine expression (starting with `ENGINE =` ) with parameters. 
This option is contradict with `partition_by` and `ttl`. If set together, the server would raise an exception and exit. +- `engine`: provide full engine expression (starting with `ENGINE =` ) with parameters. This option conflicts with `partition_by` and `ttl`. If set together, the server will raise an exception and exit. An example: diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 7b026244624..07525ba5228 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1488,6 +1488,47 @@ ClickHouse использует потоки из глобального пул ``` +## backup_log {#server_configuration_parameters-backup_log} + +Настройки для системной таблицы [backup_log](../../operations/system-tables/backup_log.md), предназначенной для логирования операций `BACKUP` и `RESTORE`. + +Параметры: + +- `database` — имя базы данных. +- `table` — имя таблицы. +- `partition_by` — [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать одновременно с `engine`. +- `order_by` - [произвольный ключ сортировки](../../engines/table-engines/mergetree-family/mergetree.md#order_by). Нельзя использовать одновременно с `engine`. +- `engine` - [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). Нельзя использовать с `partition_by` или `order_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1024. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 1024. +- `buffer_size_rows_flush_threshold` – количество строк в логе, при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size_rows / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. +- `storage_policy` – название политики хранения (необязательный параметр). +- `settings` - [дополнительные настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#settings) (необязательный параметр). + +**Пример** + +```xml + + + system + backup_log
+ 1000 + toYYYYMM(event_date) + 1048576 + 8192 + 524288 + false + +
+
+``` + ## query_masking_rules {#query-masking-rules} Правила, основанные на регулярных выражениях, которые будут применены для всех запросов, а также для всех сообщений перед сохранением их в лог на сервере, diff --git a/docs/ru/operations/system-tables/backup_log.md b/docs/ru/operations/system-tables/backup_log.md new file mode 100644 index 00000000000..15c1bfb20a4 --- /dev/null +++ b/docs/ru/operations/system-tables/backup_log.md @@ -0,0 +1,145 @@ +--- +slug: /ru/operations/system-tables/backup_log +--- +# system.backup_log {#system_tables-backup-log} + +Содержит информацию о всех операциях `BACKUP` and `RESTORE`. + +Колонки: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Дата события. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Время события с точностью до микросекунд. +- `id` ([String](../../sql-reference/data-types/string.md)) — Идентификатор операции. +- `name` ([String](../../sql-reference/data-types/string.md)) — Название хранилища (содержимое секции `FROM` или `TO` в SQL запросе). +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Статус операции. Возможные значения: + - `'CREATING_BACKUP'` + - `'BACKUP_CREATED'` + - `'BACKUP_FAILED'` + - `'RESTORING'` + - `'RESTORED'` + - `'RESTORE_FAILED'` +- `error` ([String](../../sql-reference/data-types/string.md)) — Сообщение об ошибке, при наличии (записи для успешных операций содержат пустую строку). +- `start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Время начала операции. +- `end_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Время завершения операции. +- `num_files` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Количество файлов, хранимых в бэкапе. +- `total_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Общий размер файлов, хранимых в бэкапе. +- `num_entries` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Количество позиций в бэкапе, т.е. либо количество файлов в папке (если бэкап хранится в папке), либо количество файлов в архиве (если бэкап хранится в архиве). Это значение не равно `num_files` в случае если это инкрементальный бэкап либо он содержит пустые файлы или дубликаты. Следующее утверждение верно всегда: `num_entries <= num_files`. +- `uncompressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Размер бэкапа до сжатия. +- `compressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Размер бэкапа после сжатия. Если бэкап не хранится в виде архива, это значение равно `uncompressed_size`. +- `files_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Количество файлов, прочитанных во время операции восстановления. +- `bytes_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Общий размер файлов, прочитанных во время операции восстановления. 
+ +**Пример** + +```sql +BACKUP TABLE test_db.my_table TO Disk('backups_disk', '1.zip') +``` +```response +┌─id───────────────────────────────────┬─status─────────┐ +│ e5b74ecb-f6f1-426a-80be-872f90043885 │ BACKUP_CREATED │ +└──────────────────────────────────────┴────────────────┘ +``` +```sql +SELECT * FROM system.backup_log WHERE id = 'e5b74ecb-f6f1-426a-80be-872f90043885' ORDER BY event_date, event_time_microseconds \G +``` +```response +Row 1: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:05:21.998566 +id: e5b74ecb-f6f1-426a-80be-872f90043885 +name: Disk('backups_disk', '1.zip') +status: CREATING_BACKUP +error: +start_time: 2023-08-19 11:05:21 +end_time: 1970-01-01 03:00:00 +num_files: 0 +total_size: 0 +num_entries: 0 +uncompressed_size: 0 +compressed_size: 0 +files_read: 0 +bytes_read: 0 + +Row 2: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:08:56.916192 +id: e5b74ecb-f6f1-426a-80be-872f90043885 +name: Disk('backups_disk', '1.zip') +status: BACKUP_CREATED +error: +start_time: 2023-08-19 11:05:21 +end_time: 2023-08-19 11:08:56 +num_files: 57 +total_size: 4290364870 +num_entries: 46 +uncompressed_size: 4290362365 +compressed_size: 3525068304 +files_read: 0 +bytes_read: 0 +``` +```sql +RESTORE TABLE test_db.my_table FROM Disk('backups_disk', '1.zip') +``` +```response +┌─id───────────────────────────────────┬─status───┐ +│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ RESTORED │ +└──────────────────────────────────────┴──────────┘ +``` +```sql +SELECT * FROM system.backup_log WHERE id = 'cdf1f731-52ef-42da-bc65-2e1bfcd4ce90' ORDER BY event_date, event_time_microseconds \G +``` +```response +Row 1: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:09:19.718077 +id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 +name: Disk('backups_disk', '1.zip') +status: RESTORING +error: +start_time: 2023-08-19 11:09:19 +end_time: 1970-01-01 03:00:00 +num_files: 0 +total_size: 0 +num_entries: 0 +uncompressed_size: 0 +compressed_size: 0 +files_read: 0 +bytes_read: 0 + +Row 2: +────── +event_date: 2023-08-19 +event_time_microseconds: 2023-08-19 11:09:29.334234 +id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 +name: Disk('backups_disk', '1.zip') +status: RESTORED +error: +start_time: 2023-08-19 11:09:19 +end_time: 2023-08-19 11:09:29 +num_files: 57 +total_size: 4290364870 +num_entries: 46 +uncompressed_size: 4290362365 +compressed_size: 4290362365 +files_read: 57 +bytes_read: 4290364870 +``` + +Это по сути та же информация, что заносится и в системную таблицу `system.backups`: + +```sql +SELECT * FROM system.backups ORDER BY start_time +``` +```response +┌─id───────────────────────────────────┬─name──────────────────────────┬─status─────────┬─error─┬──────────start_time─┬────────────end_time─┬─num_files─┬─total_size─┬─num_entries─┬─uncompressed_size─┬─compressed_size─┬─files_read─┬─bytes_read─┐ +│ e5b74ecb-f6f1-426a-80be-872f90043885 │ Disk('backups_disk', '1.zip') │ BACKUP_CREATED │ │ 2023-08-19 11:05:21 │ 2023-08-19 11:08:56 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 3525068304 │ 0 │ 0 │ +│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ Disk('backups_disk', '1.zip') │ RESTORED │ │ 2023-08-19 11:09:19 │ 2023-08-19 11:09:29 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 4290362365 │ 57 │ 4290364870 │ +└──────────────────────────────────────┴───────────────────────────────┴────────────────┴───────┴─────────────────────┴─────────────────────┴───────────┴────────────┴─────────────┴───────────────────┴─────────────────┴────────────┴────────────┘ +``` + +**См. 
также** + +- [Backup and Restore](../../operations/backup.md) diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md index 24f79cae212..aedefb24193 100644 --- a/docs/ru/operations/system-tables/index.md +++ b/docs/ru/operations/system-tables/index.md @@ -21,7 +21,7 @@ sidebar_label: "Системные таблицы" Большинство системных таблиц хранят свои данные в оперативной памяти. Сервер ClickHouse создает эти системные таблицы при старте. -В отличие от других системных таблиц, таблицы с системными логами [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) и [text_log](../../operations/system-tables/text_log.md) используют движок таблиц [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) и по умолчанию хранят свои данные в файловой системе. Если удалить таблицу из файловой системы, сервер ClickHouse снова создаст пустую таблицу во время следующей записи данных. Если схема системной таблицы изменилась в новом релизе, то ClickHouse переименует текущую таблицу и создаст новую. +В отличие от других системных таблиц, таблицы с системными логами [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md), [text_log](../../operations/system-tables/text_log.md) и [backup_log](../../operations/system-tables/backup_log.md) используют движок таблиц [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) и по умолчанию хранят свои данные в файловой системе. Если удалить таблицу из файловой системы, сервер ClickHouse снова создаст пустую таблицу во время следующей записи данных. Если схема системной таблицы изменилась в новом релизе, то ClickHouse переименует текущую таблицу и создаст новую. Таблицы с системными логами `log` можно настроить, создав конфигурационный файл с тем же именем, что и таблица в разделе `/etc/clickhouse-server/config.d/`, или указав соответствующие элементы в `/etc/clickhouse-server/config.xml`. Настраиваться могут следующие элементы: From 7a4d891a37b615c24383c68471cbdfc78ebfdde5 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Mon, 21 Aug 2023 12:50:10 +0300 Subject: [PATCH 0229/1687] Fix docs spelling --- docs/en/operations/system-tables/backup_log.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system-tables/backup_log.md b/docs/en/operations/system-tables/backup_log.md index b87666e218d..7e088fcad94 100644 --- a/docs/en/operations/system-tables/backup_log.md +++ b/docs/en/operations/system-tables/backup_log.md @@ -18,7 +18,7 @@ Columns: - `'RESTORING'` - `'RESTORED'` - `'RESTORE_FAILED'` -- `error` ([String](../../sql-reference/data-types/string.md)) — Error message of the failed operation (empty string for successful opetations). +- `error` ([String](../../sql-reference/data-types/string.md)) — Error message of the failed operation (empty string for successful operations). 
- `start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of the operation. - `end_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time of the operation. - `num_files` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of files stored in the backup. From 3e9a1825556a12e9dc5362b1bb29b877be158a4e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 21 Aug 2023 13:49:14 +0000 Subject: [PATCH 0230/1687] better interfaces for getting of stream name in part --- .../settings/merge-tree-settings.md | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 83 +++++++++++-- src/Storages/MergeTree/IMergeTreeDataPart.h | 33 +++++- .../MergeTree/IMergedBlockOutputStream.cpp | 15 ++- .../MergeTree/MergeTreeDataPartChecksum.cpp | 12 -- .../MergeTree/MergeTreeDataPartChecksum.h | 3 - .../MergeTree/MergeTreeDataPartCompact.h | 2 +- .../MergeTree/MergeTreeDataPartInMemory.h | 2 +- .../MergeTree/MergeTreeDataPartWide.cpp | 110 +++++++++--------- .../MergeTree/MergeTreeDataPartWide.h | 2 +- .../MergeTree/MergeTreeReaderWide.cpp | 45 +++---- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- src/Storages/MergeTree/MutateTask.cpp | 32 +++-- src/Storages/MergeTree/checkDataPart.cpp | 9 +- .../System/StorageSystemPartsColumns.cpp | 22 ++-- 15 files changed, 227 insertions(+), 147 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 36365c59b35..e1d9e76c2ba 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -832,7 +832,7 @@ You can see which parts of `s` were stored using the sparse serialization: └────────┴────────────────────┘ ``` -## replace_long_file_name_to_hash {#ratio_of_defaults_for_sparse_serialization} +## replace_long_file_name_to_hash {#replace_long_file_name_to_hash} If the file name for column is too long (more than `max_file_name_length` bytes) replace it to SipHash128. Default value: `false`. ## max_file_name_length {#max_file_name_length} diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 5b2f75b6f00..81330255a5f 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1034,14 +1034,14 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const { if (path_to_data_file.empty()) { - auto candidate_path = ISerialization::getFileNameForStream(part_column, substream_path) + ".bin"; - - if (!getDataPartStorage().exists(candidate_path)) - candidate_path = sipHash128String(candidate_path) + ".bin"; + auto stream_name = getStreamNameForColumn(part_column, substream_path, ".bin", getDataPartStorage()); + if (!stream_name) + return; + auto file_name = *stream_name + ".bin"; /// We can have existing, but empty .bin files. Example: LowCardinality(Nullable(...)) columns and column_name.dict.null.bin file. 
- if (getDataPartStorage().exists(candidate_path) && getDataPartStorage().getFileSize(candidate_path) != 0) - path_to_data_file = candidate_path; + if (getDataPartStorage().getFileSize(file_name) != 0) + path_to_data_file = file_name; } }); @@ -1326,8 +1326,8 @@ void IMergeTreeDataPart::loadColumns(bool require) auto metadata_snapshot = storage.getInMemoryMetadataPtr(); if (parent_part) metadata_snapshot = metadata_snapshot->projections.get(name).metadata; - NamesAndTypesList loaded_columns; + NamesAndTypesList loaded_columns; bool is_readonly_storage = getDataPartStorage().isReadonly(); if (!metadata_manager->exists("columns.txt")) @@ -1339,7 +1339,7 @@ void IMergeTreeDataPart::loadColumns(bool require) /// If there is no file with a list of columns, write it down. for (const NameAndTypePair & column : metadata_snapshot->getColumns().getAllPhysical()) - if (getDataPartStorage().exists(getFileNameForColumn(column) + ".bin")) + if (getFileNameForColumn(column)) loaded_columns.push_back(column); if (columns.empty()) @@ -2090,6 +2090,73 @@ IMergeTreeDataPart::uint128 IMergeTreeDataPart::getActualChecksumByFile(const St return in_hash.getHash(); } +std::optional IMergeTreeDataPart::getStreamNameOrHash( + const String & stream_name, + const Checksums & checksums_) +{ + if (checksums_.files.contains(stream_name + ".bin")) + return stream_name; + + auto hash = sipHash128String(stream_name); + if (checksums_.files.contains(hash + ".bin")) + return hash; + + return {}; +} + +std::optional IMergeTreeDataPart::getStreamNameOrHash( + const String & stream_name, + const String & extension, + const IDataPartStorage & storage_) +{ + if (storage_.exists(stream_name + extension)) + return stream_name; + + auto hash = sipHash128String(stream_name); + if (storage_.exists(hash + extension)) + return stream_name; + + return {}; +} + +std::optional IMergeTreeDataPart::getStreamNameForColumn( + const String & column_name, + const ISerialization::SubstreamPath & substream_path, + const Checksums & checksums_) +{ + auto stream_name = ISerialization::getFileNameForStream(column_name, substream_path); + return getStreamNameOrHash(stream_name, checksums_); +} + +std::optional IMergeTreeDataPart::getStreamNameForColumn( + const NameAndTypePair & column, + const ISerialization::SubstreamPath & substream_path, + const Checksums & checksums_) +{ + auto stream_name = ISerialization::getFileNameForStream(column, substream_path); + return getStreamNameOrHash(stream_name, checksums_); +} + +std::optional IMergeTreeDataPart::getStreamNameForColumn( + const String & column_name, + const ISerialization::SubstreamPath & substream_path, + const String & extension, + const IDataPartStorage & storage_) +{ + auto stream_name = ISerialization::getFileNameForStream(column_name, substream_path); + return getStreamNameOrHash(stream_name, extension, storage_); +} + +std::optional IMergeTreeDataPart::getStreamNameForColumn( + const NameAndTypePair & column, + const ISerialization::SubstreamPath & substream_path, + const String & extension, + const IDataPartStorage & storage_) +{ + auto stream_name = ISerialization::getFileNameForStream(column, substream_path); + return getStreamNameOrHash(stream_name, extension, storage_); +} + std::unordered_map IMergeTreeDataPart::checkMetadata() const { return metadata_manager->check(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index a8e053a9c7b..34e6801c3ef 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ 
b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -131,7 +131,7 @@ public: /// Return information about secondary indexes size on disk for all indexes in part IndexSize getTotalSeconaryIndicesSize() const { return total_secondary_indices_size; } - virtual String getFileNameForColumn(const NameAndTypePair & column) const = 0; + virtual std::optional getFileNameForColumn(const NameAndTypePair & column) const = 0; virtual ~IMergeTreeDataPart(); @@ -503,6 +503,37 @@ public: /// This one is about removing file with version of part's metadata (columns, pk and so on) void removeMetadataVersion(); + static std::optional getStreamNameOrHash( + const String & name, + const IMergeTreeDataPart::Checksums & checksums); + + static std::optional getStreamNameOrHash( + const String & name, + const String & extension, + const IDataPartStorage & storage_); + + static std::optional getStreamNameForColumn( + const String & column_name, + const ISerialization::SubstreamPath & substream_path, + const Checksums & checksums_); + + static std::optional getStreamNameForColumn( + const NameAndTypePair & column, + const ISerialization::SubstreamPath & substream_path, + const Checksums & checksums_); + + static std::optional getStreamNameForColumn( + const String & column_name, + const ISerialization::SubstreamPath & substream_path, + const String & extension, + const IDataPartStorage & storage_); + + static std::optional getStreamNameForColumn( + const NameAndTypePair & column, + const ISerialization::SubstreamPath & substream_path, + const String & extension, + const IDataPartStorage & storage_); + mutable std::atomic removal_state = DataPartRemovalState::NOT_ATTEMPTED; mutable std::atomic last_removal_attempt_time = 0; diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index 2df3b6d15a6..c8d6aa0ba65 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -51,9 +51,9 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( data_part->getSerialization(column.name)->enumerateStreams( [&](const ISerialization::SubstreamPath & substream_path) { - auto full_stream_name = ISerialization::getFileNameForStream(column.name, substream_path); - auto stream_name = checksums.getFileNameOrHash(full_stream_name); - ++stream_counts[stream_name]; + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column, substream_path, checksums); + if (stream_name) + ++stream_counts[*stream_name]; }); } @@ -67,14 +67,13 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( ISerialization::StreamCallback callback = [&](const ISerialization::SubstreamPath & substream_path) { - auto full_stream_name = ISerialization::getFileNameForStream(column_name, substream_path); - auto stream_name = checksums.getFileNameOrHash(full_stream_name); + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column_name, substream_path, checksums); /// Delete files if they are no longer shared with another column. 
- if (--stream_counts[stream_name] == 0) + if (stream_name && --stream_counts[*stream_name] == 0) { - remove_files.emplace(stream_name + ".bin"); - remove_files.emplace(stream_name + mrk_extension); + remove_files.emplace(*stream_name + ".bin"); + remove_files.emplace(*stream_name + mrk_extension); } }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 794eba809a2..ed2202fcb19 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -312,18 +312,6 @@ MergeTreeDataPartChecksums::Checksum::uint128 MergeTreeDataPartChecksums::getTot return getSipHash128AsPair(hash_of_all_files); } -String MergeTreeDataPartChecksums::getFileNameOrHash(const String & name) const -{ - if (files.contains(name + ".bin")) - return name; - - auto hash = sipHash128String(name); - if (files.contains(hash + ".bin")) - return hash; - - return name; -} - void MinimalisticDataPartChecksums::serialize(WriteBuffer & to) const { writeString("checksums format version: 5\n", to); diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 2a38b52c72a..8e5e8c8c448 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -88,11 +88,8 @@ struct MergeTreeDataPartChecksums static MergeTreeDataPartChecksums deserializeFrom(const String & s); UInt64 getTotalSizeOnDisk() const; - - String getFileNameOrHash(const String & name) const; }; - /// A kind of MergeTreeDataPartChecksums intended to be stored in ZooKeeper (to save its RAM) /// MinimalisticDataPartChecksums and MergeTreeDataPartChecksums have the same serialization format /// for versions less than MINIMAL_VERSION_WITH_MINIMALISTIC_CHECKSUMS. 
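The contract behind the new `IMergeTreeDataPart::getStreamNameOrHash` helpers replacing `getFileNameOrHash` above: a column stream lives on disk either under its plain name or, if the name was long enough at write time (`replace_long_file_name_to_hash` / `max_file_name_length`), under the hex of its SipHash128; if neither file exists the stream is absent, e.g. the column was added after the part was written, and the caller gets an empty optional instead of a guessed name. One detail worth noting: the storage-based overload in the patch above appears to return `stream_name` from its hash branch, while the checksums-based overload returns `hash`. A hypothetical self-contained sketch, with `std::hash` standing in for SipHash128 and a plain set of file names standing in for the part's checksums:

```cpp
#include <functional>
#include <optional>
#include <set>
#include <sstream>
#include <string>

/// Placeholder for ClickHouse's sipHash128String: any stable hash rendered
/// as hex plays the same role in this sketch.
static std::string hashStub(const std::string & s)
{
    std::ostringstream oss;
    oss << std::hex << std::hash<std::string>{}(s);
    return oss.str();
}

/// Resolve the on-disk stream name, given the file names known from the
/// part's checksums. Returns nullopt when the stream does not exist.
static std::optional<std::string> getStreamNameOrHash(
    const std::string & stream_name,
    const std::set<std::string> & checksum_files)
{
    if (checksum_files.contains(stream_name + ".bin"))
        return stream_name; /// stored under the plain name

    auto hash = hashStub(stream_name);
    if (checksum_files.contains(hash + ".bin"))
        return hash; /// stored under the hashed name

    return std::nullopt; /// e.g. column added after the part was written
}

int main()
{
    std::set<std::string> files{"col.bin", hashStub("a_very_long_column_name") + ".bin"};

    auto plain = getStreamNameOrHash("col", files);                      /// -> "col"
    auto hashed = getStreamNameOrHash("a_very_long_column_name", files); /// -> the hash
    auto missing = getStreamNameOrHash("missing", files);                /// -> nullopt

    return (plain && hashed && !missing) ? 0 : 1;
}
```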
diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 2bbac766c8e..7850e7c976c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -57,7 +57,7 @@ public: std::optional getColumnModificationTime(const String & column_name) const override; - String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return DATA_FILE_NAME; } + std::optional getFileNameForColumn(const NameAndTypePair & /* column */) const override { return DATA_FILE_NAME; } ~MergeTreeDataPartCompact() override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 95f7b796f9a..c7b7dde50a6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -40,7 +40,7 @@ public: bool isStoredOnRemoteDisk() const override { return false; } bool isStoredOnRemoteDiskWithZeroCopySupport() const override { return false; } bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); } - String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } + std::optional getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, const DiskTransactionPtr & disk_transaction) const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 417b5f3d19b..9b71c8df3a3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -73,20 +73,22 @@ ColumnSize MergeTreeDataPartWide::getColumnSizeImpl( getSerialization(column.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); - auto stream_name = checksums.getFileNameOrHash(full_stream_name); + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column, substream_path, checksums); - if (processed_substreams && !processed_substreams->insert(stream_name).second) + if (!stream_name) return; - auto bin_checksum = checksums.files.find(stream_name + ".bin"); + if (processed_substreams && !processed_substreams->insert(*stream_name).second) + return; + + auto bin_checksum = checksums.files.find(*stream_name + ".bin"); if (bin_checksum != checksums.files.end()) { size.data_compressed += bin_checksum->second.file_size; size.data_uncompressed += bin_checksum->second.uncompressed_size; } - auto mrk_checksum = checksums.files.find(stream_name + getMarksFileExtension()); + auto mrk_checksum = checksums.files.find(*stream_name + getMarksFileExtension()); if (mrk_checksum != checksums.files.end()) size.marks += mrk_checksum->second.file_size; }); @@ -154,7 +156,13 @@ void MergeTreeDataPartWide::loadIndexGranularity() if (columns.empty()) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name); - loadIndexGranularityImpl(index_granularity, index_granularity_info, getDataPartStorage(), getFileNameForColumn(columns.front())); + auto any_column_filename = getFileNameForColumn(columns.front()); + if (!any_column_filename) + throw 
Exception(ErrorCodes::NO_FILE_IN_DATA_PART, + "There are no files for column {} in part {}", + columns.front().name, getDataPartStorage().getFullPath()); + + loadIndexGranularityImpl(index_granularity, index_granularity_info, getDataPartStorage(), *any_column_filename); } @@ -186,23 +194,19 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const { getSerialization(name_type.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - String full_stream_name = ISerialization::getFileNameForStream(name_type, substream_path); - String stream_name = checksums.getFileNameOrHash(full_stream_name); - - String mrk_file_name = stream_name + marks_file_extension; - String bin_file_name = stream_name + DATA_FILE_EXTENSION; + auto stream_name = getStreamNameForColumn(name_type, substream_path, checksums); + if (!stream_name) + throw Exception( + ErrorCodes::NO_FILE_IN_DATA_PART, + "No {}.{} file checksum for column {} in part {}", + *stream_name, DATA_FILE_EXTENSION, name_type.name, getDataPartStorage().getFullPath()); + auto mrk_file_name = *stream_name + marks_file_extension; if (!checksums.files.contains(mrk_file_name)) throw Exception( ErrorCodes::NO_FILE_IN_DATA_PART, "No {} file checksum for column {} in part {} ", mrk_file_name, name_type.name, getDataPartStorage().getFullPath()); - - if (!checksums.files.contains(bin_file_name)) - throw Exception( - ErrorCodes::NO_FILE_IN_DATA_PART, - "No {} file checksum for column {} in part {}", - bin_file_name, name_type.name, getDataPartStorage().getFullPath()); }); } } @@ -215,29 +219,28 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const { getSerialization(name_type.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - auto file_path = ISerialization::getFileNameForStream(name_type, substream_path) + marks_file_extension; - if (!getDataPartStorage().exists(file_path)) - file_path = sipHash128String(file_path) + marks_file_extension; + auto stream_name = getStreamNameForColumn(name_type, substream_path, marks_file_extension, getDataPartStorage()); /// Missing file is Ok for case when new column was added. 
- if (getDataPartStorage().exists(file_path)) - { - UInt64 file_size = getDataPartStorage().getFileSize(file_path); + if (!stream_name) + return; - if (!file_size) - throw Exception( - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, - "Part {} is broken: {} is empty.", - getDataPartStorage().getFullPath(), - std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); + auto file_path = *stream_name + marks_file_extension; + UInt64 file_size = getDataPartStorage().getFileSize(file_path); - if (!marks_size) - marks_size = file_size; - else if (file_size != *marks_size) - throw Exception( - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, - "Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath()); - } + if (!file_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: {} is empty.", + getDataPartStorage().getFullPath(), + std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); + + if (!marks_size) + marks_size = file_size; + else if (file_size != *marks_size) + throw Exception( + ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath()); }); } } @@ -245,22 +248,13 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const bool MergeTreeDataPartWide::hasColumnFiles(const NameAndTypePair & column) const { - std::string marks_file_extension = index_granularity_info.mark_type.getFileExtension(); - auto check_stream_exists = [this, &marks_file_extension](const String & stream_name) - { - auto bin_checksum = checksums.files.find(stream_name + DATA_FILE_EXTENSION); - auto mrk_checksum = checksums.files.find(stream_name + marks_file_extension); - - return bin_checksum != checksums.files.end() && mrk_checksum != checksums.files.end(); - }; + auto marks_file_extension = index_granularity_info.mark_type.getFileExtension(); bool res = true; getSerialization(column.name)->enumerateStreams([&](const auto & substream_path) { - auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); - auto stream_name = checksums.getFileNameOrHash(full_stream_name); - - if (!check_stream_exists(stream_name)) + auto stream_name = getStreamNameForColumn(column, substream_path, checksums); + if (!stream_name || !checksums.files.contains(*stream_name + marks_file_extension)) res = false; }); @@ -271,8 +265,11 @@ std::optional MergeTreeDataPartWide::getColumnModificationTime(const Str { try { - auto name_on_disk = checksums.getFileNameOrHash(column_name); - return getDataPartStorage().getFileLastModified(name_on_disk + DATA_FILE_EXTENSION).epochTime(); + auto stream_name = getStreamNameOrHash(column_name, checksums); + if (!stream_name) + return {}; + + return getDataPartStorage().getFileLastModified(*stream_name + DATA_FILE_EXTENSION).epochTime(); } catch (const fs::filesystem_error &) { @@ -280,15 +277,18 @@ std::optional MergeTreeDataPartWide::getColumnModificationTime(const Str } } -String MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const +std::optional MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const { - String filename; + std::optional filename; getSerialization(column.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - if (filename.empty()) + if (!filename.has_value()) { - auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); - filename = checksums.getFileNameOrHash(full_stream_name); + /// 
This method may be called when checksums are not initialized yet. + if (!checksums.empty()) + filename = getStreamNameForColumn(column, substream_path, checksums); + else + filename = getStreamNameForColumn(column, substream_path, DATA_FILE_EXTENSION, getDataPartStorage()); } }); return filename; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 2076a1ec028..bcf70426fa6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -48,7 +48,7 @@ public: bool isStoredOnRemoteDiskWithZeroCopySupport() const override; - String getFileNameForColumn(const NameAndTypePair & column) const override; + std::optional getFileNameForColumn(const NameAndTypePair & column) const override; ~MergeTreeDataPartWide() override; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 999e3d0f7ec..4ba6402e3c5 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -202,15 +202,6 @@ size_t MergeTreeReaderWide::readRows( return read_rows; } -String getStreamName( - const NameAndTypePair & column, - const ISerialization::SubstreamPath & substream_path, - const MergeTreeDataPartChecksums & checksums) -{ - auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); - return checksums.getFileNameOrHash(full_stream_name); -} - void MergeTreeReaderWide::addStreams( const NameAndTypePair & name_and_type, const SerializationPtr & serialization, @@ -222,35 +213,33 @@ void MergeTreeReaderWide::addStreams( ISerialization::StreamCallback callback = [&] (const ISerialization::SubstreamPath & substream_path) { - auto stream_name = getStreamName(name_and_type, substream_path, data_part_info_for_read->getChecksums()); - - if (streams.contains(stream_name)) - { - has_any_stream = true; - return; - } - - bool data_file_exists = data_part_info_for_read->getChecksums().files.contains(stream_name + DATA_FILE_EXTENSION); + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums()); /** If data file is missing then we will not try to open it. * It is necessary since it allows to add new column to structure of the table without creating new files for old parts. */ - if (!data_file_exists) + if (!stream_name) { has_all_streams = false; return; } + if (streams.contains(*stream_name)) + { + has_any_stream = true; + return; + } + has_any_stream = true; bool is_lc_dict = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; auto context = data_part_info_for_read->getContext(); auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; - streams.emplace(stream_name, std::make_unique( - data_part_info_for_read, stream_name, DATA_FILE_EXTENSION, + streams.emplace(*stream_name, std::make_unique( + data_part_info_for_read, *stream_name, DATA_FILE_EXTENSION, data_part_info_for_read->getMarksCount(), all_mark_ranges, settings, mark_cache, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION), + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), &data_part_info_for_read->getIndexGranularityInfo(), profile_callback, clock_type, is_lc_dict, load_marks_threadpool)); }; @@ -276,9 +265,11 @@ static ReadBuffer * getStream( if (cache.contains(ISerialization::getSubcolumnNameForStream(substream_path))) return nullptr; - auto stream_name = getStreamName(name_and_type, substream_path, checksums); + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, checksums); + if (!stream_name) + return nullptr; - auto it = streams.find(stream_name); + auto it = streams.find(*stream_name); if (it == streams.end()) return nullptr; @@ -324,15 +315,15 @@ void MergeTreeReaderWide::prefetchForColumn( serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - auto stream_name = getStreamName(name_and_type, substream_path, data_part_info_for_read->getChecksums()); + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums()); - if (!prefetched_streams.contains(stream_name)) + if (stream_name && !prefetched_streams.contains(*stream_name)) { bool seek_to_mark = !continue_reading; if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { buf->prefetch(priority); - prefetched_streams.insert(stream_name); + prefetched_streams.insert(*stream_name); } } }); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 38bcb0fc94c..4f02c1c543e 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -35,7 +35,7 @@ struct Settings; M(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ M(Float, ratio_of_defaults_for_sparse_serialization, 0.9375f, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ M(Bool, replace_long_file_name_to_hash, false, "If the file name for column is too long (more than 'max_file_name_length' bytes) replace it to SipHash128", 0) \ - M(UInt64, max_file_name_length, 128, "The maximal length of the file name to keep it as is without hashing", 0) \ + M(UInt64, max_file_name_length, 127, "The maximal length of the file name to keep it as is without hashing", 0) \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ M(UInt64, merge_max_block_size_bytes, 10 * 1024 * 1024, "How many bytes in blocks should be formed for merge operations. 
By default has the same value as `index_granularity_bytes`.", 0) \ diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 5ce5355c794..6de23f3e294 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -534,9 +534,9 @@ static std::unordered_map getStreamCounts( { auto callback = [&](const ISerialization::SubstreamPath & substream_path) { - auto full_stream_name = ISerialization::getFileNameForStream(column_name, substream_path); - auto stream_name = source_part_checksums.getFileNameOrHash(full_stream_name); - ++stream_counts[stream_name]; + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column_name, substream_path, source_part_checksums); + if (stream_name) + ++stream_counts[*stream_name]; }; serialization->enumerateStreams(callback); @@ -654,14 +654,13 @@ static NameToNameVector collectFilesForRenames( { ISerialization::StreamCallback callback = [&](const ISerialization::SubstreamPath & substream_path) { - auto full_stream_name = ISerialization::getFileNameForStream({command.column_name, command.data_type}, substream_path); - auto stream_name = source_part->checksums.getFileNameOrHash(full_stream_name); + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(command.column_name, substream_path, source_part->checksums); /// Delete files if they are no longer shared with another column. - if (--stream_counts[stream_name] == 0) + if (stream_name && --stream_counts[*stream_name] == 0) { - add_rename(stream_name + ".bin", ""); - add_rename(stream_name + mrk_extension, ""); + add_rename(*stream_name + ".bin", ""); + add_rename(*stream_name + mrk_extension, ""); } }; @@ -678,13 +677,22 @@ static NameToNameVector collectFilesForRenames( String full_stream_from = ISerialization::getFileNameForStream(command.column_name, substream_path); String full_stream_to = boost::replace_first_copy(full_stream_from, escaped_name_from, escaped_name_to); - String stream_from = source_part->checksums.getFileNameOrHash(full_stream_from); - String stream_to = stream_from == full_stream_from ? 
full_stream_to : sipHash128String(full_stream_to); + auto stream_from = IMergeTreeDataPart::getStreamNameOrHash(full_stream_from, source_part->checksums); + if (!stream_from) + return; + + String stream_to; + auto storage_settings = source_part->storage.getSettings(); + + if (storage_settings->replace_long_file_name_to_hash && full_stream_to.size() > storage_settings->max_file_name_length) + stream_to = sipHash128String(full_stream_to); + else + stream_to = full_stream_to; if (stream_from != stream_to) { - add_rename(stream_from + ".bin", stream_to + ".bin"); - add_rename(stream_from + mrk_extension, stream_to + mrk_extension); + add_rename(*stream_from + ".bin", stream_to + ".bin"); + add_rename(*stream_from + mrk_extension, stream_to + mrk_extension); } }; diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 33715785574..f54056421a7 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -165,17 +165,14 @@ static IMergeTreeDataPart::Checksums checkDataPart( { get_serialization(column)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { - auto stream_name = ISerialization::getFileNameForStream(column, substream_path); - auto file_name = stream_name + ".bin"; + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column, substream_path, ".bin", data_part_storage); - if (!data_part_storage.exists(file_name)) - file_name = sipHash128String(stream_name) + ".bin"; - - if (!data_part_storage.exists(file_name)) + if (!stream_name) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "There is no file for column '{}' in data part '{}'", column.name, data_part->name); + auto file_name = *stream_name + ".bin"; checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); }); } diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 0510c733e65..275d56c3da5 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -271,19 +271,21 @@ void StorageSystemPartsColumns::processNextStorage( ColumnSize size; NameAndTypePair subcolumn(column.name, name, column.type, data.type); - String full_stream_name = ISerialization::getFileNameForStream(subcolumn, subpath); - String stream_name = part->checksums.getFileNameOrHash(full_stream_name); - auto bin_checksum = part->checksums.files.find(stream_name + ".bin"); - if (bin_checksum != part->checksums.files.end()) + auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(subcolumn, subpath, part->checksums); + if (stream_name) { - size.data_compressed += bin_checksum->second.file_size; - size.data_uncompressed += bin_checksum->second.uncompressed_size; - } + auto bin_checksum = part->checksums.files.find(*stream_name + ".bin"); + if (bin_checksum != part->checksums.files.end()) + { + size.data_compressed += bin_checksum->second.file_size; + size.data_uncompressed += bin_checksum->second.uncompressed_size; + } - auto mrk_checksum = part->checksums.files.find(stream_name + part->index_granularity_info.mark_type.getFileExtension()); - if (mrk_checksum != part->checksums.files.end()) - size.marks += mrk_checksum->second.file_size; + auto mrk_checksum = part->checksums.files.find(*stream_name + part->index_granularity_info.mark_type.getFileExtension()); + if (mrk_checksum != part->checksums.files.end()) + size.marks += mrk_checksum->second.file_size; + } 
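/// Editorial note, not part of the original patch: the guard above is the crux of this
/// change. IMergeTreeDataPart::getStreamNameForColumn returns an optional stream name:
/// the plain ISerialization stream name when "name.bin" is present in the part's checksums,
/// the sipHash128String(full_stream_name) replacement when replace_long_file_name_to_hash
/// stored a name longer than max_file_name_length in hashed form, and an empty optional
/// when the part has no file for that substream; hence every caller dereferences it only
/// after checking it.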
subcolumn_bytes_on_disk.push_back(size.data_compressed + size.marks); subcolumn_data_compressed_bytes.push_back(size.data_compressed); From 2638cc69d76e824e549c9dc0a008e104d316e95c Mon Sep 17 00:00:00 2001 From: VanDarkholme7 <980181529@qq.com> Date: Tue, 22 Aug 2023 09:26:17 +0000 Subject: [PATCH 0231/1687] fix bug on DELETE and UPDATE with JSON subcolumns --- .../MergeTree/MergeTreeSequentialSource.cpp | 8 ++- .../MergeTree/StorageFromMergeTreeDataPart.h | 8 +++ .../02864_mutations_subcolumns.reference | 10 ++++ .../02864_mutations_subcolumns.sql | 51 +++++++++++++++++++ 4 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02864_mutations_subcolumns.reference create mode 100644 tests/queries/0_stateless/02864_mutations_subcolumns.sql diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 8a9faa5cee4..9bd9db6a8c4 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -119,7 +119,11 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( addTotalRowsApprox(data_part->rows_count); /// Add columns because we don't want to read empty blocks - injectRequiredColumns(LoadedMergeTreeDataPartInfoForReader(data_part, alter_conversions), storage_snapshot, /*with_subcolumns=*/ false, columns_to_read); + injectRequiredColumns( + LoadedMergeTreeDataPartInfoForReader(data_part, alter_conversions), + storage_snapshot, + storage.supportsSubcolumns(), + columns_to_read); NamesAndTypesList columns_for_reader; if (take_column_types_from_storage) @@ -127,6 +131,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) .withExtendedObjects() .withSystemColumns(); + if (storage.supportsSubcolumns()) + options.withSubcolumns(); columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read); } else diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 17078d3e73b..ad4a9770048 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -89,6 +89,8 @@ public: bool supportsDynamicSubcolumns() const override { return true; } + bool supportsSubcolumns() const override { return true; } + bool mayBenefitFromIndexForIn( const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override { @@ -110,6 +112,12 @@ public: return storage.getPartitionIDFromQuery(ast, context); } + StorageSnapshotPtr getStorageSnapshotForQuery( + const StorageMetadataPtr & metadata_snapshot, const ASTPtr & /*query*/, ContextPtr query_context) const override + { + return storage.getStorageSnapshot(metadata_snapshot, query_context); + } + bool materializeTTLRecalculateOnly() const { if (parts.empty()) diff --git a/tests/queries/0_stateless/02864_mutations_subcolumns.reference b/tests/queries/0_stateless/02864_mutations_subcolumns.reference new file mode 100644 index 00000000000..b702755e127 --- /dev/null +++ b/tests/queries/0_stateless/02864_mutations_subcolumns.reference @@ -0,0 +1,10 @@ +6 1 +5 2 +4 3 +3 4 +4 ttt +5 ttt +6 ttt +{"a":"1","obj":{"k1":1,"k2":0,"k3":0}} +{"a":"3","obj":{"k1":0,"k2":0,"k3":1}} +{"a":"1","obj":{"k1":1,"k2":0,"k3":0}} diff --git a/tests/queries/0_stateless/02864_mutations_subcolumns.sql b/tests/queries/0_stateless/02864_mutations_subcolumns.sql new file mode 
100644 index 00000000000..2a8e0f7811e --- /dev/null +++ b/tests/queries/0_stateless/02864_mutations_subcolumns.sql @@ -0,0 +1,51 @@ +-- Tags: no-replicated-database +-- It won't work in case there are missing subcolumns in different shards + +DROP TABLE IF EXISTS t_mutations_subcolumns; + +SET allow_experimental_object_type = 1; + +CREATE TABLE t_mutations_subcolumns (id UInt64, n String, obj JSON) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_mutations_subcolumns VALUES (1, 'aaa', '{"k1": {"k2": "foo"}, "k3": 5}'); +INSERT INTO t_mutations_subcolumns VALUES (2, 'bbb', '{"k1": {"k2": "fee"}, "k3": 4}'); +INSERT INTO t_mutations_subcolumns VALUES (3, 'ccc', '{"k1": {"k2": "foo", "k4": "baz"}, "k3": 4}'); +INSERT INTO t_mutations_subcolumns VALUES (4, 'ddd', '{"k1": {"k2": "foo"}, "k3": 4}'); +INSERT INTO t_mutations_subcolumns VALUES (5, 'eee', '{"k1": {"k2": "foo"}, "k3": 4}'); +INSERT INTO t_mutations_subcolumns VALUES (6, 'fff', '{"k1": {"k2": "foo"}, "k3": 4}'); + +OPTIMIZE TABLE t_mutations_subcolumns FINAL; + +SELECT count(), min(id) FROM t_mutations_subcolumns; + +SET mutations_sync = 2; + +ALTER TABLE t_mutations_subcolumns DELETE WHERE obj.k3 = 5; +SELECT count(), min(id) FROM t_mutations_subcolumns; + +DELETE FROM t_mutations_subcolumns WHERE obj.k1.k2 = 'fee'; +SELECT count(), min(id) FROM t_mutations_subcolumns; + +ALTER TABLE t_mutations_subcolumns DELETE WHERE obj.k1 = ('foo', 'baz'); +SELECT count(), min(id) FROM t_mutations_subcolumns; + +ALTER TABLE t_mutations_subcolumns UPDATE n = 'ttt' WHERE obj.k1.k2 = 'foo'; +SELECT id, n FROM t_mutations_subcolumns; + +DROP TABLE IF EXISTS t_mutations_subcolumns; + +CREATE TABLE t_mutations_subcolumns (a UInt64, obj JSON) +ENGINE = MergeTree ORDER BY a PARTITION BY a; + +INSERT INTO t_mutations_subcolumns VALUES (1, '{"k1": 1}'); +INSERT INTO t_mutations_subcolumns VALUES (2, '{"k2": 1}'); +INSERT INTO t_mutations_subcolumns VALUES (3, '{"k3": 1}'); + +ALTER TABLE t_mutations_subcolumns DELETE WHERE obj.k2 = 1; +SELECT * FROM t_mutations_subcolumns ORDER BY a FORMAT JSONEachRow; + +ALTER TABLE t_mutations_subcolumns DELETE WHERE obj.k1 = 0; +SELECT * FROM t_mutations_subcolumns ORDER BY a FORMAT JSONEachRow; + +DROP TABLE t_mutations_subcolumns; From 6c76ad1ea408d0ea6186c233bc417eed64cd768a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 22 Aug 2023 11:57:56 +0200 Subject: [PATCH 0232/1687] Cleanup --- tests/integration/test_storage_kafka/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 5595edf5576..b1191af60b7 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -2948,7 +2948,7 @@ def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster): # while materialized view is working to inject zookeeper failure pm.drop_instance_zk_connections(instance) instance.wait_for_log_line( - "Error.*(session has been expired|Connection loss|Coordination::Exception).*while pushing to view" + "Error.*(Connection loss|Coordination::Exception).*while pushing to view" ) pm.heal_all() instance.wait_for_log_line("Committed offset 22") From c11ed5feb4b8c916ba36b6fe6171e7ab1f76b2b4 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 22 Aug 2023 10:57:58 +0000 Subject: [PATCH 0233/1687] support for non-const timezone arguments/strings --- src/Functions/FunctionsConversion.h | 29 ++++++++----------- .../extractTimeZoneFromFunctionArguments.cpp |
4 +-- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 79d17d8ac98..102d4339554 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1844,20 +1844,20 @@ public: mandatory_args.push_back({"scale", &isNativeInteger, &isColumnConst, "const Integer"}); } - // toString(DateTime or DateTime64, [timezone: String]) + // toString(DateTime or DateTime64, [timezone: String (const/non-const)]) if ((std::is_same_v && !arguments.empty() && (isDateTime64(arguments[0].type) || isDateTime(arguments[0].type))) - // toUnixTimestamp(value[, timezone : String]) + // toUnixTimestamp(value[, timezone : String(const/non-const)]) || std::is_same_v - // toDate(value[, timezone : String]) + // toDate(value[, timezone : String(const/non-const)]) || std::is_same_v // TODO: shall we allow timestamp argument for toDate? DateTime knows nothing about timezones and this argument is ignored below. - // toDate32(value[, timezone : String]) + // toDate32(value[, timezone : String(const/non-const)]) || std::is_same_v - // toDateTime(value[, timezone: String]) + // toDateTime(value[, timezone: String(const/non-const)]) || std::is_same_v - // toDateTime64(value, scale : Integer[, timezone: String]) + // toDateTime64(value, scale : Integer[, timezone: String(const/non-const)]) || std::is_same_v) - { - optional_args.push_back({"timezone", &isString, &isColumnConst, "const String"}); + { + optional_args.push_back({"timezone", &isString, nullptr, "const String/String"}); } validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -1895,13 +1895,13 @@ public: if (to_datetime64 || scale != 0) /// toDateTime('xxxx-xx-xx xx:xx:xx', 0) return DateTime return std::make_shared(scale, - extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); + extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, true)); - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, true)); } if constexpr (std::is_same_v) - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, true)); else if constexpr (std::is_same_v) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected branch in code of conversion function: it is a bug."); else @@ -1919,12 +1919,7 @@ public: } bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override - { - if constexpr (std::is_same_v) - return {2}; - return {1}; - } + bool canBeExecutedOnDefaultArguments() const override { return false; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override diff --git a/src/Functions/extractTimeZoneFromFunctionArguments.cpp b/src/Functions/extractTimeZoneFromFunctionArguments.cpp index 7168c68c9c9..38ce0467aee 100644 --- a/src/Functions/extractTimeZoneFromFunctionArguments.cpp +++ b/src/Functions/extractTimeZoneFromFunctionArguments.cpp @@ -30,11 +30,11 @@ std::string extractTimeZoneNameFromColumn(const IColumn * column, const String & } -std::string extractTimeZoneNameFromFunctionArguments(const 
ColumnsWithTypeAndName & arguments, size_t time_zone_arg_num, size_t datetime_arg_num, bool allow_nonconst_timezone_arguments) +std::string extractTimeZoneNameFromFunctionArguments(const ColumnsWithTypeAndName & arguments, size_t time_zone_arg_num, size_t datetime_arg_num, [[maybe_unused]] bool allow_nonconst_timezone_arguments) { /// Explicit time zone may be passed in last argument. if ((arguments.size() == time_zone_arg_num + 1) - && (!allow_nonconst_timezone_arguments || arguments[time_zone_arg_num].column)) + && (arguments[time_zone_arg_num].column)) { return extractTimeZoneNameFromColumn(arguments[time_zone_arg_num].column.get(), arguments[time_zone_arg_num].name); } From 6e0d9f1df737001cf401dacc77ccf891e54e6326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Tue, 22 Aug 2023 14:41:02 +0000 Subject: [PATCH 0234/1687] Edited CompressionMethodByte for GCDCodec --- src/Compression/CompressionInfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compression/CompressionInfo.h b/src/Compression/CompressionInfo.h index ed63fc5e73a..1b4025fed1d 100644 --- a/src/Compression/CompressionInfo.h +++ b/src/Compression/CompressionInfo.h @@ -47,7 +47,7 @@ enum class CompressionMethodByte : uint8_t AES_256_GCM_SIV = 0x97, FPC = 0x98, DeflateQpl = 0x99, - GCD = 0x69, + GCD = 0x9a, }; } From 4421bd66b2921cf01c11d79d87a67697fff63712 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Tue, 22 Aug 2023 14:53:04 +0200 Subject: [PATCH 0235/1687] Fix for #53187 --- src/Functions/transform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 62ab51abd76..0ac213194c3 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -490,7 +490,7 @@ namespace else if (cache.default_column) column_result.insertFrom(*cache.default_column, 0); else if (default_non_const) - column_result.insertFrom(*default_non_const, 0); + column_result.insertFrom(*default_non_const, i); else column_result.insertFrom(in_casted, i); } From 740fff84b4aba2f992edf3722a6b11f4c85efaa5 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Tue, 22 Aug 2023 14:53:39 +0200 Subject: [PATCH 0236/1687] transform: always check that the default column is big enough --- src/Functions/transform.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 0ac213194c3..fa41841b7d1 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -163,7 +163,16 @@ namespace ColumnPtr default_non_const; if (!cache.default_column && arguments.size() == 4) + { default_non_const = castColumn(arguments[3], result_type); + if (in->size() > default_non_const->size()) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Fourth argument of function {} must be a constant or a column at least as big as the second and third arguments", + getName()); + } + } ColumnPtr in_casted = arguments[0].column; if (arguments.size() == 3)
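[Editorial note, not part of the patch stream] Patches 0235 and 0236 fix how transform() falls back to a non-constant default column: the old code copied row 0 of the default for every non-matching input row, the fix copies row i, and patch 0236 additionally rejects a default column shorter than the input. The following standalone miniature shows the corrected behaviour; transformWithDefault and all names in it are invented for illustration, and the real code works on IColumn and a lookup cache rather than vectors:

#include <optional>
#include <stdexcept>
#include <vector>

std::vector<int> transformWithDefault(
    const std::vector<int> & in,
    const std::vector<int> & from,
    const std::vector<int> & to,
    const std::vector<int> & def)
{
    if (in.size() > def.size())
        throw std::invalid_argument("default column is too small"); // patch 0236

    std::vector<int> out;
    out.reserve(in.size());
    for (size_t i = 0; i < in.size(); ++i)
    {
        std::optional<size_t> match;
        for (size_t j = 0; j < from.size(); ++j)
        {
            if (from[j] == in[i])
            {
                match = j;
                break;
            }
        }

        if (match)
            out.push_back(to[*match]); // value found in 'from': take mapped value
        else
            out.push_back(def[i]);     // patch 0235: per-row default, def[i] not def[0]
    }
    return out;
}

For example, transformWithDefault({1, 5}, {1}, {10}, {7, 8}) yields {10, 8}; before the fix the second element would have been 7, the row-0 default.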
From 90e485957a825cb6491debacb5487ad3436e195c Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Tue, 22 Aug 2023 15:09:30 +0200 Subject: [PATCH 0237/1687] Add tests for #53187 fix --- .../0_stateless/02542_transform_new.reference | 6 ++++++ tests/queries/0_stateless/02542_transform_new.sql | 13 +++++++++++++ 2 files changed, 19 insertions(+) diff --git a/tests/queries/0_stateless/02542_transform_new.reference b/tests/queries/0_stateless/02542_transform_new.reference index b6eaa692c41..faec7b5c777 100644 --- a/tests/queries/0_stateless/02542_transform_new.reference +++ b/tests/queries/0_stateless/02542_transform_new.reference @@ -30,3 +30,9 @@ sep4 sep5 8000 sep6 +issue #53187 +0 1 1 +1 0 0 +- +0 0 0 +1 1 1 diff --git a/tests/queries/0_stateless/02542_transform_new.sql b/tests/queries/0_stateless/02542_transform_new.sql index 43da0a50731..f3475d6157f 100644 --- a/tests/queries/0_stateless/02542_transform_new.sql +++ b/tests/queries/0_stateless/02542_transform_new.sql @@ -33,3 +33,16 @@ select 'sep6'; SELECT transform(-9223372036854775807, [-1], [toDecimal32(1024, 3)]) FROM system.numbers LIMIT 7; -- { serverError BAD_ARGUMENTS } SELECT [NULL, NULL, NULL, NULL], transform(number, [2147483648], [toDecimal32(1, 2)]) AS x FROM numbers(257) WHERE materialize(10); -- { serverError BAD_ARGUMENTS } SELECT transform(-2147483649, [1], [toDecimal32(1, 2)]) GROUP BY [1] WITH TOTALS; -- { serverError BAD_ARGUMENTS } + +SELECT 'issue #53187'; +SELECT + CAST(number, 'String') AS v2, + caseWithExpression('x', 'y', 0, cond2) AS cond1, + toNullable('0' = v2) AS cond2 +FROM numbers(2); +SELECT '-'; +SELECT + CAST(number, 'String') AS v2, + caseWithExpression('x', 'y', 0, cond2) AS cond1, + toNullable('1' = v2) AS cond2 +FROM numbers(2); From c7be413ae2687dc087b7ee4963364e04b699c1bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Tue, 22 Aug 2023 16:16:55 +0000 Subject: [PATCH 0238/1687] Added boost::math::gcd and improved test --- src/Compression/CompressionCodecGCD.cpp | 35 ++++++------------- .../0_stateless/02845_gcd_codec.reference | 3 -- tests/queries/0_stateless/02845_gcd_codec.sql | 4 --- .../0_stateless/02863_gcd_codec.reference | 1 + tests/queries/0_stateless/02863_gcd_codec.sql | 15 ++++++ 5 files changed, 27 insertions(+), 31 deletions(-) delete mode 100644 tests/queries/0_stateless/02845_gcd_codec.reference delete mode 100644 tests/queries/0_stateless/02845_gcd_codec.sql create mode 100644 tests/queries/0_stateless/02863_gcd_codec.reference create mode 100644 tests/queries/0_stateless/02863_gcd_codec.sql diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 49d985a0c89..3be30102b9c 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -11,27 +11,14 @@ #include "base/types.h" #include "config.h" -#include -#include - +#include #include #include + namespace DB { -template -T gcd_func(T a, T b) -{ - while (b != 0) - { - T c = a % b; - a = b; - b = c; - } - return a; -} - class CompressionCodecGCD : public ICompressionCodec { public: @@ -94,32 +81,32 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest) const char * const source_end = source + source_size; - T gcd{}; + T gcd_divider{}; const auto * cur_source = source; while (cur_source < source_end) { if (cur_source == source) { - gcd = unalignedLoad(cur_source); + gcd_divider = unalignedLoad(cur_source); } else { - gcd = gcd_func(gcd, unalignedLoad(cur_source)); + gcd_divider = boost::math::gcd(gcd_divider, unalignedLoad(cur_source)); } - if (gcd == T(1)) + if (gcd_divider == T(1)) { break; } } - unalignedStore(dest, gcd); + unalignedStore(dest, gcd_divider); dest += sizeof(T); if (typeid(T) == typeid(UInt32) || typeid(T) == typeid(UInt64)) { /// libdivide support only UInt32 and UInt64.
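/// Editorial comment, not part of the patch: the block layout produced above is
/// [gcd_divider][v0 / gcd_divider][v1 / gcd_divider]... For example the input {24, 36, 60}
/// is stored as {12 | 2, 3, 5}, and decompressDataForType multiplies every value by the
/// stored divider again. For UInt32/UInt64 the division below goes through libdivide,
/// which precomputes a multiply/shift pair for the runtime divisor once, so the per-value
/// division avoids a hardware DIV instruction.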
using TUInt32Or64 = std::conditional_t; - libdivide::divider divider(static_cast(gcd)); + libdivide::divider divider(static_cast(gcd_divider)); cur_source = source; while (cur_source < source_end) { @@ -133,7 +120,7 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest) cur_source = source; while (cur_source < source_end) { - unalignedStore(dest, unalignedLoad(cur_source) / gcd); + unalignedStore(dest, unalignedLoad(cur_source) / gcd_divider); cur_source += sizeof(T); dest += sizeof(T); } @@ -152,13 +139,13 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot GCD decompress, data size {} is less than {}", source_size, sizeof(T)); const char * const source_end = source + source_size; - const T gcd = unalignedLoad(source); + const T gcd_multiplier = unalignedLoad(source); source += sizeof(T); while (source < source_end) { if (dest + sizeof(T) > output_end) [[unlikely]] throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data"); - unalignedStore(dest, unalignedLoad(source) * gcd); + unalignedStore(dest, unalignedLoad(source) * gcd_multiplier); source += sizeof(T); dest += sizeof(T); diff --git a/tests/queries/0_stateless/02845_gcd_codec.reference b/tests/queries/0_stateless/02845_gcd_codec.reference deleted file mode 100644 index ebd3a037ffb..00000000000 --- a/tests/queries/0_stateless/02845_gcd_codec.reference +++ /dev/null @@ -1,3 +0,0 @@ -239 -0 -37 diff --git a/tests/queries/0_stateless/02845_gcd_codec.sql b/tests/queries/0_stateless/02845_gcd_codec.sql deleted file mode 100644 index 48a2ba076eb..00000000000 --- a/tests/queries/0_stateless/02845_gcd_codec.sql +++ /dev/null @@ -1,4 +0,0 @@ -DROP TABLE IF EXISTS aboba; -CREATE TABLE aboba (s String, ui UInt8 CODEC(GCD(1), LZ4)) ENGINE = Memory; -INSERT INTO aboba (*) VALUES ('Hello', 239), ('World', 0), ('Goodbye', 37); -SELECT ui FROM aboba; diff --git a/tests/queries/0_stateless/02863_gcd_codec.reference b/tests/queries/0_stateless/02863_gcd_codec.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02863_gcd_codec.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02863_gcd_codec.sql b/tests/queries/0_stateless/02863_gcd_codec.sql new file mode 100644 index 00000000000..1039ebcecb8 --- /dev/null +++ b/tests/queries/0_stateless/02863_gcd_codec.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS table_none; +CREATE TABLE table_none (id UInt64, ui UInt256 CODEC(LZ4)) ENGINE = Memory; +INSERT INTO table_none SELECT * FROM generateRandom() LIMIT 50; + +DROP TABLE IF EXISTS table_gcd_codec; +CREATE TABLE table_gcd_codec (id UInt64, ui UInt256 CODEC(GCD, LZ4)) ENGINE = Memory; +INSERT INTO table_gcd_codec SELECT * FROM table_none; + +SELECT COUNT(*) +FROM ( + SELECT table_none.id, table_none.ui AS ui1, table_gcd_codec.id, table_gcd_codec.ui AS ui2 + FROM table_none + JOIN table_gcd_codec ON table_none.id = table_gcd_codec.id +) +WHERE ui1 != ui2; From f5b3dbbe68046fbbd6eb86e1a07f5ce7b1cc4bd4 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Tue, 22 Aug 2023 16:22:47 +0000 Subject: [PATCH 0239/1687] Create new XML Document for preprocessed config --- src/Common/Config/ConfigProcessor.cpp | 20 +++++++++++++------- src/Common/Config/ConfigProcessor.h | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index cfbcd0bd258..996d1051bad 100644 --- 
a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -809,19 +809,25 @@ void ConfigProcessor::decryptEncryptedElements(LoadedConfig & loaded_config) #endif -void ConfigProcessor::hideElements(LoadedConfig & loaded_config) +XMLDocumentPtr ConfigProcessor::hideElements(LoadedConfig & loaded_config) { - Node * config_root = getRootNode(loaded_config.preprocessed_xml.get()); - hideRecursive(config_root); - // loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml); + XMLDocumentPtr new_preprocessed_xml = new Poco::XML::Document; + + for (Node * node = loaded_config.preprocessed_xml->firstChild(); node; node = node->nextSibling()) + { + Node * new_node = new_preprocessed_xml->importNode(node, true); + new_preprocessed_xml->appendChild(new_node); + } + Node * new_config_root = getRootNode(new_preprocessed_xml.get()); + hideRecursive(new_config_root); + + return new_preprocessed_xml; } void ConfigProcessor::savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir) { try { - hideElements(loaded_config); - if (preprocessed_path.empty()) { fs::path preprocessed_configs_path("preprocessed_configs/"); @@ -866,7 +872,7 @@ void ConfigProcessor::savePreprocessedConfig(LoadedConfig & loaded_config, std:: writer.setNewLine("\n"); writer.setIndent(" "); writer.setOptions(Poco::XML::XMLWriter::PRETTY_PRINT); - writer.writeNode(preprocessed_path, loaded_config.preprocessed_xml); + writer.writeNode(preprocessed_path, hideElements(loaded_config)); LOG_DEBUG(log, "Saved preprocessed configuration to '{}'.", preprocessed_path); } catch (Poco::Exception & e) diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 0d41372e796..6d993d8f58b 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -143,7 +143,7 @@ private: #endif void hideRecursive(Poco::XML::Node * config_root); - void hideElements(LoadedConfig & loaded_config); + XMLDocumentPtr hideElements(LoadedConfig & loaded_config); void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); From 176fdf3aa6970988ff069334af30b775dbf3939d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Tue, 22 Aug 2023 16:35:06 +0000 Subject: [PATCH 0240/1687] Erased args from codec_builder --- src/Compression/CompressionCodecGCD.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 3be30102b9c..9e3c9f77cd4 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -260,8 +260,8 @@ void registerCodecGCD(CompressionCodecFactory & factory) gcd_bytes_size = getGCDBytesSize(column_type); } - if (arguments && arguments->children.size() > 1) - throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "GCD codec must have 1 parameter, given {}", arguments->children.size()); + if (arguments && !arguments->children.empty()) + throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "GCD codec must have 0 parameters, given {}", arguments->children.size()); return std::make_shared(gcd_bytes_size); }; factory.registerCompressionCodecWithType("GCD", method_code, codec_builder); From 50e10a66cc3c42821081dc0d97020933263717df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= 
=?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Tue, 22 Aug 2023 17:12:01 +0000 Subject: [PATCH 0241/1687] Added Docs --- docs/en/sql-reference/statements/create/table.md | 4 ++++ docs/ru/sql-reference/statements/create/table.md | 1 + 2 files changed, 5 insertions(+) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 1a72f89fb1f..cb2cac89bc6 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -405,6 +405,10 @@ These codecs are designed to make compression more effective by using specific f `Gorilla(bytes_size)` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series changes, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078). +#### GCD + +`GCD()` - Calculates GCD of all numbers, then it divides them by it. This codec is for data preparation and is not suitable for use without an additional codec. GCD-codec can be used with Integers(from 8 up to 256 bytes), Decimals and DateTime. A good use case would be to store timestamps or monetary values with high precision. + #### FPC `FPC(level, float_size)` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. Possible `level` values: 1-28, the default value is 12. Possible `float_size` values: 4, 8, the default value is `sizeof(type)` if type is Float. In all other cases, it’s 4. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf). diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index 64eae49be6c..3429b2fdfb1 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -241,6 +241,7 @@ ClickHouse поддерживает кодеки общего назначени - `Delta(delta_bytes)` — Метод, в котором исходные значения заменяются разностью двух соседних значений, за исключением первого значения, которое остаётся неизменным. Для хранения разниц используется до `delta_bytes`, т.е. `delta_bytes` — это максимальный размер исходных данных. Возможные значения `delta_bytes`: 1, 2, 4, 8. Значение по умолчанию для `delta_bytes` равно `sizeof(type)`, если результат 1, 2, 4, or 8. Во всех других случаях — 1. - `DoubleDelta` — Вычисляется разницу от разниц и сохраняет её в компакном бинарном виде. Оптимальная степень сжатия достигается для монотонных последовательностей с постоянным шагом, наподобие временных рядов. Можно использовать с любым типом данных фиксированного размера. Реализует алгоритм, используемый в TSDB Gorilla, поддерживает 64-битные типы данных. 
Использует 1 дополнительный бит для 32-байтовых значений: 5-битные префиксы вместо 4-битных префиксов. Подробнее читайте в разделе «Compressing Time Stamps» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). - `Gorilla` — Вычисляет XOR между текущим и предыдущим значением и записывает результат в компактной бинарной форме. Еффективно сохраняет ряды медленно изменяющихся чисел с плавающей запятой, поскольку наилучший коэффициент сжатия достигается, если соседние значения одинаковые. Реализует алгоритм, используемый в TSDB Gorilla, адаптируя его для работы с 64-битными значениями. Подробнее читайте в разделе «Compressing Values» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). +- `GCD()` - Вычисляет НОД всех чисел, а затем делит их на него. Этот кодек предназначен для подготовки данных и не подходит для использования без дополнительного кодека. GCD-кодек может использоваться с Integer, Decimal и DateTime. Хорошим вариантом использования было бы хранение временных меток или денежных значений с высокой точностью. - `T64` — Метод сжатия который обрезает неиспользуемые старшие биты целочисленных значений (включая `Enum`, `Date` и `DateTime`). На каждом шаге алгоритма, кодек помещает блок из 64 значений в матрицу 64✕64, транспонирует её, обрезает неиспользуемые биты, а то, что осталось возвращает в виде последовательности. Неиспользуемые биты, это биты, которые не изменяются от минимального к максимальному на всём диапазоне значений куска данных. Кодеки `DoubleDelta` и `Gorilla` используются в TSDB Gorilla как компоненты алгоритма сжатия. Подход Gorilla эффективен в сценариях, когда данные представляют собой медленно изменяющиеся во времени величины. Метки времени эффективно сжимаются кодеком `DoubleDelta`, а значения кодеком `Gorilla`. 
Например, чтобы создать эффективно хранящуюся таблицу, используйте следующую конфигурацию: From 2e3004d2b6f0d9514fcc545ea40c2e48d64ac040 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Tue, 22 Aug 2023 17:17:31 +0000 Subject: [PATCH 0242/1687] Test-rename --- .../{02863_gcd_codec.reference => 02864_gcd_codec.reference} | 0 .../0_stateless/{02863_gcd_codec.sql => 02864_gcd_codec.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{02863_gcd_codec.reference => 02864_gcd_codec.reference} (100%) rename tests/queries/0_stateless/{02863_gcd_codec.sql => 02864_gcd_codec.sql} (100%) diff --git a/tests/queries/0_stateless/02863_gcd_codec.reference b/tests/queries/0_stateless/02864_gcd_codec.reference similarity index 100% rename from tests/queries/0_stateless/02863_gcd_codec.reference rename to tests/queries/0_stateless/02864_gcd_codec.reference diff --git a/tests/queries/0_stateless/02863_gcd_codec.sql b/tests/queries/0_stateless/02864_gcd_codec.sql similarity index 100% rename from tests/queries/0_stateless/02863_gcd_codec.sql rename to tests/queries/0_stateless/02864_gcd_codec.sql From 67277c1db1b0adf9424d2b9d25cd5bb7aaadae06 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 22 Aug 2023 22:45:23 +0200 Subject: [PATCH 0243/1687] Reasonable prefetches for remote FS by default --- src/Core/Settings.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 136d5aa872d..b81367c7bec 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -709,7 +709,7 @@ class IColumn; \ M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \ M(Bool, enable_filesystem_read_prefetches_log, false, "Log to system.filesystem prefetch_log during query. Should be used only for testing or debugging, not recommended to be turned on by default", 0) \ - M(Bool, allow_prefetched_read_pool_for_remote_filesystem, false, "Prefer prefetched threadpool if all parts are on remote filesystem", 0) \ + M(Bool, allow_prefetched_read_pool_for_remote_filesystem, true, "Prefer prefetched threadpool if all parts are on remote filesystem", 0) \ M(Bool, allow_prefetched_read_pool_for_local_filesystem, false, "Prefer prefetched threadpool if all parts are on local filesystem", 0) \ \ M(UInt64, prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the prefetch buffer to read from the filesystem.", 0) \ @@ -717,7 +717,7 @@ class IColumn; M(UInt64, filesystem_prefetch_step_marks, 0, "Prefetch step in marks. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \ M(UInt64, filesystem_prefetch_min_bytes_for_single_read_task, "8Mi", "Do not parallelize within one file read less than this amount of bytes. E.g. one reader will not receive a read task of size less than this amount. This setting is recommended to avoid spikes of time for aws getObject requests to aws", 0) \ M(UInt64, filesystem_prefetch_max_memory_usage, "1Gi", "Maximum memory usage for prefetches. Zero means unlimited", 0) \ - M(UInt64, filesystem_prefetches_limit, 0, "Maximum number of prefetches. Zero means unlimited.
A setting `filesystem_prefetches_max_memory_usage` is more recommended if you want to limit the number of prefetches", 0) \ + M(UInt64, filesystem_prefetches_limit, 200, "Maximum number of prefetches. Zero means unlimited. A setting `filesystem_prefetches_max_memory_usage` is more recommended if you want to limit the number of prefetches", 0) \ \ M(UInt64, use_structure_from_insertion_table_in_table_functions, 2, "Use structure from insertion table instead of schema inference from data. Possible values: 0 - disabled, 1 - enabled, 2 - auto", 0) \ \ From a291444b30d37e2f509e64b0ba9688c1a5e3270a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= <47687537+seshWCS@users.noreply.github.com> Date: Wed, 23 Aug 2023 00:33:14 +0300 Subject: [PATCH 0244/1687] Update table.md --- docs/en/sql-reference/statements/create/table.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index cb2cac89bc6..0f8d0b5f6bf 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -407,7 +407,7 @@ These codecs are designed to make compression more effective by using specific f #### GCD -`GCD()` - Calculates GCD of all numbers, then it divides them by it. This codec is for data preparation and is not suitable for use without an additional codec. GCD-codec can be used with Integers(from 8 up to 256 bytes), Decimals and DateTime. A good use case would be to store timestamps or monetary values with high precision. +`GCD(bytes_size)` - Calculates GCD of all numbers, then it divides them by it. This codec is for data preparation and is not suitable for use without an additional codec. GCD-codec can be used with Integers, Decimals and DateTime. Possible `bytes_size` values: 1, 2, 4, 8, 16, 32 the default value is `sizeof(type)` if equal to 1, 2, 4, 8, 16, or 32. A good use case would be to store timestamps or monetary values with high precision. #### FPC From e8a3914baaa1fa78d96d54197cb3fd763086bc7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= <47687537+seshWCS@users.noreply.github.com> Date: Wed, 23 Aug 2023 00:34:26 +0300 Subject: [PATCH 0245/1687] Update table.md --- docs/ru/sql-reference/statements/create/table.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index 3429b2fdfb1..947b6a41089 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -241,7 +241,7 @@ ClickHouse поддерживает кодеки общего назначени - `Delta(delta_bytes)` — Метод, в котором исходные значения заменяются разностью двух соседних значений, за исключением первого значения, которое остаётся неизменным. Для хранения разниц используется до `delta_bytes`, т.е. `delta_bytes` — это максимальный размер исходных данных. Возможные значения `delta_bytes`: 1, 2, 4, 8. Значение по умолчанию для `delta_bytes` равно `sizeof(type)`, если результат 1, 2, 4, or 8. Во всех других случаях — 1. - `DoubleDelta` — Вычисляется разницу от разниц и сохраняет её в компакном бинарном виде. Оптимальная степень сжатия достигается для монотонных последовательностей с постоянным шагом, наподобие временных рядов. 
Можно использовать с любым типом данных фиксированного размера. Реализует алгоритм, используемый в TSDB Gorilla, поддерживает 64-битные типы данных. Использует 1 дополнительный бит для 32-байтовых значений: 5-битные префиксы вместо 4-битных префиксов. Подробнее читайте в разделе «Compressing Time Stamps» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). - `Gorilla` — Вычисляет XOR между текущим и предыдущим значением и записывает результат в компактной бинарной форме. Еффективно сохраняет ряды медленно изменяющихся чисел с плавающей запятой, поскольку наилучший коэффициент сжатия достигается, если соседние значения одинаковые. Реализует алгоритм, используемый в TSDB Gorilla, адаптируя его для работы с 64-битными значениями. Подробнее читайте в разделе «Compressing Values» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). -- `GCD()` - Вычисляет НОД всех чисел, а затем делит их на него. Этот кодек предназначен для подготовки данных и не подходит для использования без дополнительного кодека. GCD-кодек может использоваться с Integer, Decimal и DateTime. Хорошим вариантом использования было бы хранение временных меток или денежных значений с высокой точностью. +- `GCD` - Вычисляет НОД всех чисел, а затем делит их на него. Этот кодек предназначен для подготовки данных и не подходит для использования без дополнительного кодека. GCD-кодек может использоваться с Integer, Decimal и DateTime. Хорошим вариантом использования было бы хранение временных меток или денежных значений с высокой точностью. - `T64` — Метод сжатия который обрезает неиспользуемые старшие биты целочисленных значений (включая `Enum`, `Date` и `DateTime`). На каждом шаге алгоритма, кодек помещает блок из 64 значений в матрицу 64✕64, транспонирует её, обрезает неиспользуемые биты, а то, что осталось возвращает в виде последовательности. Неиспользуемые биты, это биты, которые не изменяются от минимального к максимальному на всём диапазоне значений куска данных. Кодеки `DoubleDelta` и `Gorilla` используются в TSDB Gorilla как компоненты алгоритма сжатия. Подход Gorilla эффективен в сценариях, когда данные представляют собой медленно изменяющиеся во времени величины. Метки времени эффективно сжимаются кодеком `DoubleDelta`, а значения кодеком `Gorilla`. 
Например, чтобы создать эффективно хранящуюся таблицу, используйте следующую конфигурацию: From 480d2df4e8bdf63953e3585124cb5aed4ce666ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Tue, 22 Aug 2023 21:39:29 +0000 Subject: [PATCH 0246/1687] Added params --- src/Compression/CompressionCodecGCD.cpp | 23 +++++++++++++++++++---- tests/performance/codec_gcd.xml | 9 +++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 tests/performance/codec_gcd.xml diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 9e3c9f77cd4..0207e298174 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -45,8 +45,9 @@ namespace ErrorCodes { extern const int CANNOT_COMPRESS; extern const int CANNOT_DECOMPRESS; - extern const int BAD_ARGUMENTS; extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; + extern const int ILLEGAL_CODEC_PARAMETER; + extern const int BAD_ARGUMENTS; } UInt32 CompressionCodecGCD::getMaxCompressedDataSize(UInt32 uncompressed_size) const @@ -255,13 +256,27 @@ void registerCodecGCD(CompressionCodecFactory & factory) { /// Default bytes size is 1. UInt8 gcd_bytes_size = 1; - if (column_type) + + if (arguments && !arguments->children.empty()) + { + if (arguments->children.size() > 1) + throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "GCD codec must have 1 parameter, given {}", arguments->children.size()); + + const auto children = arguments->children; + const auto * literal = children[0]->as(); + if (!literal || literal->value.getType() != Field::Types::Which::UInt64) + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD codec argument must be unsigned integer"); + + size_t user_bytes_size = literal->value.safeGet(); + if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8 && user_bytes_size != 16 && user_bytes_size != 32) + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "GCD value for gcd codec can be 1, 2, 4, 8, 16 or 32, given {}", user_bytes_size); + gcd_bytes_size = static_cast(user_bytes_size); + } + else if (column_type) { gcd_bytes_size = getGCDBytesSize(column_type); } - if (arguments && !arguments->children.empty()) - throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "GCD codec must have 0 parameters, given {}", arguments->children.size()); return std::make_shared(gcd_bytes_size); }; factory.registerCompressionCodecWithType("GCD", method_code, codec_builder); diff --git a/tests/performance/codec_gcd.xml b/tests/performance/codec_gcd.xml new file mode 100644 index 00000000000..e6caa4beb82 --- /dev/null +++ b/tests/performance/codec_gcd.xml @@ -0,0 +1,9 @@ + + CREATE TABLE gcd_codec (n UInt64 CODEC(GCD, LZ4)) ENGINE = MergeTree ORDER BY tuple() + + INSERT INTO gcd_codec SELECT number FROM system.numbers LIMIT 20000000 SETTINGS max_threads=1 + INSERT INTO gcd_codec SELECT number*1000 FROM system.numbers LIMIT 20000000 SETTINGS max_threads=1 + INSERT INTO gcd_codec SELECT intHash64(number) FROM system.numbers LIMIT 20000000 SETTINGS max_threads=1 + + DROP TABLE gcd_codec + \ No newline at end of file From f9e30b0e8b7d95fc9ededa63547432291afa50ae Mon Sep 17 00:00:00 2001 From: VanDarkholme7 <980181529@qq.com> Date: Wed, 23 Aug 2023 02:54:57 +0000 Subject: [PATCH 0247/1687] empty commit to restart CI checks From 3950dc4619145d57c15ecba0b972a1cebe7e3ce7 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 23 Aug 2023 
07:47:28 +0000 Subject: [PATCH 0248/1687] Add test for hidden attributes --- .../test_config_hidden_attributes/__init__.py | 0 .../configs/config.xml | 3 +++ .../test_config_hidden_attributes/test.py | 25 +++++++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 tests/integration/test_config_hidden_attributes/__init__.py create mode 100644 tests/integration/test_config_hidden_attributes/configs/config.xml create mode 100644 tests/integration/test_config_hidden_attributes/test.py diff --git a/tests/integration/test_config_hidden_attributes/__init__.py b/tests/integration/test_config_hidden_attributes/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_config_hidden_attributes/configs/config.xml b/tests/integration/test_config_hidden_attributes/configs/config.xml new file mode 100644 index 00000000000..7d622ef7bbc --- /dev/null +++ b/tests/integration/test_config_hidden_attributes/configs/config.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_config_hidden_attributes/test.py b/tests/integration/test_config_hidden_attributes/test.py new file mode 100644 index 00000000000..f7109022abd --- /dev/null +++ b/tests/integration/test_config_hidden_attributes/test.py @@ -0,0 +1,25 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", main_configs=["configs/config.xml"]) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_hidden(started_cluster): + assert ( + node.query( + "select value from system.server_settings where name ='max_table_size_to_drop'" + ) + == "60000000000\n" + ) From 83d4b819f3fe8076e98fdc5471b4cb9d8fb87428 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 23 Aug 2023 08:10:30 +0000 Subject: [PATCH 0249/1687] Better support for reading from archives --- src/IO/Archives/LibArchiveReader.cpp | 2 + src/IO/Archives/ZipArchiveReader.cpp | 2 + src/Processors/Sources/ShellCommandSource.cpp | 1 + src/Storages/StorageFile.cpp | 306 +++++++++++------- src/Storages/StorageFile.h | 20 +- src/TableFunctions/TableFunctionFile.cpp | 7 +- 6 files changed, 209 insertions(+), 129 deletions(-) diff --git a/src/IO/Archives/LibArchiveReader.cpp b/src/IO/Archives/LibArchiveReader.cpp index 2b7a4cca5de..a411b4bb4b6 100644 --- a/src/IO/Archives/LibArchiveReader.cpp +++ b/src/IO/Archives/LibArchiveReader.cpp @@ -231,6 +231,8 @@ public: String getFileName() const override { return handle.getFileName(); } + size_t getFileSize() override { return handle.getFileInfo().uncompressed_size; } + Handle releaseHandle() && { return std::move(handle); diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index a19c7abf8dd..fd7a09c4f20 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -312,6 +312,8 @@ public: String getFileName() const override { return handle.getFileName(); } + size_t getFileSize() override { return handle.getFileInfo().uncompressed_size; } + /// Releases owned handle to pass it to an enumerator. 
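/// (Editorial aside, not part of the original patch: the new getFileSize() above returns
/// the uncompressed entry size recorded in the archive metadata, so callers can size-check
/// an entry without decompressing it. The hand-off below is used later in this series
/// roughly as `file_enumerator = archive_reader->nextFile(std::move(read_buf));`, i.e. a
/// finished read buffer gives its archive handle back so enumeration resumes from the
/// current entry instead of reopening the archive.)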
HandleHolder releaseHandle() && { diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 3ba9ebb11de..b24f7257c3a 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -553,6 +553,7 @@ Pipe ShellCommandSourceCoordinator::createPipe( ContextPtr context, const ShellCommandSourceConfiguration & source_configuration) { + std::cout << "Creating pipe " << std::endl; ShellCommand::Config command_config(command); command_config.arguments = arguments; for (size_t i = 1; i < input_pipes.size(); ++i) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6ec40b86c05..0ecb3a9826b 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -46,6 +46,7 @@ #include #include #include +#include "IO/ReadBufferFromFileBase.h" #include #include @@ -380,33 +381,9 @@ std::unique_ptr createReadBuffer( bool use_table_fd, int table_fd, const String & compression_method, - ContextPtr context, - const String & path_to_archive = "") + ContextPtr context) { CompressionMethod method; - - if (!path_to_archive.empty()) - { - auto reader = createArchiveReader(path_to_archive); - - if (current_path.find_first_of("*?{") != std::string::npos) - { - auto matcher = std::make_shared(makeRegexpPatternFromGlobs(current_path)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", current_path, matcher->error()); - - return reader->readFile([my_matcher = std::move(matcher)](const std::string & path) - { - return re2::RE2::FullMatch(path, *my_matcher); - }, /*throw_on_not_found=*/true); - } - else - { - return reader->readFile(current_path, /*throw_on_not_found=*/true); - } - } - if (use_table_fd) method = chooseCompressionMethod("", compression_method); else @@ -496,7 +473,7 @@ ColumnsDescription StorageFile::getTableStructureFromFile( const String & compression_method, const std::optional & format_settings, ContextPtr context, - const std::vector & paths_to_archive) + const std::optional & archive_info) { if (format == "Distributed") { @@ -506,7 +483,8 @@ ColumnsDescription StorageFile::getTableStructureFromFile( return ColumnsDescription(DistributedAsyncInsertSource(paths[0]).getOutputs().front().getHeader().getNamesAndTypesList()); } - if (paths.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) + if (((archive_info && archive_info->paths_to_archive.empty()) || (!archive_info && paths.empty())) + && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file, because there are no files with provided path. 
" @@ -517,7 +495,87 @@ ColumnsDescription StorageFile::getTableStructureFromFile( columns_from_cache = tryGetColumnsFromCache(paths, format, format_settings, context); ReadBufferIterator read_buffer_iterator; - if (paths_to_archive.empty()) + if (archive_info.has_value()) + { + read_buffer_iterator = [&, + read_files = std::unordered_set(), + archive_it = archive_info->paths_to_archive.begin(), + first = true](ColumnsDescription &) mutable -> std::unique_ptr + { + std::unique_ptr read_buf; + struct stat file_stat; + while (true) + { + if (archive_it == archive_info->paths_to_archive.end()) + { + if (first) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", + format); + return nullptr; + } + + file_stat = getFileStat(*archive_it, false, -1, "File"); + if (file_stat.st_size == 0) + { + if (context->getSettingsRef().engine_file_skip_empty_files) + { + ++archive_it; + continue; + } + + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because the archive {} is empty. " + "You must specify table structure manually", + format, + *archive_it); + } + + auto archive_reader = createArchiveReader(*archive_it); + + if (archive_info->filter) + { + auto file_enumerator = archive_reader->firstFile(); + while (true) + { + if (!file_enumerator) + { + ++archive_it; + read_files.clear(); + break; + } + + const auto & filename = file_enumerator->getFileName(); + if (read_files.contains(filename) || !archive_info->filter(filename)) + { + file_enumerator->nextFile(); + continue; + } + + read_files.insert(filename); + read_buf = archive_reader->readFile(std::move(file_enumerator)); + break; + } + + if (!read_buf) + continue; + } + else + { + read_buf = archive_reader->readFile(archive_info->path_in_archive, /*throw_on_not_found=*/false); + ++archive_it; + } + + break; + } + + first = false; + return read_buf; + }; + } + else { read_buffer_iterator = [&, it = paths.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr { @@ -544,34 +602,6 @@ ColumnsDescription StorageFile::getTableStructureFromFile( return createReadBuffer(path, file_stat, false, -1, compression_method, context); }; } - else - { - read_buffer_iterator = [&, path_it = paths.begin(), archive_it = paths_to_archive.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr - { - String path; - struct stat file_stat; - do - { - if (archive_it == paths_to_archive.end()) - { - if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", - format); - return nullptr; - } - - path = *archive_it++; - file_stat = getFileStat(path, false, -1, "File"); - } - while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0); - - first = false; - return createReadBuffer(*path_it, file_stat, false, -1, compression_method, context, path); - - }; - } ColumnsDescription columns; if (columns_from_cache) @@ -580,7 +610,7 @@ ColumnsDescription StorageFile::getTableStructureFromFile( columns = readSchemaFromFormat(format, format_settings, read_buffer_iterator, paths.size() > 1, context); if (context->getSettingsRef().schema_inference_use_cache_for_file) - addColumnsToCache(paths, columns, format, format_settings, context); + addColumnsToCache(archive_info ? 
archive_info->paths_to_archive : paths, columns, format, format_settings, context); return columns; } @@ -624,14 +654,9 @@ StorageFile::StorageFile(const std::string & table_path_, const std::string & us : StorageFile(args) { if (!args.path_to_archive.empty()) - { - paths_to_archive = getPathsList(args.path_to_archive, user_files_path, args.getContext(), total_bytes_to_read); - paths = {table_path_}; - } + archive_info = getArchiveInfo(args.path_to_archive, table_path_, user_files_path, args.getContext(), total_bytes_to_read); else - { paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read); - } is_db_table = false; is_path_with_globs = paths.size() > 1; @@ -687,7 +712,13 @@ void StorageFile::setStorageMetadata(CommonArguments args) columns = getTableStructureFromFileDescriptor(args.getContext()); else { - columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext(), paths_to_archive); + columns = getTableStructureFromFile( + format_name, + paths, + compression_method, + format_settings, + args.getContext(), + archive_info); if (!args.columns.empty() && args.columns != columns) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Table structure and file structure are different"); } @@ -721,14 +752,14 @@ public: { public: explicit FilesIterator( - const Strings & files_, std::vector archives_, const IArchiveReader::NameFilter & name_filter_) - : files(files_), archives(std::move(archives_)), name_filter(name_filter_) + const Strings & files_, std::optional archive_info_) + : files(files_), archive_info(std::move(archive_info_)) { } String next() { - const auto & fs = fromArchive() ? archives : files; + const auto & fs = readFromArchive() ? archive_info->paths_to_archive : files; auto current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= fs.size()) @@ -737,35 +768,32 @@ public: return fs[current_index]; } - bool fromArchive() const + bool readFromArchive() const { - return !archives.empty(); + return archive_info.has_value(); + } + + bool validFileInArchive(const std::string & path) const + { + return archive_info->filter(path); } bool readSingleFileFromArchive() const { - return !name_filter; + return !archive_info->filter; } - bool passesFilter(const std::string & name) const + const String & getFileNameInArchive() { - std::lock_guard lock(filter_mutex); - return name_filter(name); - } + if (archive_info->path_in_archive.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only 1 filename but it's empty"); - const String & getFileName() - { - if (files.size() != 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only 1 filename but got {}", files.size()); - - return files[0]; + return archive_info->path_in_archive; } private: std::vector files; - std::vector archives; - mutable std::mutex filter_mutex; - IArchiveReader::NameFilter name_filter; + std::optional archive_info; std::atomic index = 0; }; @@ -876,7 +904,7 @@ public: { if (!storage->use_table_fd) { - if (files_iterator->fromArchive()) + if (files_iterator->readFromArchive()) { if (files_iterator->readSingleFileFromArchive()) { @@ -889,7 +917,8 @@ public: continue; archive_reader = createArchiveReader(archive); - current_path = files_iterator->getFileName(); + filename = files_iterator->getFileNameInArchive(); + read_buf = archive_reader->readFile(current_path, /*throw_on_not_found=*/false); if (!read_buf) continue; @@ -914,7 +943,7 @@ public: } bool file_found = true; - while 
+                        while (!files_iterator->validFileInArchive(file_enumerator->getFileName()))
                        {
                            if (!file_enumerator->nextFile())
                            {
@@ -925,7 +954,7 @@

                        if (file_found)
                        {
-                            current_path = file_enumerator->getFileName();
+                            filename = file_enumerator->getFileName();
                            break;
                        }

@@ -935,12 +964,17 @@
                        chassert(file_enumerator);
                        read_buf = archive_reader->readFile(std::move(file_enumerator));
                    }
+
+                    current_path = fmt::format("{}::{}", archive_reader->getPath(), filename);
                }
                else
                {
                    current_path = files_iterator->next();
                    if (current_path.empty())
                        return {};
+
+                    size_t last_slash_pos = current_path.find_last_of('/');
+                    filename = current_path.substr(last_slash_pos + 1);
                }

                /// Special case for distributed format. Defaults are not needed here.
@@ -965,7 +999,14 @@
                const Settings & settings = context->getSettingsRef();

                chassert(!storage->paths.empty());
-                const auto max_parsing_threads = std::max(settings.max_threads / storage->paths.size(), 1UL);
+
+                size_t file_num = 0;
+                if (storage->archive_info)
+                    file_num = storage->archive_info->paths_to_archive.size();
+                else
+                    file_num = storage->paths.size();
+
+                const auto max_parsing_threads = std::max(settings.max_threads / file_num, 1UL);
                input_format = context->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, storage->format_settings, max_parsing_threads);
                input_format->setQueryInfo(query_info, context);
@@ -1006,15 +1047,9 @@
            for (const auto & virtual_column : requested_virtual_columns)
            {
                if (virtual_column.name == "_path")
-                {
                    chunk.addColumn(virtual_column.type->createColumnConst(num_rows, current_path)->convertToFullColumnIfConst());
-                }
                else if (virtual_column.name == "_file")
-                {
-                    size_t last_slash_pos = current_path.find_last_of('/');
-                    auto file_name = current_path.substr(last_slash_pos + 1);
-                    chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_name)->convertToFullColumnIfConst());
-                }
+                    chunk.addColumn(virtual_column.type->createColumnConst(num_rows, filename)->convertToFullColumnIfConst());
            }

            return chunk;
@@ -1029,7 +1064,7 @@
        pipeline.reset();
        input_format.reset();

-        if (files_iterator->fromArchive() && !files_iterator->readSingleFileFromArchive())
+        if (files_iterator->readFromArchive() && !files_iterator->readSingleFileFromArchive())
            file_enumerator = archive_reader->nextFile(std::move(read_buf));

        read_buf.reset();
@@ -1044,6 +1079,7 @@
private:
    StorageSnapshotPtr storage_snapshot;
    FilesIteratorPtr files_iterator;
    String current_path;
+    String filename;
    Block sample_block;
    std::unique_ptr<ReadBuffer> read_buf;
    InputFormatPtr input_format;
@@ -1083,44 +1119,33 @@ Pipe StorageFile::read(
    }
    else
    {
-        const auto & p = paths_to_archive.empty() ? paths : paths_to_archive;
-        if (p.size() == 1 && !fs::exists(p[0]))
+        const std::vector<std::string> * p;
+
+        if (archive_info.has_value())
+            p = &archive_info->paths_to_archive;
+        else
+            p = &paths;
+
+        if (p->size() == 1 && !fs::exists((*p)[0]))
        {
            if (context->getSettingsRef().engine_file_empty_if_not_exists)
                return Pipe(std::make_shared<NullSource>(storage_snapshot->getSampleBlockForColumns(column_names)));
            else
-                throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", p[0]);
+                throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", (*p)[0]);
        }
    }

-    IArchiveReader::NameFilter filter;
-    if (!paths_to_archive.empty())
-    {
-        if (paths.size() != 1)
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Multiple paths defined for reading from archive");
-
-        const auto & path = paths[0];
-
-        if (path.find_first_of("*?{") != std::string::npos)
-        {
-            auto matcher = std::make_shared<re2::RE2>(makeRegexpPatternFromGlobs(path));
-            if (!matcher->ok())
-                throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
-                    "Cannot compile regex from glob ({}): {}", path, matcher->error());
-
-            filter = [matcher](const std::string & p)
-            {
-                return re2::RE2::FullMatch(p, *matcher);
-            };
-        }
-    }
-
-    auto files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, paths_to_archive, std::move(filter));
+    auto files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, archive_info);
    auto this_ptr = std::static_pointer_cast<StorageFile>(shared_from_this());

    size_t num_streams = max_num_streams;
-    auto files_to_read = std::max(paths_to_archive.size(), paths.size());
+
+    size_t files_to_read = 0;
+    if (archive_info)
+        files_to_read = archive_info->paths_to_archive.size();
+    else
+        files_to_read = paths.size();
+
    if (max_num_streams > files_to_read)
        num_streams = files_to_read;

@@ -1403,7 +1428,7 @@ SinkToStoragePtr StorageFile::write(
    ContextPtr context,
    bool /*async_insert*/)
{
-    if (!use_table_fd && !paths_to_archive.empty())
+    if (!use_table_fd && archive_info.has_value())
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Writing to archives is not supported");

    if (format_name == "Distributed")
@@ -1750,4 +1775,37 @@ void StorageFile::parseFileSource(String source, String & filename, String & pat
    filename = filename_view;
}

+StorageFile::ArchiveInfo StorageFile::getArchiveInfo(
+    const std::string & path_to_archive,
+    const std::string & file_in_archive,
+    const std::string & user_files_path,
+    ContextPtr context,
+    size_t & total_bytes_to_read
+)
+{
+    ArchiveInfo archive_info;
+
+    if (file_in_archive.find_first_of("*?{") != std::string::npos)
+    {
+        auto matcher = std::make_shared<re2::RE2>(makeRegexpPatternFromGlobs(file_in_archive));
+        if (!matcher->ok())
+            throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
+                "Cannot compile regex from glob ({}): {}", file_in_archive, matcher->error());
+
+        archive_info.filter = [matcher, matcher_mutex = std::make_shared<std::mutex>()](const std::string & p) mutable
+        {
+            std::lock_guard lock(*matcher_mutex);
+            return re2::RE2::FullMatch(p, *matcher);
+        };
+    }
+    else
+    {
+        archive_info.path_in_archive = file_in_archive;
+    }
+
+    archive_info.paths_to_archive = getPathsList(path_to_archive, user_files_path, context, total_bytes_to_read);
+
+    return archive_info;
+}
+
}
diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h
index b5c23d87bba..e479b767f83 100644
--- a/src/Storages/StorageFile.h
+++ b/src/Storages/StorageFile.h
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <IO/Archives/IArchiveReader.h>
 #include
 #include

@@ -83,6 +84,13 @@ public:

    bool supportsPartitionBy() const override { return true; }

+    struct ArchiveInfo
+    {
+        std::vector<std::string> paths_to_archive;
+        std::string path_in_archive; // used when reading a single file from archive
+        IArchiveReader::NameFilter filter; // used when files inside archive are defined with a glob
+    };
+
    ColumnsDescription getTableStructureFromFileDescriptor(ContextPtr context);

    static ColumnsDescription getTableStructureFromFile(
@@ -91,12 +99,19 @@ public:
        const String & compression_method,
        const std::optional<FormatSettings> & format_settings,
        ContextPtr context,
-        const std::vector<String> & paths_to_archive = {"auto"});
+        const std::optional<ArchiveInfo> & archive_info = std::nullopt);

    static SchemaCache & getSchemaCache(const ContextPtr & context);

    static void parseFileSource(String source, String & filename, String & path_to_archive);

+    static ArchiveInfo getArchiveInfo(
+        const std::string & path_to_archive,
+        const std::string & file_in_archive,
+        const std::string & user_files_path,
+        ContextPtr context,
+        size_t & total_bytes_to_read);
+
protected:
    friend class StorageFileSource;
    friend class StorageFileSink;
@@ -126,7 +141,8 @@ private:
    std::string base_path;

    std::vector<std::string> paths;
-    std::vector<std::string> paths_to_archive;
+
+    std::optional<ArchiveInfo> archive_info;

    bool is_db_table = true; /// Table is stored in real database, not user's file
    bool use_table_fd = false; /// Use table_fd instead of path
diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp
index 56a6839ddbb..f75c56e6523 100644
--- a/src/TableFunctions/TableFunctionFile.cpp
+++ b/src/TableFunctions/TableFunctionFile.cpp
@@ -97,13 +97,14 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context
    size_t total_bytes_to_read = 0;

    Strings paths;
-    Strings paths_to_archives;
+    std::optional<StorageFile::ArchiveInfo> archive_info;
    if (path_to_archive.empty())
        paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
    else
-        paths_to_archives = StorageFile::getPathsList(path_to_archive, context->getUserFilesPath(), context, total_bytes_to_read);
+        archive_info
+            = StorageFile::getArchiveInfo(path_to_archive, filename, context->getUserFilesPath(), context, total_bytes_to_read);

-    return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, paths_to_archives);
+    return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, archive_info);
}

From cceb2b2d570bf97de5421c17d8825d9df48139da Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Wed, 23 Aug 2023 08:12:05 +0000
Subject: [PATCH 0250/1687] Disable integration with async replication

---
 tests/integration/helpers/cluster.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 9f8fa390be3..35a656aa4a4 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -4258,7 +4258,7 @@ class ClickHouseInstance: if len(self.custom_dictionaries_paths): write_embedded_config("0_common_enable_dictionaries.xml", self.config_d_dir) - #write_embedded_config("0_common_enable_keeper_async_replication.xml", self.config_d_dir) + # write_embedded_config("0_common_enable_keeper_async_replication.xml", self.config_d_dir) logging.debug("Generate and write macros file") macros = self.macros.copy() From 625be86dc0d254447429a8ed4737cca4bb7137e3 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 23 Aug 2023 08:59:41 +0000 Subject: [PATCH 0252/1687] Fix tests --- src/Coordination/tests/gtest_coordination.cpp | 210 +++++++++++++----- utils/keeper-data-dumper/main.cpp | 4 +- 2 files changed, 162 insertions(+), 52 deletions(-) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 08d31bf34f6..759304b40ba 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -298,7 +298,9 @@ TEST_P(CoordinationTest, ChangelogTestSimple) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello world", 77); changelog.append(entry); @@ -328,7 +330,9 @@ TEST_P(CoordinationTest, ChangelogTestFile) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello world", 77); changelog.append(entry); @@ -360,7 +364,9 @@ TEST_P(CoordinationTest, ChangelogReadWrite) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) @@ -375,7 +381,9 @@ TEST_P(CoordinationTest, ChangelogReadWrite) waitDurableLogs(changelog); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, + DB::FlushSettings(), + keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 10); EXPECT_EQ(changelog_reader.last_entry()->get_term(), changelog.last_entry()->get_term()); @@ -398,7 +406,9 @@ TEST_P(CoordinationTest, ChangelogWriteAt) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = 
params.enable_compression, .rotate_interval = 1000}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) { @@ -421,7 +431,9 @@ TEST_P(CoordinationTest, ChangelogWriteAt) EXPECT_EQ(changelog.next_slot(), 8); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, + DB::FlushSettings(), + keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), changelog.size()); @@ -438,7 +450,9 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 7; ++i) { @@ -455,7 +469,9 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 7); @@ -520,7 +536,9 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 3; ++i) @@ -570,7 +588,9 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_EQ(changelog.last_entry()->get_term(), 60); /// And we able to read it DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog_reader.init(7, 0); EXPECT_EQ(changelog_reader.size(), 1); @@ -586,7 +606,9 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperations) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) { @@ -602,7 +624,9 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperations) auto entries = changelog.pack(1, 5); DB::KeeperLogStore apply_changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); 
apply_changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) @@ -639,7 +663,9 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) nuraft::ptr entries; { DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) { @@ -658,7 +684,9 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) ChangelogDirTest test1("./logs1"); setLogDirectory("./logs1"); DB::KeeperLogStore changelog_new( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog_new.init(1, 0); EXPECT_EQ(changelog_new.size(), 0); @@ -681,7 +709,9 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) EXPECT_EQ(changelog_new.next_slot(), 11); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog_reader.init(5, 0); } @@ -693,7 +723,9 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) @@ -735,7 +767,9 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); DB::KeeperLogStore changelog_read( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog_read.init(1, 0); EXPECT_EQ(changelog_read.size(), 7); EXPECT_EQ(changelog_read.start_index(), 1); @@ -750,7 +784,9 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) @@ -792,7 +828,9 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); DB::KeeperLogStore changelog_read( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog_read.init(1, 0); EXPECT_EQ(changelog_read.size(), 
11); EXPECT_EQ(changelog_read.start_index(), 1); @@ -807,7 +845,9 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) { @@ -855,7 +895,9 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -877,7 +919,9 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) EXPECT_FALSE(fs::exists("./logs/changelog_36_40.bin" + params.extension)); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog_reader.init(1, 0); auto entry = getLogEntry("36_hello_world", 360); @@ -924,7 +968,9 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) setLogDirectory(log_folder); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -949,7 +995,9 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) plain_buf.truncate(0); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog_reader.init(1, 0); changelog_reader.end_of_append_batch(0, 0); @@ -983,7 +1031,9 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) assertBrokenLogRemoved(log_folder, "changelog_31_35.bin" + params.extension); DB::KeeperLogStore changelog_reader2( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog_reader2.init(1, 0); EXPECT_EQ(changelog_reader2.size(), 11); EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777); @@ -996,7 +1046,9 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for 
(size_t i = 0; i < 35; ++i) @@ -1015,7 +1067,9 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) plain_buf.truncate(30); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::FlushSettings(), + keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 0); @@ -1031,7 +1085,9 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777); DB::KeeperLogStore changelog_reader2( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1}, + DB::FlushSettings(), + keeper_context); changelog_reader2.init(1, 0); EXPECT_EQ(changelog_reader2.size(), 1); EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777); @@ -1044,7 +1100,9 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1061,7 +1119,9 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles) fs::remove("./logs/changelog_1_20.bin" + params.extension); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::FlushSettings(), + keeper_context); /// It should print error message, but still able to start changelog_reader.init(5, 0); assertBrokenLogRemoved("./logs", "changelog_21_40.bin" + params.extension); @@ -1074,7 +1134,9 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles2) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1095,7 +1157,9 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles2) fs::remove("./logs/changelog_21_30.bin" + params.extension); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, + DB::FlushSettings(), + keeper_context); /// It should print error message, but still able to start changelog_reader.init(5, 0); EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + params.extension)); @@ -1555,6 +1619,7 @@ void testLogAndStateMachine( DB::KeeperLogStore changelog( DB::LogFileSettings{ .force_sync = true, .compress_logs = enable_compression, .rotate_interval = settings->rotate_log_storage_interval}, + DB::FlushSettings(), keeper_context); 
changelog.init(state_machine->last_commit_index() + 1, settings->reserved_log_items); for (size_t i = 1; i < total_logs + 1; ++i) @@ -1599,6 +1664,7 @@ void testLogAndStateMachine( DB::KeeperLogStore restore_changelog( DB::LogFileSettings{ .force_sync = true, .compress_logs = enable_compression, .rotate_interval = settings->rotate_log_storage_interval}, + DB::FlushSettings(), keeper_context); restore_changelog.init(restore_machine->last_commit_index() + 1, settings->reserved_log_items); @@ -1851,7 +1917,9 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) setLogDirectory("./logs"); { DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -1870,7 +1938,9 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + params.extension)); DB::KeeperLogStore changelog_1( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, + DB::FlushSettings(), + keeper_context); changelog_1.init(0, 50); for (size_t i = 0; i < 55; ++i) { @@ -1887,7 +1957,9 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + params.extension)); DB::KeeperLogStore changelog_2( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 7}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 7}, + DB::FlushSettings(), + keeper_context); changelog_2.init(98, 55); for (size_t i = 0; i < 17; ++i) @@ -1911,7 +1983,9 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin" + params.extension)); DB::KeeperLogStore changelog_3( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::FlushSettings(), + keeper_context); changelog_3.init(116, 3); for (size_t i = 0; i < 17; ++i) { @@ -1961,7 +2035,9 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) ChangelogDirTest logs("./logs"); setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -1976,7 +2052,9 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) waitDurableLogs(changelog); DB::KeeperLogStore changelog1( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog1.init(0, 3); for (size_t i = 55; i < 70; ++i) { @@ -1990,7 +2068,9 @@ 
TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) waitDurableLogs(changelog1); DB::KeeperLogStore changelog2( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog2.init(0, 3); for (size_t i = 70; i < 80; ++i) { @@ -2056,7 +2136,10 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) setLogDirectory("./logs"); { LOG_INFO(log, "================First time====================="); - DB::KeeperLogStore changelog(DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::KeeperLogStore changelog( + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2068,7 +2151,9 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) { LOG_INFO(log, "================Second time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2080,7 +2165,9 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) { LOG_INFO(log, "================Third time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2092,7 +2179,9 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) { LOG_INFO(log, "================Fourth time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2112,7 +2201,9 @@ TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth) { LOG_INFO(log, "================First time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); for (size_t j = 0; j < 7; ++j) { @@ -2124,7 +2215,9 @@ TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth) } DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, 
.compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog.init(1, 0); EXPECT_EQ(changelog.next_slot(), 36 * 7 + 1); } @@ -2137,7 +2230,9 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) { LOG_INFO(log, "================First time====================="); DB::KeeperLogStore changelog1( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog1.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog1.append(entry); @@ -2149,7 +2244,9 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) { LOG_INFO(log, "================Second time====================="); DB::KeeperLogStore changelog2( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog2.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog2.append(entry); @@ -2161,7 +2258,9 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) { LOG_INFO(log, "================Third time====================="); DB::KeeperLogStore changelog3( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog3.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog3.append(entry); @@ -2173,7 +2272,9 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) { LOG_INFO(log, "================Fourth time====================="); DB::KeeperLogStore changelog4( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog4.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog4.append(entry); @@ -2235,7 +2336,9 @@ TEST_P(CoordinationTest, TestLogGap) setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -2248,7 +2351,9 @@ TEST_P(CoordinationTest, TestLogGap) } DB::KeeperLogStore changelog1( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, keeper_context); + DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::FlushSettings(), + keeper_context); changelog1.init(61, 3); /// Logs discarded @@ -2597,6 +2702,7 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize) DB::KeeperLogStore changelog( DB::LogFileSettings{ .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20, .max_size = 50 * 1024 * 1024}, + DB::FlushSettings(), 
            keeper_context);
        changelog.init(1, 0);
@@ -2616,6 +2722,7 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize)
        DB::KeeperLogStore changelog(
            DB::LogFileSettings{
                .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, .max_size = 4000},
+            DB::FlushSettings(),
            keeper_context);
        changelog.init(1, 0);
@@ -2637,6 +2744,7 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize)
        DB::KeeperLogStore changelog(
            DB::LogFileSettings{
                .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, .max_size = 4000},
+            DB::FlushSettings(),
            keeper_context);
        changelog.init(1, 0);
        ASSERT_EQ(changelog.entry_at(last_entry_index)->get_term(), (i - 1 + 44) * 10);
diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp
index 9f089a26360..aa8c0efbb26 100644
--- a/utils/keeper-data-dumper/main.cpp
+++ b/utils/keeper-data-dumper/main.cpp
@@ -74,7 +74,9 @@ int main(int argc, char *argv[])
    LOG_INFO(logger, "Last committed index: {}", last_commited_index);

    DB::KeeperLogStore changelog(
-        LogFileSettings{.force_sync = true, .compress_logs = settings->compress_logs, .rotate_interval = 10000000}, keeper_context);
+        LogFileSettings{.force_sync = true, .compress_logs = settings->compress_logs, .rotate_interval = 10000000},
+        FlushSettings(),
+        keeper_context);
    changelog.init(last_commited_index, 10000000000UL); /// collect all logs
    if (changelog.size() == 0)
        LOG_INFO(logger, "Changelog empty");

From fd51839b97ebd219d8d47b948f99dbe2e1ee7f04 Mon Sep 17 00:00:00 2001
From: priera
Date: Tue, 22 Aug 2023 14:47:57 +0200
Subject: [PATCH 0253/1687] basic implementation

---
 src/Storages/StorageReplicatedMergeTree.cpp          |  5 ++++-
 ..._replace_partition_from_table_zookeeper.reference |  3 +++
 .../00626_replace_partition_from_table_zookeeper.sh  | 12 ++++++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 76a2ad9883c..1c52fe9fe49 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -7568,6 +7568,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom(

        String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range);

+        std::set<String> replaced_parts;
        for (const auto & src_part : src_all_parts)
        {
            /// We also make some kind of deduplication to avoid duplicated parts in case of ATTACH PARTITION
@@ -7580,13 +7581,15 @@
                    "' has inconsistent granularity with table", partition_id, src_part->name);

            String hash_hex = src_part->checksums.getTotalChecksumHex();
+            const bool is_duplicated_part = replaced_parts.contains(hash_hex);
+            replaced_parts.insert(hash_hex);

            if (replace)
                LOG_INFO(log, "Trying to replace {} with hash_hex {}", src_part->name, hash_hex);
            else
                LOG_INFO(log, "Trying to attach {} with hash_hex {}", src_part->name, hash_hex);

-            String block_id_path = replace ? "" : (fs::path(zookeeper_path) / "blocks" / (partition_id + "_replace_from_" + hash_hex));
+            String block_id_path = (replace || is_duplicated_part) ? "" : (fs::path(zookeeper_path) / "blocks" / (partition_id + "_replace_from_" + hash_hex));
"" : (fs::path(zookeeper_path) / "blocks" / (partition_id + "_replace_from_" + hash_hex)); auto lock = allocateBlockNumber(partition_id, zookeeper, block_id_path); if (!lock) diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference index c6208941ac6..2ac02e17e3f 100644 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference @@ -9,6 +9,9 @@ REPLACE simple REPLACE empty 2 4 2 4 +REPLACE empty partition with duplicated parts +6 12 +6 12 REPLACE recursive 4 8 4 8 diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index 334025cba28..1c7b753c4cb 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -57,6 +57,18 @@ $CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" $CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT 'REPLACE empty partition with duplicated parts';" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (3, '0', 1), (3, '0', 3);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (3, '0', 1), (3, '0', 3);" +query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 3 FROM src;" + +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" + +query_with_retry "ALTER TABLE src DROP PARTITION 3;" +query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 3 FROM src;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'REPLACE recursive';" query_with_retry "ALTER TABLE dst_r1 DROP PARTITION 1;" From 0fd26152e3d7099c0eec70d8e3887bd996fb04a1 Mon Sep 17 00:00:00 2001 From: Artur Malchanau Date: Wed, 23 Aug 2023 12:42:29 +0300 Subject: [PATCH 0254/1687] Do not warn about arch_sys_counter clock --- programs/server/Server.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d87b308c340..0c5387d98e8 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -450,11 +450,11 @@ void checkForUsersNotInMainConfig( /// Unused in other builds #if defined(OS_LINUX) -static String readString(const String & path) +static String readLine(const String & path) { ReadBufferFromFile in(path); String contents; - readStringUntilEOF(contents, in); + readStringUntilNewlineInto(contents, in); return contents; } @@ -479,9 +479,16 @@ static void sanityChecks(Server & server) #if defined(OS_LINUX) try { + const std::unordered_set fastClockSources = { + // ARM clock + "arch_sys_counter", + // KVM guest clock + "kvm-clock", + // X86 clock + "tsc", + }; const char * filename = "/sys/devices/system/clocksource/clocksource0/current_clocksource"; - String clocksource = readString(filename); - if (clocksource.find("tsc") == std::string::npos && clocksource.find("kvm-clock") == std::string::npos) + if (fastClockSources.count(readLine(filename)) == 0) server.context()->addWarningMessage("Linux is not using a fast clock source. Performance can be degraded. 
Check " + String(filename)); } catch (...) @@ -501,7 +508,7 @@ static void sanityChecks(Server & server) try { const char * filename = "/sys/kernel/mm/transparent_hugepage/enabled"; - if (readString(filename).find("[always]") != std::string::npos) + if (readLine(filename).find("[always]") != std::string::npos) server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\". Check " + String(filename)); } catch (...) From 5b17a6cfb591e85a629d89a161cf107904c5846c Mon Sep 17 00:00:00 2001 From: priera Date: Wed, 23 Aug 2023 13:06:59 +0200 Subject: [PATCH 0255/1687] new test --- ...e_partition_from_table_zookeeper.reference | 3 - ..._replace_partition_from_table_zookeeper.sh | 220 +++++++++--------- ..._with_duplicated_parts_zookeeper.reference | 3 + ...rtition_with_duplicated_parts_zookeeper.sh | 38 +++ 4 files changed, 151 insertions(+), 113 deletions(-) create mode 100644 tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.reference create mode 100755 tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference index 2ac02e17e3f..c6208941ac6 100644 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference @@ -9,9 +9,6 @@ REPLACE simple REPLACE empty 2 4 2 4 -REPLACE empty partition with duplicated parts -6 12 -6 12 REPLACE recursive 4 8 4 8 diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index 1c7b753c4cb..44ab712bcf8 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) @@ -8,162 +7,163 @@ CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh +. $CURDIR/../shell_config.sh -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" +function query_with_retry +{ + retry=0 + until [ $retry -ge 5 ] + do + result=`$CLICKHOUSE_CLIENT $2 --query="$1" 2>&1` + if [ "$?" 
== 0 ]; then + echo -n $result + return + else + retry=$(($retry + 1)) + sleep 3 + fi + done + echo "Query '$1' failed with '$result'" +} -$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r1 (p UInt64, k String, d UInt64) -ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '1') PARTITION BY p ORDER BY k -SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r2 (p UInt64, k String, d UInt64) -ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '2') PARTITION BY p ORDER BY k -SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.dst_r1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.dst_r2;" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (0, '0', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (2, '0', 1);" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test.dst_r1 (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/dst_1', '1') PARTITION BY p ORDER BY k SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test.dst_r2 (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/dst_1', '2') PARTITION BY p ORDER BY k SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0;" + +$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (0, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '1', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (2, '0', 1);" $CLICKHOUSE_CLIENT --query="SELECT 'Initial';" -$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (0, '1', 2);" -$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2), (1, '2', 2);" -$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (2, '1', 2);" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (0, '1', 2);" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (1, '1', 2), (1, '2', 2);" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (2, '1', 2);" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM src;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.src;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'REPLACE simple';" -query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM src;" -query_with_retry "ALTER TABLE src DROP PARTITION 1;" +query_with_retry 
"ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src;" +query_with_retry "ALTER TABLE test.src DROP PARTITION 1;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM src;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.src;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'REPLACE empty';" -query_with_retry "ALTER TABLE src DROP PARTITION 1;" -query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM src;" +query_with_retry "ALTER TABLE test.src DROP PARTITION 1;" +query_with_retry "ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT 'REPLACE empty partition with duplicated parts';" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (3, '0', 1), (3, '0', 3);" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (3, '0', 1), (3, '0', 3);" -query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 3 FROM src;" - -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" - -query_with_retry "ALTER TABLE src DROP PARTITION 3;" -query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 3 FROM src;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'REPLACE recursive';" -query_with_retry "ALTER TABLE dst_r1 DROP PARTITION 1;" -$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2), (1, '2', 2);" +query_with_retry "ALTER TABLE test.dst_r1 DROP PARTITION 1;" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (1, '1', 2), (1, '2', 2);" $CLICKHOUSE_CLIENT --query="CREATE table test_block_numbers (m UInt64) ENGINE MergeTree() ORDER BY tuple();" -$CLICKHOUSE_CLIENT --query="INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE database='$CLICKHOUSE_DATABASE' AND table='dst_r1' AND active AND name LIKE '1_%';" +$CLICKHOUSE_CLIENT --query="INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE database='test' AND table='dst_r1' AND active AND name LIKE '1_%';" -query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM dst_r1;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" +query_with_retry "ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" -$CLICKHOUSE_CLIENT --query="INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE 
database='$CLICKHOUSE_DATABASE' AND table='dst_r1' AND active AND name LIKE '1_%';" +$CLICKHOUSE_CLIENT --query="INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE database='test' AND table='dst_r1' AND active AND name LIKE '1_%';" $CLICKHOUSE_CLIENT --query="SELECT (max(m) - min(m) > 1) AS new_block_is_generated FROM test_block_numbers;" $CLICKHOUSE_CLIENT --query="DROP TABLE test_block_numbers;" $CLICKHOUSE_CLIENT --query="SELECT 'ATTACH FROM';" -query_with_retry "ALTER TABLE dst_r1 DROP PARTITION 1;" -$CLICKHOUSE_CLIENT --query="DROP TABLE src;" +query_with_retry "ALTER TABLE test.dst_r1 DROP PARTITION 1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE test.src;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '1', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r2 VALUES (1, '1', 2);" -query_with_retry "ALTER TABLE dst_r2 ATTACH PARTITION 1 FROM src;" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r2 VALUES (1, '1', 2);" +query_with_retry "ALTER TABLE test.dst_r2 ATTACH PARTITION 1 FROM test.src;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'REPLACE with fetch';" -$CLICKHOUSE_CLIENT --query="DROP TABLE src;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2); -- trash part to be deleted" +$CLICKHOUSE_CLIENT --query="DROP TABLE test.src;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '1', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (1, '1', 2); -- trash part to be deleted" # Stop replication at the second replica and remove source table to use fetch instead of copying -$CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES dst_r2;" -query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM src;" -$CLICKHOUSE_CLIENT --query="DROP TABLE src;" -$CLICKHOUSE_CLIENT --query="SYSTEM START REPLICATION QUEUES dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES test.dst_r2;" +query_with_retry "ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE test.src;" +$CLICKHOUSE_CLIENT --query="SYSTEM START REPLICATION QUEUES test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" 
-$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'REPLACE with fetch of merged';" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" -query_with_retry "ALTER TABLE dst_r1 DROP PARTITION 1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.src;" +query_with_retry "ALTER TABLE test.dst_r1 DROP PARTITION 1;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2); -- trash part to be deleted" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '1', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (1, '1', 2); -- trash part to be deleted" -$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES dst_r2;" -$CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES dst_r2;" -query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM src;" -$CLICKHOUSE_CLIENT --query="DROP TABLE src;" +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES test.dst_r2;" +query_with_retry "ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE test.src;" # do not wait other replicas to execute OPTIMIZE -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM dst_r1;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r1;" -query_with_retry "OPTIMIZE TABLE dst_r1 PARTITION 1;" --replication_alter_partitions_sync=0 --optimize_throw_if_noop=1 +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r1;" +query_with_retry "OPTIMIZE TABLE test.dst_r1 PARTITION 1;" "--replication_alter_partitions_sync=0 --optimize_throw_if_noop=1" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SYSTEM START REPLICATION QUEUES dst_r2;" -$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES dst_r2;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM START REPLICATION QUEUES test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM test.dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'After restart';" -$CLICKHOUSE_CLIENT --query="SYSTEM RESTART REPLICA dst_r1;" -$CLICKHOUSE_CLIENT --query="SYSTEM RESTART REPLICA dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM 
dst_r2;" +$CLICKHOUSE_CLIENT --query="USE test;" +$CLICKHOUSE_CLIENT --query="SYSTEM RESTART REPLICA test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SYSTEM RESTART REPLICAS;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'DETACH+ATTACH PARTITION';" -query_with_retry "ALTER TABLE dst_r1 DETACH PARTITION 0;" -query_with_retry "ALTER TABLE dst_r1 DETACH PARTITION 1;" -query_with_retry "ALTER TABLE dst_r1 DETACH PARTITION 2;" -query_with_retry "ALTER TABLE dst_r1 ATTACH PARTITION 1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" +query_with_retry "ALTER TABLE test.dst_r1 DETACH PARTITION 0;" +query_with_retry "ALTER TABLE test.dst_r1 DETACH PARTITION 1;" +query_with_retry "ALTER TABLE test.dst_r1 DETACH PARTITION 2;" +query_with_retry "ALTER TABLE test.dst_r1 ATTACH PARTITION 1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.dst_r1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.dst_r2;" diff --git a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.reference b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.reference new file mode 100644 index 00000000000..ec6c6395f95 --- /dev/null +++ b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.reference @@ -0,0 +1,3 @@ +REPLACE empty partition with duplicated parts +4 8 +4 8 diff --git a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh new file mode 100755 index 00000000000..b4e5a6b3072 --- /dev/null +++ b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-s3-storage + +# Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem +# (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) +# and when we do DETACH TABLE / ATTACH TABLE or SYSTEM RESTART REPLICA, these files may be discovered +# and discarded after restart with Warning/Error messages in log. This is Ok. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r1 (p UInt64, k String, d UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '1') PARTITION BY p ORDER BY k +SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r2 (p UInt64, k String, d UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '2') PARTITION BY p ORDER BY k +SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" + +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (0, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1), (1, '0', 3);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1), (1, '0', 3);" + +$CLICKHOUSE_CLIENT --query="SELECT 'REPLACE empty partition with duplicated parts';" +query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM src;" + +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" From ed41b0d466ccda380a594b7dd5d8e3c8ebf4dea1 Mon Sep 17 00:00:00 2001 From: priera Date: Wed, 23 Aug 2023 13:15:53 +0200 Subject: [PATCH 0256/1687] removed wrong change --- ..._replace_partition_from_table_zookeeper.sh | 208 +++++++++--------- 1 file changed, 98 insertions(+), 110 deletions(-) diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index 44ab712bcf8..334025cba28 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: zookeeper, no-s3-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) @@ -7,163 +8,150 @@ CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. $CURDIR/../shell_config.sh +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh -function query_with_retry -{ - retry=0 - until [ $retry -ge 5 ] - do - result=`$CLICKHOUSE_CLIENT $2 --query="$1" 2>&1` - if [ "$?" 
== 0 ]; then - echo -n $result - return - else - retry=$(($retry + 1)) - sleep 3 - fi - done - echo "Query '$1' failed with '$result'" -} +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.src;" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.dst_r1;" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.dst_r2;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r1 (p UInt64, k String, d UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '1') PARTITION BY p ORDER BY k +SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE dst_r2 (p UInt64, k String, d UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst_1', '2') PARTITION BY p ORDER BY k +SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test.dst_r1 (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/dst_1', '1') PARTITION BY p ORDER BY k SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test.dst_r2 (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/dst_1', '2') PARTITION BY p ORDER BY k SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0;" - -$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (0, '0', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '0', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '1', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (2, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (0, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (2, '0', 1);" $CLICKHOUSE_CLIENT --query="SELECT 'Initial';" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (0, '1', 2);" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (1, '1', 2), (1, '2', 2);" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (2, '1', 2);" +$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (0, '1', 2);" +$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2), (1, '2', 2);" +$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (2, '1', 2);" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.src;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM src;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" $CLICKHOUSE_CLIENT 
--query="SELECT 'REPLACE simple';" -query_with_retry "ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src;" -query_with_retry "ALTER TABLE test.src DROP PARTITION 1;" +query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM src;" +query_with_retry "ALTER TABLE src DROP PARTITION 1;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.src;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM src;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'REPLACE empty';" -query_with_retry "ALTER TABLE test.src DROP PARTITION 1;" -query_with_retry "ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src;" +query_with_retry "ALTER TABLE src DROP PARTITION 1;" +query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM src;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'REPLACE recursive';" -query_with_retry "ALTER TABLE test.dst_r1 DROP PARTITION 1;" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (1, '1', 2), (1, '2', 2);" +query_with_retry "ALTER TABLE dst_r1 DROP PARTITION 1;" +$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2), (1, '2', 2);" $CLICKHOUSE_CLIENT --query="CREATE table test_block_numbers (m UInt64) ENGINE MergeTree() ORDER BY tuple();" -$CLICKHOUSE_CLIENT --query="INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE database='test' AND table='dst_r1' AND active AND name LIKE '1_%';" +$CLICKHOUSE_CLIENT --query="INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE database='$CLICKHOUSE_DATABASE' AND table='dst_r1' AND active AND name LIKE '1_%';" -query_with_retry "ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" +query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" -$CLICKHOUSE_CLIENT --query="INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE database='test' AND table='dst_r1' AND active AND name LIKE '1_%';" +$CLICKHOUSE_CLIENT --query="INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE database='$CLICKHOUSE_DATABASE' AND table='dst_r1' AND active AND name LIKE '1_%';" $CLICKHOUSE_CLIENT --query="SELECT (max(m) - min(m) > 1) AS new_block_is_generated FROM test_block_numbers;" $CLICKHOUSE_CLIENT --query="DROP TABLE test_block_numbers;" $CLICKHOUSE_CLIENT --query="SELECT 'ATTACH FROM';" -query_with_retry "ALTER 
TABLE test.dst_r1 DROP PARTITION 1;" -$CLICKHOUSE_CLIENT --query="DROP TABLE test.src;" +query_with_retry "ALTER TABLE dst_r1 DROP PARTITION 1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE src;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '0', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '1', 1);" +$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r2 VALUES (1, '1', 2);" -query_with_retry "ALTER TABLE test.dst_r2 ATTACH PARTITION 1 FROM test.src;" +$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r2 VALUES (1, '1', 2);" +query_with_retry "ALTER TABLE dst_r2 ATTACH PARTITION 1 FROM src;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'REPLACE with fetch';" -$CLICKHOUSE_CLIENT --query="DROP TABLE test.src;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '0', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '1', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (1, '1', 2); -- trash part to be deleted" +$CLICKHOUSE_CLIENT --query="DROP TABLE src;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2); -- trash part to be deleted" # Stop replication at the second replica and remove source table to use fetch instead of copying -$CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES test.dst_r2;" -query_with_retry "ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src;" -$CLICKHOUSE_CLIENT --query="DROP TABLE test.src;" -$CLICKHOUSE_CLIENT --query="SYSTEM START REPLICATION QUEUES test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES dst_r2;" +query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE src;" +$CLICKHOUSE_CLIENT --query="SYSTEM START REPLICATION QUEUES dst_r2;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'REPLACE with fetch of merged';" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.src;" -query_with_retry "ALTER TABLE test.dst_r1 DROP PARTITION 1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" 
+query_with_retry "ALTER TABLE dst_r1 DROP PARTITION 1;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '0', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.src VALUES (1, '1', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO test.dst_r1 VALUES (1, '1', 2); -- trash part to be deleted" +$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2); -- trash part to be deleted" -$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES test.dst_r2;" -query_with_retry "ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src;" -$CLICKHOUSE_CLIENT --query="DROP TABLE test.src;" +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES dst_r2;" +query_with_retry "ALTER TABLE dst_r1 REPLACE PARTITION 1 FROM src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE src;" # do not wait other replicas to execute OPTIMIZE -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r1;" -query_with_retry "OPTIMIZE TABLE test.dst_r1 PARTITION 1;" "--replication_alter_partitions_sync=0 --optimize_throw_if_noop=1" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r1;" +query_with_retry "OPTIMIZE TABLE dst_r1 PARTITION 1;" --replication_alter_partitions_sync=0 --optimize_throw_if_noop=1 -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM test.dst_r1;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM dst_r1;" -$CLICKHOUSE_CLIENT --query="SYSTEM START REPLICATION QUEUES test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM START REPLICATION QUEUES dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d), uniqExact(_part) FROM dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'After restart';" -$CLICKHOUSE_CLIENT --query="USE test;" -$CLICKHOUSE_CLIENT --query="SYSTEM RESTART REPLICA test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SYSTEM RESTART REPLICAS;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" +$CLICKHOUSE_CLIENT --query="SYSTEM RESTART REPLICA dst_r1;" +$CLICKHOUSE_CLIENT --query="SYSTEM RESTART REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'DETACH+ATTACH PARTITION';" -query_with_retry "ALTER TABLE test.dst_r1 DETACH PARTITION 0;" -query_with_retry "ALTER TABLE test.dst_r1 DETACH PARTITION 1;" -query_with_retry 
"ALTER TABLE test.dst_r1 DETACH PARTITION 2;" -query_with_retry "ALTER TABLE test.dst_r1 ATTACH PARTITION 1;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r1;" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA test.dst_r2;" -$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM test.dst_r2;" +query_with_retry "ALTER TABLE dst_r1 DETACH PARTITION 0;" +query_with_retry "ALTER TABLE dst_r1 DETACH PARTITION 1;" +query_with_retry "ALTER TABLE dst_r1 DETACH PARTITION 2;" +query_with_retry "ALTER TABLE dst_r1 ATTACH PARTITION 1;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.src;" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.dst_r1;" -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.dst_r2;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" From 7154cb0ed16f20a2f5bf7241768e7ff04dd31a8b Mon Sep 17 00:00:00 2001 From: helifu Date: Wed, 31 May 2023 09:09:15 +0800 Subject: [PATCH 0257/1687] Support dynamic adjustment of log level --- programs/keeper/Keeper.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 22f0b2c2ac6..341d6311329 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -485,6 +485,8 @@ try unused_event, [&](ConfigurationPtr config, bool /* initial_loading */) { + updateLevels(*config, logger()); + if (config->has("keeper_server")) global_context->updateKeeperConfiguration(*config); From 8b64f3eb7a895c7d47150cf1b4c5f64ec68afb18 Mon Sep 17 00:00:00 2001 From: helifu Date: Fri, 9 Jun 2023 19:55:22 +0800 Subject: [PATCH 0258/1687] Add a test case --- .../test_keeper_dynamic_log_level/__init__.py | 0 .../configs/keeper_config.xml | 25 ++++++ .../configs/logger.xml | 10 +++ .../test_keeper_dynamic_log_level/test.py | 85 +++++++++++++++++++ 4 files changed, 120 insertions(+) create mode 100644 tests/integration/test_keeper_dynamic_log_level/__init__.py create mode 100644 tests/integration/test_keeper_dynamic_log_level/configs/keeper_config.xml create mode 100644 tests/integration/test_keeper_dynamic_log_level/configs/logger.xml create mode 100644 tests/integration/test_keeper_dynamic_log_level/test.py diff --git a/tests/integration/test_keeper_dynamic_log_level/__init__.py b/tests/integration/test_keeper_dynamic_log_level/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_dynamic_log_level/configs/keeper_config.xml b/tests/integration/test_keeper_dynamic_log_level/configs/keeper_config.xml new file mode 100644 index 00000000000..54202de188b --- /dev/null +++ b/tests/integration/test_keeper_dynamic_log_level/configs/keeper_config.xml @@ -0,0 +1,25 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + * + + + 5000 + 10000 + 5000 + 75 + trace + + + + + 1 + localhost + 9234 + + + + diff --git a/tests/integration/test_keeper_dynamic_log_level/configs/logger.xml b/tests/integration/test_keeper_dynamic_log_level/configs/logger.xml new file mode 100644 index 00000000000..ee9236a7d78 --- /dev/null +++ b/tests/integration/test_keeper_dynamic_log_level/configs/logger.xml @@ -0,0 +1,10 @@ + + + warning + /var/log/clickhouse-keeper.log + error + 
/var/log/clickhouse-keeper.err.log + 200M + 10 + + \ No newline at end of file diff --git a/tests/integration/test_keeper_dynamic_log_level/test.py b/tests/integration/test_keeper_dynamic_log_level/test.py new file mode 100644 index 00000000000..7ae55fbf411 --- /dev/null +++ b/tests/integration/test_keeper_dynamic_log_level/test.py @@ -0,0 +1,85 @@ +import pytest +import time +import sys + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=[ + "configs/keeper_config.xml", + "configs/logger.xml", + ], + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_adjust_log_level(start_cluster): + assert ( + int( + node.exec_in_container( + [ + "bash", + "-c", + "grep '' /var/log/clickhouse-keeper.log | wc -l", + ], + privileged=True, + user="root", + ) + ) == 0 + ) + + # Adjust log level. + node.exec_in_container( + [ + "bash", + "-c", + """echo " + + + trace + /var/log/clickhouse-keeper.log + error + /var/log/clickhouse-keeper.err.log + 200M + 10 + + + " > /etc/clickhouse-server/config.d/logger.xml + """, + ] + ) + time.sleep(3) + node.exec_in_container( + [ + "bash", + "-c", + "sync", + ], + privileged=True, + user="root", + ) + assert ( + int( + node.exec_in_container( + [ + "bash", + "-c", + "grep '' /var/log/clickhouse-keeper.log | wc -l", + ], + privileged=True, + user="root", + ) + ) >= 1 + ) + From 0cea777aa900f93d5dac89581fa82bf446196199 Mon Sep 17 00:00:00 2001 From: helifu Date: Fri, 9 Jun 2023 20:45:40 +0800 Subject: [PATCH 0259/1687] Fix a style check issue --- tests/integration/test_keeper_dynamic_log_level/test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_keeper_dynamic_log_level/test.py b/tests/integration/test_keeper_dynamic_log_level/test.py index 7ae55fbf411..edf417c0de1 100644 --- a/tests/integration/test_keeper_dynamic_log_level/test.py +++ b/tests/integration/test_keeper_dynamic_log_level/test.py @@ -36,7 +36,8 @@ def test_adjust_log_level(start_cluster): privileged=True, user="root", ) - ) == 0 + ) + == 0 ) # Adjust log level. 
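For context on why the one-line server change in [PATCH 0257/1687] above is enough to make this test pass: ClickHouse's ConfigReloader invokes a callback every time a watched config file changes, and Keeper's callback previously skipped re-applying logger settings. A minimal sketch of the resulting callback shape, assuming Poco-style loggers (everything except the updateLevels call is paraphrased from the Keeper.cpp hunk above):

    [&](ConfigurationPtr config, bool /* initial_loading */)
    {
        /// Re-read the logger section on every reload so that a changed
        /// log level (e.g. warning -> trace, as this test writes into
        /// config.d/logger.xml) takes effect without a process restart.
        updateLevels(*config, logger());

        if (config->has("keeper_server"))
            global_context->updateKeeperConfiguration(*config);
    }

The integration test being touched here drives exactly that path: it counts log lines at the verbose level (expecting zero while the level is warning), rewrites logger.xml with level trace, waits for the reload to fire, and then asserts that such lines start appearing.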
@@ -80,6 +81,7 @@ def test_adjust_log_level(start_cluster): privileged=True, user="root", ) - ) >= 1 + ) + >= 1 ) From 270adafe53ff5b24c18c361a1aad650a5ffa5ad0 Mon Sep 17 00:00:00 2001 From: helifu Date: Mon, 12 Jun 2023 09:30:00 +0800 Subject: [PATCH 0260/1687] Fix a style check issue --- tests/integration/test_keeper_dynamic_log_level/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_keeper_dynamic_log_level/test.py b/tests/integration/test_keeper_dynamic_log_level/test.py index edf417c0de1..c5cbedce082 100644 --- a/tests/integration/test_keeper_dynamic_log_level/test.py +++ b/tests/integration/test_keeper_dynamic_log_level/test.py @@ -84,4 +84,3 @@ def test_adjust_log_level(start_cluster): ) >= 1 ) - From f5ca37d0b6fccb780fef14f1764adce2bbedd39c Mon Sep 17 00:00:00 2001 From: helifu Date: Tue, 13 Jun 2023 15:05:05 +0800 Subject: [PATCH 0261/1687] Update the test case --- .../test_keeper_dynamic_log_level/configs/logger.xml | 7 ++++--- .../integration/test_keeper_dynamic_log_level/test.py | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_keeper_dynamic_log_level/configs/logger.xml b/tests/integration/test_keeper_dynamic_log_level/configs/logger.xml index ee9236a7d78..d32cba64cdb 100644 --- a/tests/integration/test_keeper_dynamic_log_level/configs/logger.xml +++ b/tests/integration/test_keeper_dynamic_log_level/configs/logger.xml @@ -1,10 +1,11 @@ warning - /var/log/clickhouse-keeper.log + /var/log/clickhouse-server/clickhouse-server.log error - /var/log/clickhouse-keeper.err.log + /var/log/clickhouse-server/clickhouse-server.err.log 200M 10 - \ No newline at end of file + + diff --git a/tests/integration/test_keeper_dynamic_log_level/test.py b/tests/integration/test_keeper_dynamic_log_level/test.py index c5cbedce082..84b000833c0 100644 --- a/tests/integration/test_keeper_dynamic_log_level/test.py +++ b/tests/integration/test_keeper_dynamic_log_level/test.py @@ -31,7 +31,7 @@ def test_adjust_log_level(start_cluster): [ "bash", "-c", - "grep '' /var/log/clickhouse-keeper.log | wc -l", + "grep '' /var/log/clickhouse-server/clickhouse-server.log | wc -l", ], privileged=True, user="root", @@ -49,9 +49,9 @@ def test_adjust_log_level(start_cluster): trace - /var/log/clickhouse-keeper.log + /var/log/clickhouse-server/clickhouse-server.log error - /var/log/clickhouse-keeper.err.log + /var/log/clickhouse-server/clickhouse-server.err.log 200M 10 @@ -60,7 +60,7 @@ def test_adjust_log_level(start_cluster): """, ] ) - time.sleep(3) + time.sleep(2) node.exec_in_container( [ "bash", @@ -76,7 +76,7 @@ def test_adjust_log_level(start_cluster): [ "bash", "-c", - "grep '' /var/log/clickhouse-keeper.log | wc -l", + "grep '' /var/log/clickhouse-server/clickhouse-server.log | wc -l", ], privileged=True, user="root", From 178194ec500f8d76401327df16417f3e43ec1fdb Mon Sep 17 00:00:00 2001 From: helifu Date: Wed, 23 Aug 2023 19:50:27 +0800 Subject: [PATCH 0262/1687] Updated according to review comments --- tests/integration/test_keeper_dynamic_log_level/test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_dynamic_log_level/test.py b/tests/integration/test_keeper_dynamic_log_level/test.py index 84b000833c0..2941a243950 100644 --- a/tests/integration/test_keeper_dynamic_log_level/test.py +++ b/tests/integration/test_keeper_dynamic_log_level/test.py @@ -60,7 +60,8 @@ def test_adjust_log_level(start_cluster): """, ] ) - time.sleep(2) + time.sleep(3) + node.query("SELECT * FROM 
system.zookeeper SETTINGS allow_unrestricted_reads_from_keeper = 'true'") node.exec_in_container( [ "bash", From 9300d2848d43daee18e7a63a57a4f69bd4427c85 Mon Sep 17 00:00:00 2001 From: helifu Date: Wed, 23 Aug 2023 20:13:54 +0800 Subject: [PATCH 0263/1687] Fix a style check issue --- tests/integration/test_keeper_dynamic_log_level/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_dynamic_log_level/test.py b/tests/integration/test_keeper_dynamic_log_level/test.py index 2941a243950..5fdeedb6086 100644 --- a/tests/integration/test_keeper_dynamic_log_level/test.py +++ b/tests/integration/test_keeper_dynamic_log_level/test.py @@ -61,7 +61,9 @@ def test_adjust_log_level(start_cluster): ] ) time.sleep(3) - node.query("SELECT * FROM system.zookeeper SETTINGS allow_unrestricted_reads_from_keeper = 'true'") + node.query( + "SELECT * FROM system.zookeeper SETTINGS allow_unrestricted_reads_from_keeper = 'true'" + ) node.exec_in_container( [ "bash", From 97cdd0c27649d21fe27b290df42f052700a37ea1 Mon Sep 17 00:00:00 2001 From: Artur Malchanau Date: Wed, 23 Aug 2023 15:35:30 +0300 Subject: [PATCH 0264/1687] Fix clang-tidy warning --- programs/server/Server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 0c5387d98e8..336c15652c2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -488,7 +488,7 @@ static void sanityChecks(Server & server) "tsc", }; const char * filename = "/sys/devices/system/clocksource/clocksource0/current_clocksource"; - if (fastClockSources.count(readLine(filename)) == 0) + if (!fastClockSources.contains(readLine(filename))) server.context()->addWarningMessage("Linux is not using a fast clock source. Performance can be degraded. Check " + String(filename)); } catch (...) 
From 407b2e3b58163418a35532e0841acdd479ab9dab Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 23 Aug 2023 14:08:34 +0000 Subject: [PATCH 0265/1687] require columns participating in interpolate expression --- src/Interpreters/ExpressionAnalyzer.cpp | 14 +++++- src/Interpreters/TreeRewriter.cpp | 43 +++++++++++++++++++ .../02863_interpolate_subquery.reference | 0 .../02863_interpolate_subquery.sql | 7 +++ 4 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02863_interpolate_subquery.reference create mode 100644 tests/queries/0_stateless/02863_interpolate_subquery.sql diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index acedede579b..4cb1ee2a9a9 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1513,14 +1513,16 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai for (const auto & child : select_query->select()->children) select.insert(child->getAliasOrColumnName()); + NameSet required_by_interpolate; /// collect columns required for interpolate expressions - /// interpolate expression can use any available column - auto find_columns = [&step, &select](IAST * function) + auto find_columns = [&step, &select, &required_by_interpolate](IAST * function) { - auto f_impl = [&step, &select](IAST * fn, auto fi) + auto f_impl = [&step, &select, &required_by_interpolate](IAST * fn, auto fi) { if (auto * ident = fn->as()) { + required_by_interpolate.insert(ident->getColumnName()); /// exclude columns from select expression - they are already available if (!select.contains(ident->getColumnName())) step.addRequiredOutput(ident->getColumnName()); @@ -1536,6 +1538,14 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai for (const auto & interpolate : interpolate_list->children) find_columns(interpolate->as()->expr.get()); + + if (!required_result_columns.empty()) + { + NameSet required_result_columns_set(required_result_columns.begin(), required_result_columns.end()); + for (const auto & name : required_by_interpolate) + if (!required_result_columns_set.contains(name)) + required_result_columns.push_back(name); + } } if (optimize_read_in_order) diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 5b384a5f74b..b71086f2188 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -388,6 +388,44 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const else return; + NameSet required_by_interpolate; + + if (select_query->interpolate()) + { + auto & children = select_query->interpolate()->children; + if (!children.empty()) + { + NameToNameSetMap expressions; + + auto interpolate_visitor = [](const ASTPtr ast, NameSet & columns) -> void + { + auto interpolate_visitor_impl = [](const ASTPtr node, NameSet & cols, auto self) -> void + { + if (const auto * ident = node->as()) + cols.insert(ident->name()); + else if (const auto * func = node->as()) + for (const auto & elem : func->arguments->children) + self(elem, cols, self); + }; + interpolate_visitor_impl(ast, columns, interpolate_visitor_impl); + }; + + for (const auto & elem : children) + { + if (auto * interpolate = elem->as()) + { + NameSet needed_columns; + interpolate_visitor(interpolate->expr, needed_columns); + expressions.emplace(interpolate->column, std::move(needed_columns)); + } + } + + for (const auto & name : required_result_columns) + if (const 
auto it = expressions.find(name); it != expressions.end()) + required_by_interpolate.insert(it->second.begin(), it->second.end()); + } + } + ASTs new_elements; new_elements.reserve(elements.size()); @@ -403,6 +441,11 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const new_elements.push_back(elem); --it->second; } + else if (required_by_interpolate.contains(name)) + { + /// Columns required by interpolate expression are not always in the required_result_columns + new_elements.push_back(elem); + } else if (select_query->distinct || hasArrayJoin(elem)) { /// ARRAY JOIN cannot be optimized out since it may change number of rows, diff --git a/tests/queries/0_stateless/02863_interpolate_subquery.reference b/tests/queries/0_stateless/02863_interpolate_subquery.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02863_interpolate_subquery.sql b/tests/queries/0_stateless/02863_interpolate_subquery.sql new file mode 100644 index 00000000000..4d8ba5f9cb2 --- /dev/null +++ b/tests/queries/0_stateless/02863_interpolate_subquery.sql @@ -0,0 +1,7 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/53640 +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (i UInt32, a UInt32) ENGINE=Memory; +SELECT i, col1 FROM ( + SELECT i, a AS col1, a AS col2 FROM tab ORDER BY i WITH FILL INTERPOLATE (col1 AS col1+col2, col2) +); +DROP TABLE tab; From 8b2342c2056be286b1d03624edf9e14446d943c7 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Wed, 23 Aug 2023 14:16:36 +0000 Subject: [PATCH 0266/1687] Minor code cleanup: remove some redundant includes of InterpreterAlterQuery.h --- src/Storages/StorageBuffer.cpp | 1 - src/Storages/StorageMergeTree.cpp | 1 - src/Storages/StorageNull.cpp | 1 - src/Storages/StorageReplicatedMergeTree.cpp | 1 - 4 files changed, 4 deletions(-) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index eb154d0d943..e011565edc1 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9d7f6903b46..94abd73be41 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index 0ced128c8ef..5e4fde99306 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 76a2ad9883c..d6ed3525b45 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -87,7 +87,6 @@ #include #include #include -#include #include #include #include From a5b6a91b54c2c8393d324a1b3789e05b3a5b96a6 Mon Sep 17 00:00:00 2001 From: Anton Kozlov Date: Tue, 8 Mar 2022 13:58:39 +0000 Subject: [PATCH 0267/1687] [bloom filter] Write hashes for bloom filter into a hash set for optimal sizing --- .../MergeTreeIndexAggregatorBloomFilter.cpp | 16 ++-- .../MergeTreeIndexAggregatorBloomFilter.h | 3 +- .../MergeTreeIndexGranuleBloomFilter.cpp | 81 +++++-------------- .../MergeTreeIndexGranuleBloomFilter.h | 7 +- .../02231_bloom_filter_sizing.reference | 6 ++ .../0_stateless/02231_bloom_filter_sizing.sql | 53 ++++++++++++ 6 files changed, 96 
insertions(+), 70 deletions(-) create mode 100644 tests/queries/0_stateless/02231_bloom_filter_sizing.reference create mode 100644 tests/queries/0_stateless/02231_bloom_filter_sizing.sql diff --git a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp index ef98accfbc6..e6134d8b239 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes MergeTreeIndexAggregatorBloomFilter::MergeTreeIndexAggregatorBloomFilter( size_t bits_per_row_, size_t hash_functions_, const Names & columns_name_) - : bits_per_row(bits_per_row_), hash_functions(hash_functions_), index_columns_name(columns_name_) + : bits_per_row(bits_per_row_), hash_functions(hash_functions_), index_columns_name(columns_name_), column_hashes(columns_name_.size()) { assert(bits_per_row != 0); assert(hash_functions != 0); @@ -32,9 +32,9 @@ bool MergeTreeIndexAggregatorBloomFilter::empty() const MergeTreeIndexGranulePtr MergeTreeIndexAggregatorBloomFilter::getGranuleAndReset() { - const auto granule = std::make_shared(bits_per_row, hash_functions, total_rows, granule_index_blocks); + const auto granule = std::make_shared(bits_per_row, hash_functions, column_hashes); total_rows = 0; - granule_index_blocks.clear(); + column_hashes.clear(); return granule; } @@ -47,17 +47,19 @@ void MergeTreeIndexAggregatorBloomFilter::update(const Block & block, size_t * p Block granule_index_block; size_t max_read_rows = std::min(block.rows() - *pos, limit); - for (const auto & index_column_name : index_columns_name) + for (size_t column = 0; column < index_columns_name.size(); ++column) { - const auto & column_and_type = block.getByName(index_column_name); + const auto & column_and_type = block.getByName(index_columns_name[column]); auto index_column = BloomFilterHash::hashWithColumn(column_and_type.type, column_and_type.column, *pos, max_read_rows); - granule_index_block.insert({index_column, std::make_shared(), column_and_type.name}); + const auto &index_col = checkAndGetColumn(index_column.get()); + const auto & index_data = index_col->getData(); + for (const auto &hash: index_data) + column_hashes[column].insert(hash); } *pos += max_read_rows; total_rows += max_read_rows; - granule_index_blocks.push_back(granule_index_block); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h index 9877db8ee30..d20653b7689 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -22,8 +23,8 @@ private: size_t hash_functions; const Names index_columns_name; + std::vector> column_hashes; size_t total_rows = 0; - Blocks granule_index_blocks; }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp index 267708b5312..7db3aa3a6b1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -8,7 +8,6 @@ #include #include - namespace DB { namespace ErrorCodes @@ -16,21 +15,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -static void assertGranuleBlocksStructure(const Blocks & granule_index_blocks) -{ - Block prev_block; - for (size_t index = 0; index < 
granule_index_blocks.size(); ++index) - { - const Block & granule_index_block = granule_index_blocks[index]; - - if (index != 0) - assertBlocksHaveEqualStructure(prev_block, granule_index_block, "Granule blocks of bloom filter has difference structure."); - - prev_block = granule_index_block; - } -} - - MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter(size_t bits_per_row_, size_t hash_functions_, size_t index_columns_) : bits_per_row(bits_per_row_), hash_functions(hash_functions_) { @@ -39,42 +23,28 @@ MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter(size_t bits_p } MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( - size_t bits_per_row_, size_t hash_functions_, size_t total_rows_, const Blocks & granule_index_blocks_) - : total_rows(total_rows_), bits_per_row(bits_per_row_), hash_functions(hash_functions_) + size_t bits_per_row_, size_t hash_functions_, const std::vector>& column_hashes_) + : bits_per_row(bits_per_row_), hash_functions(hash_functions_), bloom_filters(column_hashes_.size()) { - if (granule_index_blocks_.empty() || !total_rows) - throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: granule_index_blocks empty or total_rows is zero."); + if (column_hashes_.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule_index_blocks empty or total_rows is zero."); - assertGranuleBlocksStructure(granule_index_blocks_); + size_t bloom_filter_max_size = 0; + for (const auto & column_hash : column_hashes_) + bloom_filter_max_size = std::max(bloom_filter_max_size, column_hash.size()); - for (size_t index = 0; index < granule_index_blocks_.size(); ++index) + static size_t atom_size = 8; + + // If multiple columns are given, we will initialize all the bloom filters + // with the size of the highest-cardinality one. 
This is done for compatibility with + // existing binary serialization format + total_rows = bloom_filter_max_size; + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + + for (size_t column = 0, columns = column_hashes_.size(); column < columns; ++column) { - Block granule_index_block = granule_index_blocks_[index]; - - if (unlikely(!granule_index_block || !granule_index_block.rows())) - throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: granule_index_block is empty."); - - if (index == 0) - { - static size_t atom_size = 8; - - for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) - { - size_t total_items = total_rows; - - if (const auto * array_col = typeid_cast(granule_index_block.getByPosition(column).column.get())) - { - const IColumn * nested_col = array_col->getDataPtr().get(); - total_items = nested_col->size(); - } - - size_t bytes_size = (bits_per_row * total_items + atom_size - 1) / atom_size; - bloom_filters.emplace_back(std::make_shared(bytes_size, hash_functions, 0)); - } - } - - for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) - fillingBloomFilter(bloom_filters[column], granule_index_block, column); + bloom_filters[column] = std::make_shared(bytes_size, hash_functions, 0); + fillingBloomFilter(bloom_filters[column], column_hashes_[column]); } } @@ -123,18 +93,11 @@ void MergeTreeIndexGranuleBloomFilter::serializeBinary(WriteBuffer & ostr) const } } -void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column) const +void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter(BloomFilterPtr & bf, const HashSet &hashes) const { - const auto & column = granule_index_block.getByPosition(index_hash_column); - - if (const auto * hash_column = typeid_cast(column.column.get())) - { - const auto & hash_column_vec = hash_column->getData(); - - for (const auto & bf_base_hash : hash_column_vec) - for (size_t i = 0; i < hash_functions; ++i) - bf->addHashWithSeed(bf_base_hash, BloomFilterHash::bf_hash_seed[i]); - } + for (const auto & bf_base_hash : hashes) + for (size_t i = 0; i < hash_functions; ++i) + bf->addHashWithSeed(bf_base_hash.getKey(), BloomFilterHash::bf_hash_seed[i]); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h index 82bd91138a7..1d6080ebe24 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -11,7 +12,7 @@ class MergeTreeIndexGranuleBloomFilter final : public IMergeTreeIndexGranule public: MergeTreeIndexGranuleBloomFilter(size_t bits_per_row_, size_t hash_functions_, size_t index_columns_); - MergeTreeIndexGranuleBloomFilter(size_t bits_per_row_, size_t hash_functions_, size_t total_rows_, const Blocks & granule_index_blocks_); + MergeTreeIndexGranuleBloomFilter(size_t bits_per_row_, size_t hash_functions_, const std::vector> &column_hashes); bool empty() const override; @@ -21,12 +22,12 @@ public: const std::vector & getFilters() const { return bloom_filters; } private: - size_t total_rows; + size_t total_rows = 0; size_t bits_per_row; size_t hash_functions; std::vector bloom_filters; - void fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column) const; + void fillingBloomFilter(BloomFilterPtr & bf, const 
HashSet &hashes) const; }; diff --git a/tests/queries/0_stateless/02231_bloom_filter_sizing.reference b/tests/queries/0_stateless/02231_bloom_filter_sizing.reference new file mode 100644 index 00000000000..bdba311c092 --- /dev/null +++ b/tests/queries/0_stateless/02231_bloom_filter_sizing.reference @@ -0,0 +1,6 @@ +Bloom filter on sort key +10000 +0 +Bloom filter on non-sort key +10000 +0 diff --git a/tests/queries/0_stateless/02231_bloom_filter_sizing.sql b/tests/queries/0_stateless/02231_bloom_filter_sizing.sql new file mode 100644 index 00000000000..233e3111067 --- /dev/null +++ b/tests/queries/0_stateless/02231_bloom_filter_sizing.sql @@ -0,0 +1,53 @@ +SELECT 'Bloom filter on sort key'; +DROP TABLE IF EXISTS bloom_filter_sizing_pk; +CREATE TABLE bloom_filter_sizing_pk( + key UInt64, + value UInt64, + + -- Very high granularity to have one filter per part. + INDEX key_bf key TYPE bloom_filter(0.01) GRANULARITY 2147483648 +) ENGINE=MergeTree ORDER BY key; + +INSERT INTO bloom_filter_sizing_pk +SELECT +number % 100 as key, -- 100 unique keys +number as value -- whatever +FROM numbers(1000 * 1000); + +-- +-- Merge everything into a single part +-- +OPTIMIZE TABLE bloom_filter_sizing_pk FINAL; + +SELECT COUNT() from bloom_filter_sizing_pk WHERE key = 1; + +-- Check bloom filter size. According to https://hur.st/bloomfilter/?n=100&p=0.01 for 100 keys it should be less that 200B +SELECT COUNT() from system.parts where database = currentDatabase() AND table = 'bloom_filter_sizing_pk' and secondary_indices_uncompressed_bytes > 200 and active; + +SELECT 'Bloom filter on non-sort key'; +DROP TABLE IF EXISTS bloom_filter_sizing_sec; +CREATE TABLE bloom_filter_sizing_sec( + key1 UInt64, + key2 UInt64, + value UInt64, + + -- Very high granularity to have one filter per part. + INDEX key_bf key2 TYPE bloom_filter(0.01) GRANULARITY 2147483648 +) ENGINE=MergeTree ORDER BY key1; + +INSERT INTO bloom_filter_sizing_sec +SELECT +number % 100 as key1, -- 100 unique keys +rand() % 100 as key2, -- 100 unique keys +number as value -- whatever +FROM numbers(1000 * 1000); + +-- +-- Merge everything into a single part +-- +OPTIMIZE TABLE bloom_filter_sizing_sec FINAL; + +SELECT COUNT() from bloom_filter_sizing_sec WHERE key1 = 1; + +-- Check bloom filter size. 
According to https://hur.st/bloomfilter/?n=100&p=0.01 for 100 keys it should be less that 200B +SELECT COUNT() from system.parts where database = currentDatabase() AND table = 'bloom_filter_sizing_sec' and secondary_indices_uncompressed_bytes > 200 and active; From 6565da25ff2a873a933df8b4605c931dd0987f54 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Wed, 23 Aug 2023 17:24:02 +0200 Subject: [PATCH 0268/1687] Fix test --- tests/queries/0_stateless/02787_transform_null.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02787_transform_null.reference b/tests/queries/0_stateless/02787_transform_null.reference index a650dbbd173..a84dd83d712 100644 --- a/tests/queries/0_stateless/02787_transform_null.reference +++ b/tests/queries/0_stateless/02787_transform_null.reference @@ -5,5 +5,5 @@ ONE a a \N 0 \N 0 \N -1 1 1 \N 1 1 +1 1 1 1 1 1 a \N 3 3 3 3 From 8f6d636ff447a8c72502afe54734ee46f26d7da3 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Wed, 23 Aug 2023 17:26:23 +0200 Subject: [PATCH 0269/1687] Fix error reporting --- src/Functions/transform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index fa41841b7d1..b7582b37017 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -168,7 +168,7 @@ namespace if (in->size() > default_non_const->size()) { throw Exception( - ErrorCodes::BAD_ARGUMENTS, + ErrorCodes::LOGICAL_ERROR, "Fourth argument of function {} must be a constant or a column at least as big as the second and third arguments", getName()); } From 3e81018d49a17b83ba72187b7881a8802fc47cfc Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 23 Aug 2023 19:38:14 +0000 Subject: [PATCH 0270/1687] Remove branch references from .gitmodules Branch references becomes outdated too easily and they mostly spread confusion (see the discussion in https://github.com/ClickHouse/arrow/pull/40). 
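Stepping back to the bloom-filter sizing test from [PATCH 0267/1687] above: the 200-byte threshold can be sanity-checked with the standard bloom-filter formulas (textbook values, not something stated in the patch itself). For n = 100 distinct keys and false-positive probability p = 0.01, the optimal filter size is m = -n * ln(p) / ln(2)^2 ≈ 959 bits ≈ 120 bytes, with k = (m / n) * ln(2) ≈ 7 hash functions, comfortably under 200 bytes. Before that patch the granule was sized by the number of rows fed into it (the hash columns carried one hash per row), so the same data, 1,000,000 rows with only 100 distinct keys, would have produced a filter roughly four orders of magnitude larger; collecting hashes into a per-column HashSet and sizing by the highest-cardinality column is what brings it under the bound.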
--- .gitmodules | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.gitmodules b/.gitmodules index c3592372b7e..a2eb0cb5c1a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,7 +13,6 @@ [submodule "contrib/zlib-ng"] path = contrib/zlib-ng url = https://github.com/ClickHouse/zlib-ng - branch = clickhouse-2.0.x [submodule "contrib/googletest"] path = contrib/googletest url = https://github.com/google/googletest @@ -47,7 +46,6 @@ [submodule "contrib/arrow"] path = contrib/arrow url = https://github.com/ClickHouse/arrow - branch = blessed/release-6.0.1 [submodule "contrib/thrift"] path = contrib/thrift url = https://github.com/apache/thrift @@ -93,7 +91,6 @@ [submodule "contrib/grpc"] path = contrib/grpc url = https://github.com/ClickHouse/grpc - branch = v1.33.2 [submodule "contrib/aws"] path = contrib/aws url = https://github.com/ClickHouse/aws-sdk-cpp @@ -140,11 +137,9 @@ [submodule "contrib/cassandra"] path = contrib/cassandra url = https://github.com/ClickHouse/cpp-driver - branch = clickhouse [submodule "contrib/libuv"] path = contrib/libuv url = https://github.com/ClickHouse/libuv - branch = clickhouse [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt @@ -157,11 +152,9 @@ [submodule "contrib/cyrus-sasl"] path = contrib/cyrus-sasl url = https://github.com/ClickHouse/cyrus-sasl - branch = cyrus-sasl-2.1 [submodule "contrib/croaring"] path = contrib/croaring url = https://github.com/RoaringBitmap/CRoaring - branch = v0.2.66 [submodule "contrib/miniselect"] path = contrib/miniselect url = https://github.com/danlark1/miniselect @@ -174,7 +167,6 @@ [submodule "contrib/abseil-cpp"] path = contrib/abseil-cpp url = https://github.com/abseil/abseil-cpp - branch = lts_2021_11_02 [submodule "contrib/dragonbox"] path = contrib/dragonbox url = https://github.com/ClickHouse/dragonbox @@ -187,7 +179,6 @@ [submodule "contrib/boringssl"] path = contrib/boringssl url = https://github.com/ClickHouse/boringssl - branch = unknown_branch_from_artur [submodule "contrib/NuRaft"] path = contrib/NuRaft url = https://github.com/ClickHouse/NuRaft @@ -248,7 +239,6 @@ [submodule "contrib/annoy"] path = contrib/annoy url = https://github.com/ClickHouse/annoy - branch = ClickHouse-master [submodule "contrib/qpl"] path = contrib/qpl url = https://github.com/intel/qpl @@ -282,7 +272,6 @@ [submodule "contrib/openssl"] path = contrib/openssl url = https://github.com/openssl/openssl - branch = openssl-3.0 [submodule "contrib/google-benchmark"] path = contrib/google-benchmark url = https://github.com/google/benchmark From 59ca2374b583efdeaf81e30417b756213850fae8 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 23 Aug 2023 19:39:45 +0000 Subject: [PATCH 0271/1687] Better --- src/Common/BoolArgsToTemplateArgsDispatcher.h | 39 -------------- src/Interpreters/Aggregator.cpp | 54 +++++++++++-------- src/Interpreters/Aggregator.h | 3 +- .../02845_group_by_constant_keys.reference | 36 +++++++++++++ .../02845_group_by_constant_keys.sql | 32 +++++++++-- 5 files changed, 98 insertions(+), 66 deletions(-) delete mode 100644 src/Common/BoolArgsToTemplateArgsDispatcher.h diff --git a/src/Common/BoolArgsToTemplateArgsDispatcher.h b/src/Common/BoolArgsToTemplateArgsDispatcher.h deleted file mode 100644 index 3e72db49cd2..00000000000 --- a/src/Common/BoolArgsToTemplateArgsDispatcher.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -/// Special struct that helps to convert bool variables to function template bool arguments. 
-/// It can be used to avoid multiple nested if/else on bool arguments. How to use it: -/// Imagine you have template function -/// template <bool b1, bool b2, ..., bool bn> return_type foo(...); -/// and bool variables b1, b2, ..., bn. To pass these variables as template for foo you can do the following: -/// -/// auto call_foo = []<bool b1, bool b2, ..., bool bn>() -/// { -/// return foo<b1, b2, ..., bn>(...); -/// } -/// -/// BoolArgsToTemplateArgsDispatcher<decltype(call_foo)>::call(call_foo, b1, b2, ..., bn); - -template <class Functor, bool ...Args> -struct BoolArgsToTemplateArgsDispatcher -{ - template <typename... Args1> - static auto call(Functor f, Args1&&... args) - { - return f.template operator()<Args...>(std::forward<Args1>(args)...); - } - - template <typename... Args1> - static auto call(Functor f, bool b, Args1&&... ar1) - { - if (b) - return BoolArgsToTemplateArgsDispatcher<Functor, Args..., true>::call(f, std::forward<Args1>(ar1)...); - else - return BoolArgsToTemplateArgsDispatcher<Functor, Args..., false>::call(f, std::forward<Args1>(ar1)...); - } -}; - -} diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index d9e9c66e4e2..90f6fc7b978 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -39,7 +39,6 @@ #include #include #include -#include <Common/BoolArgsToTemplateArgsDispatcher.h> #include @@ -1066,30 +1065,40 @@ void NO_INLINE Aggregator::executeImpl( { typename Method::State state(key_columns, key_sizes, aggregation_state_cache); - auto call_execute_impl_batch = [&]<bool no_more_keys_, bool use_compiled_functions_, bool prefetch_, bool all_keys_are_const_>() - { - executeImplBatch<no_more_keys_, use_compiled_functions_, prefetch_, all_keys_are_const_>(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, overflow_row); - }; - - bool use_compiled_functions = false; -#if USE_EMBEDDED_COMPILER - use_compiled_functions = compiled_aggregate_functions_holder && !hasSparseArguments(aggregate_instructions); -#endif - if (!no_more_keys) { /// Prefetching doesn't make sense for small hash tables, because they fit in caches entirely. const bool prefetch = Method::State::has_cheap_key_calculation && params.enable_prefetch && (method.data.getBufferSizeInBytes() > min_bytes_for_prefetch); + +#if USE_EMBEDDED_COMPILER + if (compiled_aggregate_functions_holder && !hasSparseArguments(aggregate_instructions)) + { + if (prefetch) + executeImplBatch<false, true, true>( + method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); + else + executeImplBatch<false, true, false>( + method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); + } + else +#endif + { + if (prefetch) + executeImplBatch<false, false, true>( + method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); + else + executeImplBatch<false, false, false>( + method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); + } } else { - BoolArgsToTemplateArgsDispatcher<decltype(call_execute_impl_batch)>::call(call_execute_impl_batch, no_more_keys, use_compiled_functions, prefetch, all_keys_are_const); + executeImplBatch<true, false, false>(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); } } -template <bool no_more_keys, bool use_compiled_functions, bool prefetch, bool all_keys_are_const, typename Method> +template <bool no_more_keys, bool use_compiled_functions, bool prefetch, typename Method> void NO_INLINE Aggregator::executeImplBatch( Method & method, typename Method::State & state, @@ -1097,6 +1106,7 @@ void NO_INLINE Aggregator::executeImplBatch( size_t row_begin, size_t row_end, AggregateFunctionInstruction * aggregate_instructions, + bool all_keys_are_const, AggregateDataPtr overflow_row) const { using KeyHolder = decltype(state.getKeyHolder(0, std::declval<Arena &>())); @@ -1113,7 +1123,7 @@ void NO_INLINE Aggregator::executeImplBatch( /// For all rows.
AggregateDataPtr place = aggregates_pool->alloc(0); - if constexpr (all_keys_are_const) + if (all_keys_are_const) { state.emplaceKey(method.data, 0, *aggregates_pool).setMapped(place); } @@ -1140,7 +1150,7 @@ void NO_INLINE Aggregator::executeImplBatch( } /// Optimization for special case when aggregating by 8bit key. - if constexpr (!no_more_keys && !all_keys_are_const && std::is_same_v) + if constexpr (!no_more_keys && std::is_same_v) { /// We use another method if there are aggregate functions with -Array combinator. bool has_arrays = false; @@ -1153,7 +1163,7 @@ void NO_INLINE Aggregator::executeImplBatch( } } - if (!has_arrays && !hasSparseArguments(aggregate_instructions)) + if (!has_arrays && !hasSparseArguments(aggregate_instructions) && !all_keys_are_const) { for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) { @@ -1184,7 +1194,7 @@ void NO_INLINE Aggregator::executeImplBatch( /// For all rows. size_t start, end; /// If all keys are const, key columns contain only 1 row. - if constexpr (all_keys_are_const) + if (all_keys_are_const) { start = 0; end = 1; @@ -1201,7 +1211,7 @@ void NO_INLINE Aggregator::executeImplBatch( if constexpr (!no_more_keys) { - if constexpr (prefetch && !all_keys_are_const && HasPrefetchMemberFunc) + if constexpr (prefetch && HasPrefetchMemberFunc) { if (i == row_begin + prefetching.iterationsToMeasure()) prefetch_look_ahead = prefetching.calcPrefetchLookAhead(); @@ -1273,7 +1283,7 @@ void NO_INLINE Aggregator::executeImplBatch( { /// If all keys are constant and this is new key /// we don't need to do anything and just skip the whole block. - if constexpr (all_keys_are_const) + if (all_keys_are_const) return; aggregate_data = overflow_row; } @@ -1299,7 +1309,7 @@ void NO_INLINE Aggregator::executeImplBatch( columns_data.emplace_back(getColumnData(inst->batch_arguments[argument_index])); } - if constexpr (all_keys_are_const) + if (all_keys_are_const) { auto add_into_aggregate_states_function_single_place = compiled_aggregate_functions_holder->compiled_aggregate_functions.add_into_aggregate_states_function_single_place; add_into_aggregate_states_function_single_place(row_begin, row_end, columns_data.data(), places[0]); @@ -1323,7 +1333,7 @@ void NO_INLINE Aggregator::executeImplBatch( AggregateFunctionInstruction * inst = aggregate_instructions + i; - if constexpr (all_keys_are_const) + if (all_keys_are_const) { if (inst->offsets) inst->batch_that->addBatchSinglePlace(inst->offsets[static_cast(row_begin) - 1], inst->offsets[row_end - 1], places[0] + inst->state_offset, inst->batch_arguments, aggregates_pool); diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index dc0391c4289..d5b5abddd84 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1297,7 +1297,7 @@ private: AggregateDataPtr overflow_row) const; /// Specialization for a particular value no_more_keys. - template + template void executeImplBatch( Method & method, typename Method::State & state, @@ -1305,6 +1305,7 @@ private: size_t row_begin, size_t row_end, AggregateFunctionInstruction * aggregate_instructions, + bool all_keys_are_const, AggregateDataPtr overflow_row) const; /// For case when there are no keys (all aggregate into one row). 
diff --git a/tests/queries/0_stateless/02845_group_by_constant_keys.reference b/tests/queries/0_stateless/02845_group_by_constant_keys.reference index 60e40ea54a7..67cbdf0c025 100644 --- a/tests/queries/0_stateless/02845_group_by_constant_keys.reference +++ b/tests/queries/0_stateless/02845_group_by_constant_keys.reference @@ -2,3 +2,39 @@ 10000000 1 2 3 10000000 1 2 3 10000000 1 2 3 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 data.Parquet +10 data.2.Parquet +10 data.1.Parquet +10 data.Parquet +10 data.2.Parquet +10 data.1.Parquet +10 data.Parquet +10 data.2.Parquet +10 data.1.Parquet +10 data.Parquet +10 data.2.Parquet +10 data.1.Parquet +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 diff --git a/tests/queries/0_stateless/02845_group_by_constant_keys.sql b/tests/queries/0_stateless/02845_group_by_constant_keys.sql index 0223cf1df60..109a5a9a730 100644 --- a/tests/queries/0_stateless/02845_group_by_constant_keys.sql +++ b/tests/queries/0_stateless/02845_group_by_constant_keys.sql @@ -1,5 +1,29 @@ -SELECT count(number), 1 AS k1, 2 as k2, 3 as k3 FROM numbers_mt(10000000) GROUP BY k1, k2, k3 settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=0, compile_aggregate_expressions=0; -SELECT count(number), 1 AS k1, 2 as k2, 3 as k3 FROM numbers_mt(10000000) GROUP BY k1, k2, k3 settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=1, compile_aggregate_expressions = 0; -SELECT count(number), 1 AS k1, 2 as k2, 3 as k3 FROM numbers_mt(10000000) GROUP BY k1, k2, k3 settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=0, compile_aggregate_expressions = 1; -SELECT count(number), 1 AS k1, 2 as k2, 3 as k3 FROM numbers_mt(10000000) GROUP BY k1, k2, k3 settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=1, compile_aggregate_expressions = 1; +select count(number), 1 AS k1, 2 as k2, 3 as k3 from numbers_mt(10000000) group by k1, k2, k3 settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=0, compile_aggregate_expressions=0; +select count(number), 1 AS k1, 2 as k2, 3 as k3 from numbers_mt(10000000) group by k1, k2, k3 settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=1, compile_aggregate_expressions = 0; +select count(number), 1 AS k1, 2 as k2, 3 as k3 from numbers_mt(10000000) group by k1, k2, k3 settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=0, compile_aggregate_expressions = 1; +select count(number), 1 AS k1, 2 as k2, 3 as k3 from numbers_mt(10000000) group by k1, k2, k3 settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=1, compile_aggregate_expressions = 1; + +drop table if exists test; +create table test (x UInt64) engine=File(Parquet); +set engine_file_allow_create_multiple_files = 1; +insert into test select * from numbers(10); +insert into test select * from numbers(10); +insert into test select * from numbers(10); + +select count() from test group by _file settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=0, compile_aggregate_expressions=0; +select count() from test group by _file settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=1, compile_aggregate_expressions=0; +select count() from test group by _file settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=0, compile_aggregate_expressions=1; +select count() from test group by _file settings 
optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=1, compile_aggregate_expressions=1; + +select count(), _file from test group by _file settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=0, compile_aggregate_expressions=0; +select count(), _file from test group by _file settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=1, compile_aggregate_expressions=0; +select count(), _file from test group by _file settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=0, compile_aggregate_expressions=1; +select count(), _file from test group by _file settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=1, compile_aggregate_expressions=1; + +select count() from test group by _file, _path settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=0, compile_aggregate_expressions=0; +select count() from test group by _file, _path settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=1, compile_aggregate_expressions=0; +select count() from test group by _file, _path settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=0, compile_aggregate_expressions=1; +select count() from test group by _file, _path settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=1, compile_aggregate_expressions=1; + +drop table test; From 27fb1b5ced804a37fee6c9fd110e029bb6cc01c5 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 23 Aug 2023 22:18:30 +0200 Subject: [PATCH 0272/1687] Fix test --- tests/queries/0_stateless/02845_group_by_constant_keys.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02845_group_by_constant_keys.sql b/tests/queries/0_stateless/02845_group_by_constant_keys.sql index 109a5a9a730..e16fd3fd1d2 100644 --- a/tests/queries/0_stateless/02845_group_by_constant_keys.sql +++ b/tests/queries/0_stateless/02845_group_by_constant_keys.sql @@ -4,7 +4,7 @@ select count(number), 1 AS k1, 2 as k2, 3 as k3 from numbers_mt(10000000) group select count(number), 1 AS k1, 2 as k2, 3 as k3 from numbers_mt(10000000) group by k1, k2, k3 settings optimize_group_by_constant_keys=1, enable_software_prefetch_in_aggregation=1, compile_aggregate_expressions = 1; drop table if exists test; -create table test (x UInt64) engine=File(Parquet); +create table test (x UInt64) engine=File(JSON); set engine_file_allow_create_multiple_files = 1; insert into test select * from numbers(10); insert into test select * from numbers(10); From 5154c1b9d0eb6fd484d2ca1f7920747bfc3de84e Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 23 Aug 2023 22:55:46 +0200 Subject: [PATCH 0273/1687] Use same UUIDs for tables on replicas created by RESTORE ON CLUSTER. 
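Without this, every host executing the CREATE part of RESTORE ON CLUSTER generates its own random table UUID. For a replicated table whose ZooKeeper path contains the "{uuid}" macro, the macro then expands differently on each host, the restored replicas register under different ZooKeeper paths, and they never see each other's inserts. A minimal sketch of the affected pattern (the schema itself does not matter; this mirrors the table used in the integration test added in a later commit):

CREATE TABLE tbl (x UInt8, y String)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{uuid}', '{replica}')
ORDER BY x;

With this change, the restore coordination (local or ZooKeeper-based) makes all hosts agree on a single generated UUID per restored table, so "{uuid}" expands identically everywhere.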
--- src/Backups/BackupUtils.cpp | 25 ++++++++ src/Backups/BackupUtils.h | 6 +- src/Backups/DDLAdjustingForBackupVisitor.cpp | 3 - src/Backups/IRestoreCoordination.h | 6 +- src/Backups/RestoreCoordinationLocal.cpp | 35 ++++++++++ src/Backups/RestoreCoordinationLocal.h | 7 ++ src/Backups/RestoreCoordinationRemote.cpp | 40 ++++++++++++ src/Backups/RestoreCoordinationRemote.h | 4 ++ src/Backups/RestorerFromBackup.cpp | 67 ++++++++++---------- src/Interpreters/InterpreterCreateQuery.cpp | 29 +++------ src/Parsers/ASTCreateQuery.cpp | 44 +++++++++++++ src/Parsers/ASTCreateQuery.h | 12 ++++ 12 files changed, 221 insertions(+), 57 deletions(-) diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index 89b75a103c2..6efca053f05 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -1,6 +1,9 @@ #include +#include #include #include +#include +#include #include #include @@ -95,4 +98,26 @@ AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & return required_access; } +bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context) +{ + auto adjust_before_comparison = [&](const IAST & query) -> ASTPtr + { + auto new_query = query.clone(); + adjustCreateQueryForBackup(new_query, global_context, nullptr); + ASTCreateQuery & create = typeid_cast(*new_query); + create.setUUID({}); + create.if_not_exists = false; + return new_query; + }; + + ASTPtr query1 = adjust_before_comparison(restored_table_create_query); + ASTPtr query2 = adjust_before_comparison(create_query_from_backup); + return serializeAST(*query1) == serializeAST(*query2); +} + +bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context) +{ + return compareRestoredTableDef(restored_database_create_query, create_query_from_backup, global_context); +} + } diff --git a/src/Backups/BackupUtils.h b/src/Backups/BackupUtils.h index 3dc0a58d304..7976de818e2 100644 --- a/src/Backups/BackupUtils.h +++ b/src/Backups/BackupUtils.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -12,8 +13,11 @@ class DDLRenamingMap; /// Initializes a DDLRenamingMap from a BACKUP or RESTORE query. DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements); - /// Returns access required to execute BACKUP query. AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements); +/// Checks the definition of a restored table - it must correspond to the definition from the backup. 
+bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context); +bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context); + } diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp index 8223e08f127..0bff3cc9f4e 100644 --- a/src/Backups/DDLAdjustingForBackupVisitor.cpp +++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp @@ -81,9 +81,6 @@ namespace void visitCreateQuery(ASTCreateQuery & create, const DDLAdjustingForBackupVisitor::Data & data) { - create.uuid = UUIDHelpers::Nil; - create.to_inner_uuid = UUIDHelpers::Nil; - if (create.storage) visitStorage(*create.storage, data); } diff --git a/src/Backups/IRestoreCoordination.h b/src/Backups/IRestoreCoordination.h index 2f9e8d171f6..fd6f014c326 100644 --- a/src/Backups/IRestoreCoordination.h +++ b/src/Backups/IRestoreCoordination.h @@ -7,6 +7,7 @@ namespace DB { class Exception; enum class UserDefinedSQLObjectType; +class ASTCreateQuery; /// Replicas use this class to coordinate what they're reading from a backup while executing RESTORE ON CLUSTER. /// There are two implementation of this interface: RestoreCoordinationLocal and RestoreCoordinationRemote. @@ -40,10 +41,13 @@ public: /// The function returns false if user-defined function at a specified zk path are being already restored by another replica. virtual bool acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type) = 0; + /// Generates a new UUID for a table. The same UUID must be used for a replicated table on each replica, + /// (because otherwise the macro "{uuid}" in the ZooKeeper path will not work correctly). 
+ virtual void generateUUIDForTable(ASTCreateQuery & create_query) = 0; + /// This function is used to check if concurrent restores are running /// other than the restore passed to the function virtual bool hasConcurrentRestores(const std::atomic & num_active_restores) const = 0; - }; } diff --git a/src/Backups/RestoreCoordinationLocal.cpp b/src/Backups/RestoreCoordinationLocal.cpp index 068c4fe7e52..6e9a05e06c2 100644 --- a/src/Backups/RestoreCoordinationLocal.cpp +++ b/src/Backups/RestoreCoordinationLocal.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -51,6 +52,40 @@ bool RestoreCoordinationLocal::acquireReplicatedSQLObjects(const String &, UserD return true; } +void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_query) +{ + String query_str = serializeAST(create_query); + + auto find_in_map = [&] + { + auto it = create_query_uuids.find(query_str); + if (it != create_query_uuids.end()) + { + create_query.setUUID(it->second); + return true; + } + return false; + }; + + { + std::lock_guard lock{mutex}; + if (find_in_map()) + return; + } + + create_query.setUUID({}); + auto new_uuids = create_query.generateRandomUUID(); + String new_query_str = serializeAST(create_query); + + { + std::lock_guard lock{mutex}; + if (find_in_map()) + return; + create_query_uuids[query_str] = new_uuids; + create_query_uuids[new_query_str] = new_uuids; + } +} + bool RestoreCoordinationLocal::hasConcurrentRestores(const std::atomic & num_active_restores) const { if (num_active_restores > 1) diff --git a/src/Backups/RestoreCoordinationLocal.h b/src/Backups/RestoreCoordinationLocal.h index e27f0d1ef88..339b754fca5 100644 --- a/src/Backups/RestoreCoordinationLocal.h +++ b/src/Backups/RestoreCoordinationLocal.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -39,6 +40,10 @@ public: /// The function returns false if user-defined function at a specified zk path are being already restored by another replica. bool acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type) override; + /// Generates a new UUID for a table. The same UUID must be used for a replicated table on each replica, + /// (because otherwise the macro "{uuid}" in the ZooKeeper path will not work correctly). 
+ void generateUUIDForTable(ASTCreateQuery & create_query) override; + bool hasConcurrentRestores(const std::atomic & num_active_restores) const override; private: @@ -46,6 +51,8 @@ private: std::set> acquired_tables_in_replicated_databases; std::unordered_set acquired_data_in_replicated_tables; + std::unordered_map create_query_uuids; + mutable std::mutex mutex; }; diff --git a/src/Backups/RestoreCoordinationRemote.cpp b/src/Backups/RestoreCoordinationRemote.cpp index 37abebb26b7..077921f4d32 100644 --- a/src/Backups/RestoreCoordinationRemote.cpp +++ b/src/Backups/RestoreCoordinationRemote.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include @@ -87,6 +89,7 @@ void RestoreCoordinationRemote::createRootNodes() ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_tables_data_acquired", "", zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_access_storages_acquired", "", zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_sql_objects_acquired", "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/table_uuids", "", zkutil::CreateMode::Persistent)); zk->tryMulti(ops, responses); }); } @@ -231,6 +234,43 @@ bool RestoreCoordinationRemote::acquireReplicatedSQLObjects(const String & loade return result; } +void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_query) +{ + String query_str = serializeAST(create_query); + + create_query.setUUID({}); + String new_uuids_str = create_query.generateRandomUUID().toString(); + String new_query_str = serializeAST(create_query); + + auto holder = with_retries.createRetriesControlHolder("generateUUIDForTable"); + holder.retries_ctl.retryLoop( + [&, &zk = holder.faulty_zookeeper]() + { + with_retries.renewZooKeeper(zk); + + String path = zookeeper_path + "/table_uuids/" + escapeForFileName(query_str); + String path2 = zookeeper_path + "/table_uuids/" + escapeForFileName(new_query_str); + + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCreateRequest(path, new_uuids_str, zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(path2, new_uuids_str, zkutil::CreateMode::Persistent)); + + Coordination::Responses responses; + Coordination::Error res = zk->tryMulti(ops, responses); + + if (res == Coordination::Error::ZOK) + return; + + if ((res == Coordination::Error::ZNODEEXISTS) && (responses[0]->error == Coordination::Error::ZNODEEXISTS)) + { + create_query.setUUID(ASTCreateQuery::UUIDs::fromString(zk->get(path))); + return; + } + + zkutil::KeeperMultiException::check(res, ops, responses); + }); +} + void RestoreCoordinationRemote::removeAllNodes() { /// Usually this function is called by the initiator when a restore operation is complete so we don't need the coordination anymore. diff --git a/src/Backups/RestoreCoordinationRemote.h b/src/Backups/RestoreCoordinationRemote.h index eb0fcff9c2d..22d0c0ed6df 100644 --- a/src/Backups/RestoreCoordinationRemote.h +++ b/src/Backups/RestoreCoordinationRemote.h @@ -46,6 +46,10 @@ public: /// The function returns false if user-defined function at a specified zk path are being already restored by another replica. bool acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type) override; + /// Generates a new UUID for a table. 
The same UUID must be used for a replicated table on each replica, + /// (because otherwise the macro "{uuid}" in the ZooKeeper path will not work correctly). + void generateUUIDForTable(ASTCreateQuery & create_query) override; + bool hasConcurrentRestores(const std::atomic & num_active_restores) const override; private: diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 8a0908c6b67..026671edd6a 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -571,12 +571,14 @@ void RestorerFromBackup::createDatabase(const String & database_name) const if (database_info.is_predefined_database) return; - auto create_database_query = database_info.create_database_query; - if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists) - { - create_database_query = create_database_query->clone(); - create_database_query->as().if_not_exists = true; - } + auto create_database_query = typeid_cast>(database_info.create_database_query->clone()); + + /// Generate a new UUID for a database. + /// The generated UUID will be ignored if the database does not support UUIDs. + restore_coordination->generateUUIDForTable(*create_database_query); + + /// Add the clause `IF NOT EXISTS` if that is specified in the restore settings. + create_database_query->if_not_exists = (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists); LOG_TRACE(log, "Creating database {}: {}", backQuoteIfNeed(database_name), serializeAST(*create_database_query)); @@ -605,17 +607,17 @@ void RestorerFromBackup::checkDatabase(const String & database_name) if (!restore_settings.allow_different_database_def && !database_info.is_predefined_database) { /// Check that the database's definition is the same as expected. - ASTPtr create_database_query = database->getCreateDatabaseQuery(); - adjustCreateQueryForBackup(create_database_query, context->getGlobalContext(), nullptr); - ASTPtr expected_create_query = database_info.create_database_query; - if (serializeAST(*create_database_query) != serializeAST(*expected_create_query)) + + ASTPtr existing_database_def = database->getCreateDatabaseQuery(); + ASTPtr database_def_from_backup = database_info.create_database_query; + if (!compareRestoredDatabaseDef(*existing_database_def, *database_def_from_backup, context->getGlobalContext())) { throw Exception( ErrorCodes::CANNOT_RESTORE_DATABASE, "The database has a different definition: {} " "comparing to its definition in the backup: {}", - serializeAST(*create_database_query), - serializeAST(*expected_create_query)); + serializeAST(*existing_database_def), + serializeAST(*database_def_from_backup)); } } } @@ -714,20 +716,23 @@ void RestorerFromBackup::createTable(const QualifiedTableName & table_name) if (table_info.is_predefined_table) return; - auto create_table_query = table_info.create_table_query; - if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists) - { - create_table_query = create_table_query->clone(); - create_table_query->as().if_not_exists = true; - } + auto create_table_query = typeid_cast>(table_info.create_table_query->clone()); + + /// Generate a new UUID for a table (the same table on different hosts must use the same UUID, `restore_coordination` will make it so). + /// The generated UUID will be ignored if the database does not support UUIDs. + restore_coordination->generateUUIDForTable(*create_table_query); + + /// Add the clause `IF NOT EXISTS` if that is specified in the restore settings. 
+ create_table_query->if_not_exists = (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists); LOG_TRACE( log, "Creating {}: {}", tableNameWithTypeToString(table_name.database, table_name.table, false), serializeAST(*create_table_query)); try { - DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_name.database); - table_info.database = database; + if (!table_info.database) + table_info.database = DatabaseCatalog::instance().getDatabase(table_name.database); + DatabasePtr database = table_info.database; /// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some /// database-specific things). @@ -747,37 +752,33 @@ void RestorerFromBackup::createTable(const QualifiedTableName & table_name) void RestorerFromBackup::checkTable(const QualifiedTableName & table_name) { auto & table_info = table_infos.at(table_name); - auto database = table_info.database; try { - if (!database) - { - database = DatabaseCatalog::instance().getDatabase(table_name.database); - table_info.database = database; - } - auto resolved_id = (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE) ? context->resolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal) : context->resolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal); + if (!table_info.database) + table_info.database = DatabaseCatalog::instance().getDatabase(table_name.database); + DatabasePtr database = table_info.database; + StoragePtr storage = database->getTable(resolved_id.table_name, context); table_info.storage = storage; table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); if (!restore_settings.allow_different_table_def && !table_info.is_predefined_table) { - ASTPtr create_table_query = database->getCreateTableQuery(resolved_id.table_name, context); - adjustCreateQueryForBackup(create_table_query, context->getGlobalContext(), nullptr); - ASTPtr expected_create_query = table_info.create_table_query; - if (serializeAST(*create_table_query) != serializeAST(*expected_create_query)) + ASTPtr existing_table_def = database->getCreateTableQuery(resolved_id.table_name, context); + ASTPtr table_def_from_backup = table_info.create_table_query; + if (!compareRestoredTableDef(*existing_table_def, *table_def_from_backup, context->getGlobalContext())) { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE, "The table has a different definition: {} " "comparing to its definition in the backup: {}", - serializeAST(*create_table_query), - serializeAST(*expected_create_query)); + serializeAST(*existing_table_def), + serializeAST(*table_def_from_backup)); } } } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 92d74f4f18a..6da99a79f91 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -219,10 +219,12 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) else { bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; - if (create.uuid != UUIDHelpers::Nil && !is_on_cluster) + if (create.uuid != UUIDHelpers::Nil && !is_on_cluster && !internal) throw Exception(ErrorCodes::INCORRECT_QUERY, "Ordinary database engine does not support UUID"); - /// Ignore UUID if it's ON CLUSTER query + /// The database doesn't support UUID so we'll ignore it. 
The UUID could be set here because of either + /// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or + /// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts. create.uuid = UUIDHelpers::Nil; metadata_path = metadata_path / "metadata" / database_name_escaped; } @@ -983,19 +985,6 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); } -static void generateUUIDForTable(ASTCreateQuery & create) -{ - if (create.uuid == UUIDHelpers::Nil) - create.uuid = UUIDHelpers::generateV4(); - - /// If destination table (to_table_id) is not specified for materialized view, - /// then MV will create inner table. We should generate UUID of inner table here, - /// so it will be the same on all hosts if query in ON CLUSTER or database engine is Replicated. - bool need_uuid_for_inner_table = !create.attach && create.is_materialized_view && !create.to_table_id; - if (need_uuid_for_inner_table && create.to_inner_uuid == UUIDHelpers::Nil) - create.to_inner_uuid = UUIDHelpers::generateV4(); -} - void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const DatabasePtr & database) const { const auto * kind = create.is_dictionary ? "Dictionary" : "Table"; @@ -1028,17 +1017,19 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data kind_upper, create.table); } - generateUUIDForTable(create); + create.generateRandomUUID(); } else { bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; bool has_uuid = create.uuid != UUIDHelpers::Nil || create.to_inner_uuid != UUIDHelpers::Nil; - if (has_uuid && !is_on_cluster) + if (has_uuid && !is_on_cluster && !internal) throw Exception(ErrorCodes::INCORRECT_QUERY, "{} UUID specified, but engine of database {} is not Atomic", kind, create.getDatabase()); - /// Ignore UUID if it's ON CLUSTER query + /// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either + /// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or + /// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts. create.uuid = UUIDHelpers::Nil; create.to_inner_uuid = UUIDHelpers::Nil; } @@ -1619,7 +1610,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont /// For CREATE query generate UUID on initiator, so it will be the same on all hosts. /// It will be ignored if database does not support UUIDs. - generateUUIDForTable(create); + create.generateRandomUUID(); /// For cross-replication cluster we cannot use UUID in replica path. 
String cluster_name_expanded = local_context->getMacros()->expand(cluster_name); diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 196681a8801..bb0e4008491 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include namespace DB @@ -460,4 +462,46 @@ bool ASTCreateQuery::isParameterizedView() const return false; } + +ASTCreateQuery::UUIDs::UUIDs(const ASTCreateQuery & query) + : uuid(query.uuid) + , to_inner_uuid(query.to_inner_uuid) +{ +} + +String ASTCreateQuery::UUIDs::toString() const +{ + WriteBufferFromOwnString out; + out << "{" << uuid << "," << to_inner_uuid << "}"; + return out.str(); +} + +ASTCreateQuery::UUIDs ASTCreateQuery::UUIDs::fromString(const String & str) +{ + ReadBufferFromString in{str}; + ASTCreateQuery::UUIDs res; + in >> "{" >> res.uuid >> "," >> res.to_inner_uuid >> "}"; + return res; +} + +ASTCreateQuery::UUIDs ASTCreateQuery::generateRandomUUID() +{ + if (uuid == UUIDHelpers::Nil) + uuid = UUIDHelpers::generateV4(); + + /// If destination table (to_table_id) is not specified for materialized view, + /// then MV will create inner table. We should generate UUID of inner table here. + bool need_uuid_for_inner_table = !attach && is_materialized_view && !to_table_id; + if (need_uuid_for_inner_table && (to_inner_uuid == UUIDHelpers::Nil)) + to_inner_uuid = UUIDHelpers::generateV4(); + + return UUIDs{*this}; +} + +void ASTCreateQuery::setUUID(const UUIDs & uuids) +{ + uuid = uuids.uuid; + to_inner_uuid = uuids.to_inner_uuid; +} + } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index ae45a244a03..25d910913a4 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -146,6 +146,18 @@ public: QueryKind getQueryKind() const override { return QueryKind::Create; } + struct UUIDs + { + UUID uuid = UUIDHelpers::Nil; + UUID to_inner_uuid = UUIDHelpers::Nil; + UUIDs() = default; + explicit UUIDs(const ASTCreateQuery & query); + String toString() const; + static UUIDs fromString(const String & str); + }; + UUIDs generateRandomUUID(); + void setUUID(const UUIDs & uuids); + protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; From 95de1610b81be44e2dffe94b1835ca309b10af43 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 23 Aug 2023 17:10:18 -0400 Subject: [PATCH 0274/1687] Update pull_request.yml --- .github/workflows/pull_request.yml | 78 +++++++++++++++--------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 1129ee20d1f..15d1f508e1b 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -242,9 +242,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ######################################################################################### - #################################### ORDINARY BUILDS #################################### - ######################################################################################### +######################################################################################### +#################################### ORDINARY BUILDS #################################### 
+######################################################################################### BuilderDebRelease: needs: [DockerHubPush, FastTest, StyleCheck] runs-on: [self-hosted, builder] @@ -593,9 +593,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - ########################################################################################## - ##################################### SPECIAL BUILDS ##################################### - ########################################################################################## +########################################################################################## +##################################### SPECIAL BUILDS ##################################### +########################################################################################## BuilderDebShared: needs: [DockerHubPush, FastTest, StyleCheck] runs-on: [self-hosted, builder] @@ -983,9 +983,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - ############################################################################################ - ##################################### Docker images ####################################### - ############################################################################################ +############################################################################################ +##################################### Docker images ####################################### +############################################################################################ DockerServerImages: needs: - BuilderDebRelease @@ -1011,9 +1011,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################ - ##################################### BUILD REPORTER ####################################### - ############################################################################################ +############################################################################################ +##################################### BUILD REPORTER ####################################### +############################################################################################ BuilderReport: needs: - BuilderBinRelease @@ -1106,9 +1106,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################## - ########################### FUNCTIONAl STATELESS TESTS ####################################### - ############################################################################################## +############################################################################################## +########################### FUNCTIONAl STATELESS TESTS ####################################### +############################################################################################## FunctionalStatelessTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -2067,9 +2067,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty 
docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################## - ############################ FUNCTIONAl STATEFUL TESTS ####################################### - ############################################################################################## +############################################################################################## +############################ FUNCTIONAl STATEFUL TESTS ####################################### +############################################################################################## FunctionalStatefulTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -2315,9 +2315,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################## - ######################################### STRESS TESTS ####################################### - ############################################################################################## +############################################################################################## +######################################### STRESS TESTS ####################################### +############################################################################################## StressTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -2492,9 +2492,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################## - ##################################### AST FUZZERS ############################################ - ############################################################################################## +############################################################################################## +##################################### AST FUZZERS ############################################ +############################################################################################## ASTFuzzerTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2665,9 +2665,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################# - ############################# INTEGRATION TESTS ############################################# - ############################################################################################# +############################################################################################# +############################# INTEGRATION TESTS ############################################# +############################################################################################# IntegrationTestsAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -3026,9 +3026,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - 
############################################################################################# - #################################### UNIT TESTS ############################################# - ############################################################################################# +############################################################################################# +#################################### UNIT TESTS ############################################# +############################################################################################# UnitTestsAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -3199,9 +3199,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################# - #################################### PERFORMANCE TESTS ###################################### - ############################################################################################# +############################################################################################# +#################################### PERFORMANCE TESTS ###################################### +############################################################################################# PerformanceComparisonX86-0: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] @@ -3490,9 +3490,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################# - #################################### OSS FUZZ ############################################### - ############################################################################################# +############################################################################################# +#################################### OSS FUZZ ############################################### +############################################################################################# Fuzzing: if: contains(github.event.pull_request.labels.*.name, 'build-fuzzers') runs-on: [self-hosted, fuzzer-unit-tester] @@ -3521,9 +3521,9 @@ jobs: with: name: ${{ matrix.sanitizer }}-artifacts path: ./out/artifacts - ############################################################################################# - ###################################### JEPSEN TESTS ######################################### - ############################################################################################# +############################################################################################# +###################################### JEPSEN TESTS ######################################### +############################################################################################# Jepsen: # This is special test NOT INCLUDED in FinishCheck # When it's skipped, all dependent tasks will be skipped too. From 7b9d6c4b9645fa3a9c726dd967ad71236c86c5e5 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 23 Aug 2023 22:56:01 +0200 Subject: [PATCH 0275/1687] Add test. 
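The test restores a replicated table that keeps the "{uuid}" macro in its ZooKeeper path under a new name (`tbl2`) on both nodes, and then checks that inserts into the restored table replicate. A quick manual way to confirm the coordinated UUID generation worked (a sketch, assuming the tables live in the current database) is to compare the table UUID across hosts:

SELECT uuid FROM system.tables WHERE database = currentDatabase() AND name = 'tbl2';

Every replica must return the same value; if the UUIDs differed, the "{uuid}"-based replication paths would diverge and the SYSTEM SYNC REPLICA calls in the test would not converge.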
--- .../test_backup_restore_on_cluster/test.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index a9ebbeb66c2..dfce2f15413 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -276,6 +276,37 @@ def test_table_with_parts_in_queue_considered_non_empty(): ) +def test_replicated_table_with_uuid_in_zkpath(): + node1.query( + "CREATE TABLE tbl ON CLUSTER 'cluster' (" + "x UInt8, y String" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/{uuid}','{replica}')" + "ORDER BY x" + ) + + node1.query("INSERT INTO tbl VALUES (1, 'AA')") + node2.query("INSERT INTO tbl VALUES (2, 'BB')") + + backup_name = new_backup_name() + node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") + + # The table `tbl2` is expected to have a different UUID so it's ok to have both `tbl` and `tbl2` at the same time. + node2.query(f"RESTORE TABLE tbl AS tbl2 ON CLUSTER 'cluster' FROM {backup_name}") + + node1.query("INSERT INTO tbl2 VALUES (3, 'CC')") + + node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") + node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl2") + + for instance in [node1, node2]: + assert instance.query("SELECT * FROM tbl ORDER BY x") == TSV( + [[1, "AA"], [2, "BB"]] + ) + assert instance.query("SELECT * FROM tbl2 ORDER BY x") == TSV( + [[1, "AA"], [2, "BB"], [3, "CC"]] + ) + + def test_replicated_table_with_not_synced_insert(): node1.query( "CREATE TABLE tbl ON CLUSTER 'cluster' (" From 4c16f34784471dab3da553f37c544b0fc402472d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?= =?UTF-8?q?=D0=9D=D0=B0=D0=BC?= Date: Wed, 23 Aug 2023 23:45:19 +0000 Subject: [PATCH 0276/1687] Perf-test edit --- a.txt | 1 + b.txt | 1 + c.txt | 1 + metadata/INFORMATION_SCHEMA.sql | 2 + metadata/default | 1 + metadata/default.sql | 2 + metadata/information_schema.sql | 2 + metadata/system | 1 + metadata/system.sql | 2 + ...se_server_wait_server_pool_long.config.xml | 39 +++++++++++++++++ ...allow_plaintext_and_no_password.config.xml | 28 +++++++++++++ ...2422_allow_implicit_no_password.config.xml | 27 ++++++++++++ preprocessed_configs/config.xml | 42 +++++++++++++++++++ status | 3 ++ tests/performance/codec_gcd.xml | 7 ++-- uuid | 1 + 16 files changed, 156 insertions(+), 4 deletions(-) create mode 100644 a.txt create mode 100644 b.txt create mode 100644 c.txt create mode 100644 metadata/INFORMATION_SCHEMA.sql create mode 120000 metadata/default create mode 100644 metadata/default.sql create mode 100644 metadata/information_schema.sql create mode 120000 metadata/system create mode 100644 metadata/system.sql create mode 100644 preprocessed_configs/01737_clickhouse_server_wait_server_pool_long.config.xml create mode 100644 preprocessed_configs/02207_allow_plaintext_and_no_password.config.xml create mode 100644 preprocessed_configs/02422_allow_implicit_no_password.config.xml create mode 100644 preprocessed_configs/config.xml create mode 100644 status create mode 100644 uuid diff --git a/a.txt b/a.txt new file mode 100644 index 00000000000..9606d7d1d72 --- /dev/null +++ b/a.txt @@ -0,0 +1 @@ +aaaaaaaaa \ No newline at end of file diff --git a/b.txt b/b.txt new file mode 100644 index 00000000000..50e2dfd3b13 --- /dev/null +++ b/b.txt @@ -0,0 +1 @@ +bbbbbbbbb \ No newline at end of file diff --git a/c.txt b/c.txt new file mode 100644 
index 00000000000..9d7f9d63ef2 --- /dev/null +++ b/c.txt @@ -0,0 +1 @@ +ccccccccc \ No newline at end of file diff --git a/metadata/INFORMATION_SCHEMA.sql b/metadata/INFORMATION_SCHEMA.sql new file mode 100644 index 00000000000..291582fd1eb --- /dev/null +++ b/metadata/INFORMATION_SCHEMA.sql @@ -0,0 +1,2 @@ +ATTACH DATABASE INFORMATION_SCHEMA +ENGINE = Memory diff --git a/metadata/default b/metadata/default new file mode 120000 index 00000000000..3ddc943ad10 --- /dev/null +++ b/metadata/default @@ -0,0 +1 @@ +/home/ubuntu/ClickHouse/store/93f/93f22e9a-fe34-4ce5-b663-c9f0567952b3/ \ No newline at end of file diff --git a/metadata/default.sql b/metadata/default.sql new file mode 100644 index 00000000000..73c122bc4af --- /dev/null +++ b/metadata/default.sql @@ -0,0 +1,2 @@ +ATTACH DATABASE _ UUID '93f22e9a-fe34-4ce5-b663-c9f0567952b3' +ENGINE = Atomic diff --git a/metadata/information_schema.sql b/metadata/information_schema.sql new file mode 100644 index 00000000000..6cea934b49d --- /dev/null +++ b/metadata/information_schema.sql @@ -0,0 +1,2 @@ +ATTACH DATABASE information_schema +ENGINE = Memory diff --git a/metadata/system b/metadata/system new file mode 120000 index 00000000000..7700b92ceb6 --- /dev/null +++ b/metadata/system @@ -0,0 +1 @@ +/home/ubuntu/ClickHouse/store/556/556d1975-4e6d-4344-9e80-3de3c9e41691/ \ No newline at end of file diff --git a/metadata/system.sql b/metadata/system.sql new file mode 100644 index 00000000000..eefaeb68f02 --- /dev/null +++ b/metadata/system.sql @@ -0,0 +1,2 @@ +ATTACH DATABASE _ UUID '556d1975-4e6d-4344-9e80-3de3c9e41691' +ENGINE = Atomic diff --git a/preprocessed_configs/01737_clickhouse_server_wait_server_pool_long.config.xml b/preprocessed_configs/01737_clickhouse_server_wait_server_pool_long.config.xml new file mode 100644 index 00000000000..f93e6d470d2 --- /dev/null +++ b/preprocessed_configs/01737_clickhouse_server_wait_server_pool_long.config.xml @@ -0,0 +1,39 @@ + + + + + trace + true + + + 9000 + + ./ + + 0 + + + + + + + ::/0 + + + default + default + 1 + + + + + + + + + + + diff --git a/preprocessed_configs/02207_allow_plaintext_and_no_password.config.xml b/preprocessed_configs/02207_allow_plaintext_and_no_password.config.xml new file mode 100644 index 00000000000..2a21805bc4b --- /dev/null +++ b/preprocessed_configs/02207_allow_plaintext_and_no_password.config.xml @@ -0,0 +1,28 @@ + + + + + trace + true + + + 9000 + 0 + 0 + . + 0 + + + + + users.xml + + + + ./ + + + diff --git a/preprocessed_configs/02422_allow_implicit_no_password.config.xml b/preprocessed_configs/02422_allow_implicit_no_password.config.xml new file mode 100644 index 00000000000..aaba080fbb2 --- /dev/null +++ b/preprocessed_configs/02422_allow_implicit_no_password.config.xml @@ -0,0 +1,27 @@ + + + + + trace + true + + + 9000 + 0 + . 
+ 0 + + + + + users.xml + + + + ./ + + + diff --git a/preprocessed_configs/config.xml b/preprocessed_configs/config.xml new file mode 100644 index 00000000000..2e19ef1f75f --- /dev/null +++ b/preprocessed_configs/config.xml @@ -0,0 +1,42 @@ + + + + + + trace + true + + + 8123 + 9000 + 9004 + + ./ + + true + + + + + + + ::/0 + + + default + default + 1 + + + + + + + + + + + diff --git a/status b/status new file mode 100644 index 00000000000..bf3f66f88f6 --- /dev/null +++ b/status @@ -0,0 +1,3 @@ +PID: 2786663 +Started at: 2023-08-23 10:31:43 +Revision: 54477 diff --git a/tests/performance/codec_gcd.xml b/tests/performance/codec_gcd.xml index e6caa4beb82..897f312c70e 100644 --- a/tests/performance/codec_gcd.xml +++ b/tests/performance/codec_gcd.xml @@ -1,9 +1,8 @@ CREATE TABLE gcd_codec (n UInt64 CODEC(GCD, LZ4)) ENGINE = MergeTree ORDER BY tuple() - INSERT INTO gcd_codec SELECT number FROM system.numbers LIMIT 20000000 SETTINGS max_threads=1 - INSERT INTO gcd_codec SELECT number*1000 FROM system.numbers LIMIT 20000000 SETTINGS max_threads=1 - INSERT INTO gcd_codec SELECT intHash64(number) FROM system.numbers LIMIT 20000000 SETTINGS max_threads=1 + INSERT INTO gcd_codec SELECT * FROM generateRandom() LIMIT 133722869 SETTINGS max_threads=1 + SELECT * FROM gcd_codec SETTINGS max_threads=1 DROP TABLE gcd_codec - \ No newline at end of file +
diff --git a/uuid b/uuid
new file mode 100644
index 00000000000..0929c96512d
--- /dev/null
+++ b/uuid
@@ -0,0 +1 @@
+fdc770bd-b7a0-4c10-a26e-798c2d72c736
\ No newline at end of file

From 32a57300ffd70eef5ee571ed976ff4629d0fe7d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80=20?=
 =?UTF-8?q?=D0=9D=D0=B0=D0=BC?=
Date: Wed, 23 Aug 2023 23:48:47 +0000
Subject: [PATCH 0277/1687] Deleted extra files

---
 a.txt                                         | 1 -
 b.txt                                         | 1 -
 c.txt                                         | 1 -
 metadata/INFORMATION_SCHEMA.sql               | 2 -
 metadata/default                              | 1 -
 metadata/default.sql                          | 2 -
 metadata/information_schema.sql               | 2 -
 metadata/system                               | 1 -
 metadata/system.sql                           | 2 -
 ...se_server_wait_server_pool_long.config.xml | 39 -----------------
 ...allow_plaintext_and_no_password.config.xml | 28 -------------
 ...2422_allow_implicit_no_password.config.xml | 27 ------------
 preprocessed_configs/config.xml               | 42 -------------------
 13 files changed, 149 deletions(-)
 delete mode 100644 a.txt
 delete mode 100644 b.txt
 delete mode 100644 c.txt
 delete mode 100644 metadata/INFORMATION_SCHEMA.sql
 delete mode 120000 metadata/default
 delete mode 100644 metadata/default.sql
 delete mode 100644 metadata/information_schema.sql
 delete mode 120000 metadata/system
 delete mode 100644 metadata/system.sql
 delete mode 100644 preprocessed_configs/01737_clickhouse_server_wait_server_pool_long.config.xml
 delete mode 100644 preprocessed_configs/02207_allow_plaintext_and_no_password.config.xml
 delete mode 100644 preprocessed_configs/02422_allow_implicit_no_password.config.xml
 delete mode 100644 preprocessed_configs/config.xml

diff --git a/a.txt b/a.txt
deleted file mode 100644
index 9606d7d1d72..00000000000
--- a/a.txt
+++ /dev/null
@@ -1 +0,0 @@
-aaaaaaaaa
\ No newline at end of file
diff --git a/b.txt b/b.txt
deleted file mode 100644
index 50e2dfd3b13..00000000000
--- a/b.txt
+++ /dev/null
@@ -1 +0,0 @@
-bbbbbbbbb
\ No newline at end of file
diff --git a/c.txt b/c.txt
deleted file mode 100644
index 9d7f9d63ef2..00000000000
--- a/c.txt
+++ /dev/null
@@ -1 +0,0 @@
-ccccccccc
\ No newline at end of file
diff --git a/metadata/INFORMATION_SCHEMA.sql b/metadata/INFORMATION_SCHEMA.sql
deleted file mode 100644
index 291582fd1eb..00000000000
--- a/metadata/INFORMATION_SCHEMA.sql
+++ /dev/null
@@ -1,2 +0,0 @@
-ATTACH DATABASE INFORMATION_SCHEMA
-ENGINE = Memory
diff --git a/metadata/default b/metadata/default
deleted file mode 120000
index 3ddc943ad10..00000000000
--- a/metadata/default
+++ /dev/null
@@ -1 +0,0 @@
-/home/ubuntu/ClickHouse/store/93f/93f22e9a-fe34-4ce5-b663-c9f0567952b3/
\ No newline at end of file
diff --git a/metadata/default.sql b/metadata/default.sql
deleted file mode 100644
index 73c122bc4af..00000000000
--- a/metadata/default.sql
+++ /dev/null
@@ -1,2 +0,0 @@
-ATTACH DATABASE _ UUID '93f22e9a-fe34-4ce5-b663-c9f0567952b3'
-ENGINE = Atomic
diff --git a/metadata/information_schema.sql b/metadata/information_schema.sql
deleted file mode 100644
index 6cea934b49d..00000000000
--- a/metadata/information_schema.sql
+++ /dev/null
@@ -1,2 +0,0 @@
-ATTACH DATABASE information_schema
-ENGINE = Memory
diff --git a/metadata/system b/metadata/system
deleted file mode 120000
index 7700b92ceb6..00000000000
--- a/metadata/system
+++ /dev/null
@@ -1 +0,0 @@
-/home/ubuntu/ClickHouse/store/556/556d1975-4e6d-4344-9e80-3de3c9e41691/
\ No newline at end of file
diff --git a/metadata/system.sql b/metadata/system.sql
deleted file mode 100644
index eefaeb68f02..00000000000
--- a/metadata/system.sql
+++ /dev/null
@@ -1,2 +0,0 @@
-ATTACH DATABASE _ UUID '556d1975-4e6d-4344-9e80-3de3c9e41691'
-ENGINE = Atomic
diff --git a/preprocessed_configs/01737_clickhouse_server_wait_server_pool_long.config.xml b/preprocessed_configs/01737_clickhouse_server_wait_server_pool_long.config.xml
deleted file mode 100644
index f93e6d470d2..00000000000
--- a/preprocessed_configs/01737_clickhouse_server_wait_server_pool_long.config.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-
-
-
-
- trace
- true
-
-
- 9000
-
- ./
-
- 0
-
-
-
-
-
-
- ::/0
-
-
- default
- default
- 1
-
-
-
-
-
-
-
-
-
-
diff --git a/preprocessed_configs/02207_allow_plaintext_and_no_password.config.xml b/preprocessed_configs/02207_allow_plaintext_and_no_password.config.xml
deleted file mode 100644
index 2a21805bc4b..00000000000
--- a/preprocessed_configs/02207_allow_plaintext_and_no_password.config.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-
-
-
-
- trace
- true
-
-
- 9000
- 0
- 0
- .
- 0
-
-
-
-
- users.xml
-
-
-
- ./
-
-
-
diff --git a/preprocessed_configs/02422_allow_implicit_no_password.config.xml b/preprocessed_configs/02422_allow_implicit_no_password.config.xml
deleted file mode 100644
index aaba080fbb2..00000000000
--- a/preprocessed_configs/02422_allow_implicit_no_password.config.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-
-
-
-
- trace
- true
-
-
- 9000
- 0
- .
- 0
-
-
-
-
- users.xml
-
-
-
- ./
-
-
-
diff --git a/preprocessed_configs/config.xml b/preprocessed_configs/config.xml
deleted file mode 100644
index 2e19ef1f75f..00000000000
--- a/preprocessed_configs/config.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
-
-
-
- trace
- true
-
-
- 8123
- 9000
- 9004
-
- ./
-
- true
-
-
-
-
-
-
- ::/0
-
-
- default
- default
- 1
-
-
-
-
-
-
-
-
-
-

From 73a9d4478eeef5b740f6fd2247cdd74575638f7c Mon Sep 17 00:00:00 2001
From: seshWCS
Date: Thu, 24 Aug 2023 00:11:16 +0000
Subject: [PATCH 0278/1687] Deleted extra

---
 status | 3 ---
 uuid   | 1 -
 2 files changed, 4 deletions(-)
 delete mode 100644 status
 delete mode 100644 uuid

diff --git a/status b/status
deleted file mode 100644
index bf3f66f88f6..00000000000
--- a/status
+++ /dev/null
@@ -1,3 +0,0 @@
-PID: 2786663
-Started at: 2023-08-23 10:31:43
-Revision: 54477
diff --git a/uuid b/uuid
deleted file mode 100644
index 0929c96512d..00000000000
--- a/uuid
+++ /dev/null
@@ -1 +0,0 @@
-fdc770bd-b7a0-4c10-a26e-798c2d72c736
\ No newline at end of file

From 893de8934dbec1b7663d31e8201df540af9772f5 Mon Sep 17 00:00:00 2001
From: seshWCS
Date: Thu, 24 Aug 2023 00:13:49 +0000
Subject: [PATCH 0279/1687] Edited perf-test

---
 tests/performance/codec_gcd.xml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/performance/codec_gcd.xml b/tests/performance/codec_gcd.xml
index 897f312c70e..bbceec8f4a8 100644
--- a/tests/performance/codec_gcd.xml
+++ b/tests/performance/codec_gcd.xml
@@ -1,8 +1,14 @@
 CREATE TABLE gcd_codec (n UInt64 CODEC(GCD, LZ4)) ENGINE = MergeTree ORDER BY tuple()
+ CREATE TABLE delta_codec (n UInt64 CODEC(Delta, LZ4)) ENGINE = MergeTree ORDER BY tuple()
+
 INSERT INTO gcd_codec SELECT * FROM generateRandom() LIMIT 133722869 SETTINGS max_threads=1
 SELECT * FROM gcd_codec SETTINGS max_threads=1
+ INSERT INTO delta_codec SELECT * FROM generateRandom() LIMIT 133722869 SETTINGS max_threads=1
+ SELECT * FROM delta_codec_codec SETTINGS max_threads=1
+
 DROP TABLE gcd_codec
+ DROP TABLE delta_codec

From c82e6fdb231ea442944922840d14f6600c0a70d0 Mon Sep 17 00:00:00 2001
From: seshWCS
Date: Thu, 24 Aug 2023 00:16:00 +0000
Subject: [PATCH 0280/1687] Fix

---
 tests/performance/codec_gcd.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/performance/codec_gcd.xml b/tests/performance/codec_gcd.xml
index bbceec8f4a8..47729631136 100644
--- a/tests/performance/codec_gcd.xml
+++ b/tests/performance/codec_gcd.xml
@@ -3,10 +3,10 @@
 CREATE TABLE delta_codec (n UInt64 CODEC(Delta, LZ4)) ENGINE = MergeTree ORDER BY tuple()
- INSERT INTO gcd_codec SELECT * FROM generateRandom() LIMIT 133722869 SETTINGS max_threads=1
+ INSERT INTO gcd_codec SELECT * FROM generateRandom() LIMIT 133722877 SETTINGS max_threads=1
 SELECT * FROM gcd_codec SETTINGS max_threads=1
- INSERT INTO delta_codec SELECT * FROM generateRandom() LIMIT 133722869 SETTINGS max_threads=1
+ INSERT INTO delta_codec SELECT * FROM generateRandom() LIMIT 133722877 SETTINGS max_threads=1
 SELECT * FROM delta_codec_codec SETTINGS max_threads=1
 DROP TABLE gcd_codec

From 2b80dbbb341d5bb0b9c221a8c75382065b6dbb5c Mon Sep 17 00:00:00 2001
From: seshWCS
Date: Thu, 24 Aug 2023 00:20:09 +0000
Subject: [PATCH 0281/1687] Fix

---
 tests/performance/codec_gcd.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/performance/codec_gcd.xml b/tests/performance/codec_gcd.xml
index 47729631136..06134f8c0b5 100644
--- a/tests/performance/codec_gcd.xml
+++ b/tests/performance/codec_gcd.xml
@@ -3,10 +3,10 @@
 CREATE TABLE delta_codec (n UInt64 CODEC(Delta, LZ4)) ENGINE = MergeTree ORDER BY tuple()
- INSERT INTO gcd_codec SELECT * FROM generateRandom() LIMIT 133722877 SETTINGS max_threads=1
+ INSERT INTO gcd_codec SELECT * FROM generateRandom() LIMIT 1337228 SETTINGS max_threads=1
 SELECT * FROM gcd_codec SETTINGS max_threads=1
- INSERT INTO delta_codec SELECT * FROM generateRandom() LIMIT 133722877 SETTINGS max_threads=1
+ INSERT INTO delta_codec SELECT * FROM generateRandom() LIMIT 1337228 SETTINGS max_threads=1
 SELECT * FROM delta_codec_codec SETTINGS max_threads=1
 DROP TABLE gcd_codec

From 6761ccdf3f4c4715db1cd3b0f7cd5b7f3f0366e5 Mon Sep 17 00:00:00 2001
From: seshWCS
Date: Thu, 24 Aug 2023 00:26:55 +0000
Subject: [PATCH 0282/1687] UInt64 -> DateTime64

---
 tests/performance/codec_gcd.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/performance/codec_gcd.xml b/tests/performance/codec_gcd.xml
index 06134f8c0b5..0cca46d802e 100644
--- a/tests/performance/codec_gcd.xml
+++ b/tests/performance/codec_gcd.xml
@@ -1,6 +1,6 @@
- CREATE TABLE gcd_codec (n UInt64 CODEC(GCD, LZ4)) ENGINE = MergeTree ORDER BY tuple()
- CREATE TABLE delta_codec (n UInt64 CODEC(Delta, LZ4)) ENGINE = MergeTree ORDER BY tuple()
+ CREATE TABLE gcd_codec (n DateTime64 CODEC(GCD, LZ4)) ENGINE = MergeTree ORDER BY tuple()
+ CREATE TABLE delta_codec (n DateTime64 CODEC(Delta, LZ4)) ENGINE = MergeTree ORDER BY tuple()
 INSERT INTO gcd_codec SELECT * FROM generateRandom() LIMIT 1337228 SETTINGS max_threads=1

From 2db0e0879d65f06f24029f5727b30464885f9d84 Mon Sep 17 00:00:00 2001
From: Justin de Guzman
Date: Wed, 23 Aug 2023 17:39:33 -0700
Subject: [PATCH 0283/1687] Simplify docs for possible values for date units

---
 .../functions/date-time-functions.md | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 56a17519b61..8f673572e11 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -738,16 +738,16 @@ age('unit', startdate, enddate, [timezone])
 - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
 Possible values:

-    - `microsecond` (possible abbreviations: `us`, `u`)
-    - `millisecond` (possible abbreviations: `ms`)
-    - `second` (possible abbreviations: `ss`, `s`)
-    - `minute` (possible abbreviations: `mi`, `n`)
-    - `hour` (possible abbreviations: `hh`, `h`)
-    - `day` (possible abbreviations: `dd`, `d`)
-    - `week` (possible abbreviations: `wk`, `ww`)
-    - `month` (possible abbreviations: `mm`, `m`)
-    - `quarter` (possible abbreviations: `qq`, `q`)
-    - `year` (possible abbreviations: `yyyy`, `yy`)
+    - `microsecond` `microseconds` `us` `u`
+    - `millisecond` `milliseconds` `ms`
+    - `second` `seconds` `ss` `s`
+    - `minute` `minutes` `mi` `n`
+    - `hour` `hours` `hh` `h`
+    - `day` `days` `dd` `d`
+    - `week` `weeks` `wk` `ww`
+    - `month` `months` `mm` `m`
+    - `quarter` `quarters` `qq` `q`
+    - `year` `years` `yyyy` `yy`

 - `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

From af43e95f8d1940504333539aa4b011e0472681e2 Mon Sep 17 00:00:00 2001
From: Justin de Guzman
Date: Wed, 23 Aug 2023 17:42:55 -0700
Subject: [PATCH 0284/1687] Update date-time-functions doc

---
 .../functions/date-time-functions.md | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 8f673572e11..6bcc9d654c5 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -815,16 +815,16 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
 - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
 Possible values:

-    - `microsecond` (possible abbreviations: `microseconds`, `us`, `u`)
-    - `millisecond` (possible abbreviations: `milliseconds`, `ms`)
-    - `second` (possible abbreviations: `seconds`, `ss`, `s`)
-    - `minute` (possible abbreviations: `minutes`, `mi`, `n`)
-    - `hour` (possible abbreviations: `hours`, `hh`, `h`)
-    - `day` (possible abbreviations: `days`, `dd`, `d`)
-    - `week` (possible abbreviations: `weeks`, `wk`, `ww`)
-    - `month` (possible abbreviations: `months`, `mm`, `m`)
-    - `quarter` (possible abbreviations: `quarters`, `qq`, `q`)
-    - `year` (possible abbreviations: `years`, `yyyy`, `yy`)
+    - `microsecond` `microseconds` `us` `u`
+    - `millisecond` `milliseconds` `ms`
+    - `second` `seconds` `ss` `s`
+    - `minute` `minutes` `mi` `n`
+    - `hour` `hours` `hh` `h`
+    - `day` `days` `dd` `d`
+    - `week` `weeks` `wk` `ww`
+    - `month` `months` `mm` `m`
+    - `quarter` `quarters` `qq` `q`
+    - `year` `years` `yyyy` `yy`

 - `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
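The difference between age and dateDiff that the two doc sections above describe is easy to check with a couple of illustrative queries (dates chosen arbitrarily, not taken from the docs):

    -- dateDiff counts crossed unit boundaries, age counts complete units:
    SELECT dateDiff('month', toDate('2023-01-31'), toDate('2023-02-01'));  -- 1
    SELECT age('month', toDate('2023-01-31'), toDate('2023-02-01'));       -- 0
    -- the listed short forms are equivalent to the full unit name:
    SELECT dateDiff('mm', toDate('2023-01-31'), toDate('2023-02-01'));     -- 1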
From f9de4897eef4483bbfd0217bd2339ce02146eee5 Mon Sep 17 00:00:00 2001 From: seshWCS Date: Thu, 24 Aug 2023 01:00:04 +0000 Subject: [PATCH 0285/1687] Edited while --- src/Compression/CompressionCodecGCD.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 0207e298174..e13826dc8b4 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -84,7 +84,7 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest) T gcd_divider{}; const auto * cur_source = source; - while (cur_source < source_end) + while (gcd_divider != T(1) && cur_source < source_end) { if (cur_source == source) { @@ -94,10 +94,6 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest) { gcd_divider = boost::math::gcd(gcd_divider, unalignedLoad(cur_source)); } - if (gcd_divider == T(1)) - { - break; - } } unalignedStore(dest, gcd_divider); From b46e8db2541ae9dc58353057c71f07ff68ef5eee Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Tue, 22 Aug 2023 22:51:55 -0700 Subject: [PATCH 0286/1687] generate gperf perfect hashtable --- src/Functions/HTMLCharacterReference.gperf | 2145 ++++++ src/Functions/HTMLCharacterReference.h | 7141 ++++++++++++++++++++ 2 files changed, 9286 insertions(+) create mode 100644 src/Functions/HTMLCharacterReference.gperf create mode 100644 src/Functions/HTMLCharacterReference.h diff --git a/src/Functions/HTMLCharacterReference.gperf b/src/Functions/HTMLCharacterReference.gperf new file mode 100644 index 00000000000..339b0172538 --- /dev/null +++ b/src/Functions/HTMLCharacterReference.gperf @@ -0,0 +1,2145 @@ +%language=C++ +%define class-name HTMLCharacterHash +%define lookup-function-name Lookup +%readonly-tables +%compare-strncmp +%includes +%{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant" +#pragma GCC diagnostic ignored "-Wunused-macros" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wshorten-64-to-32" +%} +struct NameAndGlyph { +const char *name; +const char *glyph; +}; +%% +"Á", "Á" +"á", "á" +"Ă", "Ă" +"ă", "ă" +"∾", "∾" +"∿", "∿" +"∾̳", "∾̳" +"Â", "Â" +"â", "â" +"´", "´" +"А", "А" +"а", "а" +"Æ", "Æ" +"æ", "æ" +"⁡", "⁡" +"𝔄", "𝔄" +"𝔞", "𝔞" +"À", "À" +"à", "à" +"ℵ", "ℵ" +"ℵ", "ℵ" +"Α", "Α" +"α", "α" +"Ā", "Ā" +"ā", "ā" +"⨿", "⨿" +"&", "&" +"&", "&" +"⩓", "⩓" +"∧", "∧" +"⩕", "⩕" +"⩜", "⩜" +"⩘", "⩘" +"⩚", "⩚" +"∠", "∠" +"⦤", "⦤" +"∠", "∠" +"∡", "∡" +"⦨", "⦨" +"⦩", "⦩" +"⦪", "⦪" +"⦫", "⦫" +"⦬", "⦬" +"⦭", "⦭" +"⦮", "⦮" +"⦯", "⦯" +"∟", "∟" +"⊾", "⊾" +"⦝", "⦝" +"∢", "∢" +"Å", "Å" +"⍼", "⍼" +"Ą", "Ą" +"ą", "ą" +"𝔸", "𝔸" +"𝕒", "𝕒" +"≈", "≈" +"⩯", "⩯" +"⩰", "⩰" +"≊", "≊" +"≋", "≋" +"'", "'" +"⁡", "⁡" +"≈", "≈" +"≊", "≊" +"Å", "Å" +"å", "å" +"𝒜", "𝒜" +"𝒶", "𝒶" +"≔", "≔" +"*", "*" +"≈", "≈" +"≍", "≍" +"Ã", "Ã" +"ã", "ã" +"Ä", "Ä" +"ä", "ä" +"∳", "∳" +"⨑", "⨑" +"≌", "≌" +"϶", "϶" +"‵", "‵" +"∽", "∽" +"⋍", "⋍" +"∖", "∖" +"⫧", "⫧" +"⊽", "⊽" +"⌆", "⌆" +"⌅", "⌅" +"⌅", "⌅" +"⎵", "⎵" +"⎶", "⎶" +"≌", "≌" +"Б", "Б" +"б", "б" +"„", "„" +"∵", "∵" +"∵", "∵" +"∵", "∵" +"⦰", "⦰" +"϶", "϶" +"ℬ", "ℬ" +"ℬ", "ℬ" +"Β", "Β" +"β", "β" +"ℶ", "ℶ" +"≬", "≬" +"𝔅", "𝔅" +"𝔟", "𝔟" +"⋂", "⋂" +"◯", "◯" +"⋃", "⋃" +"⨀", "⨀" +"⨁", "⨁" +"⨂", "⨂" +"⨆", "⨆" +"★", "★" +"▽", "▽" +"△", "△" +"⨄", "⨄" +"⋁", "⋁" +"⋀", "⋀" +"⤍", "⤍" +"⧫", "⧫" +"▪", "▪" +"▴", "▴" +"▾", "▾" +"◂", "◂" 
+"▸", "▸" +"␣", "␣" +"▒", "▒" +"░", "░" +"▓", "▓" +"█", "█" +"=⃥", "=⃥" +"≡⃥", "≡⃥" +"⫭", "⫭" +"⌐", "⌐" +"𝔹", "𝔹" +"𝕓", "𝕓" +"⊥", "⊥" +"⊥", "⊥" +"⋈", "⋈" +"⧉", "⧉" +"╗", "╗" +"╖", "╖" +"╕", "╕" +"┐", "┐" +"╔", "╔" +"╓", "╓" +"╒", "╒" +"┌", "┌" +"═", "═" +"─", "─" +"╦", "╦" +"╤", "╤" +"╥", "╥" +"┬", "┬" +"╩", "╩" +"╧", "╧" +"╨", "╨" +"┴", "┴" +"⊟", "⊟" +"⊞", "⊞" +"⊠", "⊠" +"╝", "╝" +"╜", "╜" +"╛", "╛" +"┘", "┘" +"╚", "╚" +"╙", "╙" +"╘", "╘" +"└", "└" +"║", "║" +"│", "│" +"╬", "╬" +"╫", "╫" +"╪", "╪" +"┼", "┼" +"╣", "╣" +"╢", "╢" +"╡", "╡" +"┤", "┤" +"╠", "╠" +"╟", "╟" +"╞", "╞" +"├", "├" +"‵", "‵" +"˘", "˘" +"˘", "˘" +"¦", "¦" +"ℬ", "ℬ" +"𝒷", "𝒷" +"⁏", "⁏" +"∽", "∽" +"⋍", "⋍" +"\", "\\" +"⧅", "⧅" +"⟈", "⟈" +"•", "•" +"•", "•" +"≎", "≎" +"⪮", "⪮" +"≏", "≏" +"≎", "≎" +"≏", "≏" +"Ć", "Ć" +"ć", "ć" +"⋒", "⋒" +"∩", "∩" +"⩄", "⩄" +"⩉", "⩉" +"⩋", "⩋" +"⩇", "⩇" +"⩀", "⩀" +"ⅅ", "ⅅ" +"∩︀", "∩︀" +"⁁", "⁁" +"ˇ", "ˇ" +"ℭ", "ℭ" +"⩍", "⩍" +"Č", "Č" +"č", "č" +"Ç", "Ç" +"ç", "ç" +"Ĉ", "Ĉ" +"ĉ", "ĉ" +"∰", "∰" +"⩌", "⩌" +"⩐", "⩐" +"Ċ", "Ċ" +"ċ", "ċ" +"¸", "¸" +"¸", "¸" +"⦲", "⦲" +"¢", "¢" +"·", "·" +"·", "·" +"ℭ", "ℭ" +"𝔠", "𝔠" +"Ч", "Ч" +"ч", "ч" +"✓", "✓" +"✓", "✓" +"Χ", "Χ" +"χ", "χ" +"○", "○" +"ˆ", "ˆ" +"≗", "≗" +"↺", "↺" +"↻", "↻" +"⊛", "⊛" +"⊚", "⊚" +"⊝", "⊝" +"⊙", "⊙" +"®", "®" +"Ⓢ", "Ⓢ" +"⊖", "⊖" +"⊕", "⊕" +"⊗", "⊗" +"⧃", "⧃" +"≗", "≗" +"⨐", "⨐" +"⫯", "⫯" +"⧂", "⧂" +"∲", "∲" +"”", "”" +"’", "’" +"♣", "♣" +"♣", "♣" +"∷", "∷" +":", ":" +"⩴", "⩴" +"≔", "≔" +"≔", "≔" +",", "," +"@", "@" +"∁", "∁" +"∘", "∘" +"∁", "∁" +"ℂ", "ℂ" +"≅", "≅" +"⩭", "⩭" +"≡", "≡" +"∯", "∯" +"∮", "∮" +"∮", "∮" +"ℂ", "ℂ" +"𝕔", "𝕔" +"∐", "∐" +"∐", "∐" +"©", "©" +"©", "©" +"℗", "℗" +"∳", "∳" +"↵", "↵" +"⨯", "⨯" +"✗", "✗" +"𝒞", "𝒞" +"𝒸", "𝒸" +"⫏", "⫏" +"⫑", "⫑" +"⫐", "⫐" +"⫒", "⫒" +"⋯", "⋯" +"⤸", "⤸" +"⤵", "⤵" +"⋞", "⋞" +"⋟", "⋟" +"↶", "↶" +"⤽", "⤽" +"⋓", "⋓" +"∪", "∪" +"⩈", "⩈" +"≍", "≍" +"⩆", "⩆" +"⩊", "⩊" +"⊍", "⊍" +"⩅", "⩅" +"∪︀", "∪︀" +"↷", "↷" +"⤼", "⤼" +"⋞", "⋞" +"⋟", "⋟" +"⋎", "⋎" +"⋏", "⋏" +"¤", "¤" +"↶", "↶" +"↷", "↷" +"⋎", "⋎" +"⋏", "⋏" +"∲", "∲" +"∱", "∱" +"⌭", "⌭" +"‡", "‡" +"†", "†" +"ℸ", "ℸ" +"↡", "↡" +"⇓", "⇓" +"↓", "↓" +"‐", "‐" +"⫤", "⫤" +"⊣", "⊣" +"⤏", "⤏" +"˝", "˝" +"Ď", "Ď" +"ď", "ď" +"Д", "Д" +"д", "д" +"ⅅ", "ⅅ" +"ⅆ", "ⅆ" +"‡", "‡" +"⇊", "⇊" +"⤑", "⤑" +"⩷", "⩷" +"°", "°" +"∇", "∇" +"Δ", "Δ" +"δ", "δ" +"⦱", "⦱" +"⥿", "⥿" +"𝔇", "𝔇" +"𝔡", "𝔡" +"⥥", "⥥" +"⇃", "⇃" +"⇂", "⇂" +"´", "´" +"˙", "˙" +"˝", "˝" +"`", "`" +"˜", "˜" +"⋄", "⋄" +"⋄", "⋄" +"⋄", "⋄" +"♦", "♦" +"♦", "♦" +"¨", "¨" +"ⅆ", "ⅆ" +"ϝ", "ϝ" +"⋲", "⋲" +"÷", "÷" +"÷", "÷" +"⋇", "⋇" +"⋇", "⋇" +"Ђ", "Ђ" +"ђ", "ђ" +"⌞", "⌞" +"⌍", "⌍" +"$", "$" +"𝔻", "𝔻" +"𝕕", "𝕕" +"¨", "¨" +"˙", "˙" +"⃜", "◌⃜" +"≐", "≐" +"≑", "≑" +"≐", "≐" +"∸", "∸" +"∔", "∔" +"⊡", "⊡" +"⌆", "⌆" +"∯", "∯" +"¨", "¨" +"⇓", "⇓" +"⇐", "⇐" +"⇔", "⇔" +"⫤", "⫤" +"⟸", "⟸" +"⟺", "⟺" +"⟹", "⟹" +"⇒", "⇒" +"⊨", "⊨" +"⇑", "⇑" +"⇕", "⇕" +"∥", "∥" +"↓", "↓" +"⇓", "⇓" +"↓", "↓" +"⤓", "⤓" +"⇵", "⇵" +"̑", "◌̑" +"⇊", "⇊" +"⇃", "⇃" +"⇂", "⇂" +"⥐", "⥐" +"⥞", "⥞" +"↽", "↽" +"⥖", "⥖" +"⥟", "⥟" +"⇁", "⇁" +"⥗", "⥗" +"⊤", "⊤" +"↧", "↧" +"⤐", "⤐" +"⌟", "⌟" +"⌌", "⌌" +"𝒟", "𝒟" +"𝒹", "𝒹" +"Ѕ", "Ѕ" +"ѕ", "ѕ" +"⧶", "⧶" +"Đ", "Đ" +"đ", "đ" +"⋱", "⋱" +"▿", "▿" +"▾", "▾" +"⇵", "⇵" +"⥯", "⥯" +"⦦", "⦦" +"Џ", "Џ" +"џ", "џ" +"⟿", "⟿" +"É", "É" +"é", "é" +"⩮", "⩮" +"Ě", "Ě" +"ě", "ě" +"≖", "≖" +"Ê", "Ê" +"ê", "ê" +"≕", "≕" +"Э", "Э" +"э", "э" +"⩷", "⩷" +"Ė", "Ė" +"≑", "≑" +"ė", "ė" +"ⅇ", "ⅇ" +"≒", "≒" +"𝔈", "𝔈" +"𝔢", "𝔢" +"⪚", "⪚" +"È", "È" +"è", "è" +"⪖", "⪖" +"⪘", "⪘" +"⪙", "⪙" +"∈", "∈" +"⏧", "⏧" +"ℓ", "ℓ" +"⪕", "⪕" +"⪗", "⪗" 
+"Ē", "Ē" +"ē", "ē" +"∅", "∅" +"∅", "∅" +"◻", "◻" +"∅", "∅" +"▫", "▫" +" ", " " +" ", " " +" ", " " +"Ŋ", "Ŋ" +"ŋ", "ŋ" +" ", " " +"Ę", "Ę" +"ę", "ę" +"𝔼", "𝔼" +"𝕖", "𝕖" +"⋕", "⋕" +"⧣", "⧣" +"⩱", "⩱" +"ε", "ε" +"Ε", "Ε" +"ε", "ε" +"ϵ", "ϵ" +"≖", "≖" +"≕", "≕" +"≂", "≂" +"⪖", "⪖" +"⪕", "⪕" +"⩵", "⩵" +"=", "=" +"≂", "≂" +"≟", "≟" +"⇌", "⇌" +"≡", "≡" +"⩸", "⩸" +"⧥", "⧥" +"⥱", "⥱" +"≓", "≓" +"ℰ", "ℰ" +"ℯ", "ℯ" +"≐", "≐" +"⩳", "⩳" +"≂", "≂" +"Η", "Η" +"η", "η" +"Ð", "Ð" +"ð", "ð" +"Ë", "Ë" +"ë", "ë" +"€", "€" +"!", "!" +"∃", "∃" +"∃", "∃" +"ℰ", "ℰ" +"ⅇ", "ⅇ" +"ⅇ", "ⅇ" +"≒", "≒" +"Ф", "Ф" +"ф", "ф" +"♀", "♀" +"ffi", "ffi" +"ff", "ff" +"ffl", "ffl" +"𝔉", "𝔉" +"𝔣", "𝔣" +"fi", "fi" +"◼", "◼" +"▪", "▪" +"fj", "fj" +"♭", "♭" +"fl", "fl" +"▱", "▱" +"ƒ", "ƒ" +"𝔽", "𝔽" +"𝕗", "𝕗" +"∀", "∀" +"∀", "∀" +"⋔", "⋔" +"⫙", "⫙" +"ℱ", "ℱ" +"⨍", "⨍" +"½", "½" +"⅓", "⅓" +"¼", "¼" +"⅕", "⅕" +"⅙", "⅙" +"⅛", "⅛" +"⅔", "⅔" +"⅖", "⅖" +"¾", "¾" +"⅗", "⅗" +"⅜", "⅜" +"⅘", "⅘" +"⅚", "⅚" +"⅝", "⅝" +"⅞", "⅞" +"⁄", "⁄" +"⌢", "⌢" +"ℱ", "ℱ" +"𝒻", "𝒻" +"ǵ", "ǵ" +"Γ", "Γ" +"γ", "γ" +"Ϝ", "Ϝ" +"ϝ", "ϝ" +"⪆", "⪆" +"Ğ", "Ğ" +"ğ", "ğ" +"Ģ", "Ģ" +"Ĝ", "Ĝ" +"ĝ", "ĝ" +"Г", "Г" +"г", "г" +"Ġ", "Ġ" +"ġ", "ġ" +"≧", "≧" +"≥", "≥" +"⪌", "⪌" +"⋛", "⋛" +"≥", "≥" +"≧", "≧" +"⩾", "⩾" +"⩾", "⩾" +"⪩", "⪩" +"⪀", "⪀" +"⪂", "⪂" +"⪄", "⪄" +"⋛︀", "⋛︀" +"⪔", "⪔" +"𝔊", "𝔊" +"𝔤", "𝔤" +"⋙", "⋙" +"≫", "≫" +"⋙", "⋙" +"ℷ", "ℷ" +"Ѓ", "Ѓ" +"ѓ", "ѓ" +"≷", "≷" +"⪥", "⪥" +"⪒", "⪒" +"⪤", "⪤" +"⪊", "⪊" +"⪊", "⪊" +"≩", "≩" +"⪈", "⪈" +"⪈", "⪈" +"≩", "≩" +"⋧", "⋧" +"𝔾", "𝔾" +"𝕘", "𝕘" +"`", "`" +"≥", "≥" +"⋛", "⋛" +"≧", "≧" +"⪢", "⪢" +"≷", "≷" +"⩾", "⩾" +"≳", "≳" +"𝒢", "𝒢" +"ℊ", "ℊ" +"≳", "≳" +"⪎", "⪎" +"⪐", "⪐" +">", ">" +"≫", "≫" +">", ">" +"⪧", "⪧" +"⩺", "⩺" +"⋗", "⋗" +"⦕", "⦕" +"⩼", "⩼" +"⪆", "⪆" +"⥸", "⥸" +"⋗", "⋗" +"⋛", "⋛" +"⪌", "⪌" +"≷", "≷" +"≳", "≳" +"≩︀", "≩︀" +"≩︀", "≩︀" +"ˇ", "ˇ" +" ", " " +"½", "½" +"ℋ", "ℋ" +"Ъ", "Ъ" +"ъ", "ъ" +"⇔", "⇔" +"↔", "↔" +"⥈", "⥈" +"↭", "↭" +"^", "^" +"ℏ", "ℏ" +"Ĥ", "Ĥ" +"ĥ", "ĥ" +"♥", "♥" +"♥", "♥" +"…", "…" +"⊹", "⊹" +"ℌ", "ℌ" +"𝔥", "𝔥" +"ℋ", "ℋ" +"⤥", "⤥" +"⤦", "⤦" +"⇿", "⇿" +"∻", "∻" +"↩", "↩" +"↪", "↪" +"ℍ", "ℍ" +"𝕙", "𝕙" +"―", "―" +"─", "─" +"ℋ", "ℋ" +"𝒽", "𝒽" +"ℏ", "ℏ" +"Ħ", "Ħ" +"ħ", "ħ" +"≎", "≎" +"≏", "≏" +"⁃", "⁃" +"‐", "‐" +"Í", "Í" +"í", "í" +"⁣", "⁣" +"Î", "Î" +"î", "î" +"И", "И" +"и", "и" +"İ", "İ" +"Е", "Е" +"е", "е" +"¡", "¡" +"⇔", "⇔" +"ℑ", "ℑ" +"𝔦", "𝔦" +"Ì", "Ì" +"ì", "ì" +"ⅈ", "ⅈ" +"⨌", "⨌" +"∭", "∭" +"⧜", "⧜" +"℩", "℩" +"IJ", "IJ" +"ij", "ij" +"ℑ", "ℑ" +"Ī", "Ī" +"ī", "ī" +"ℑ", "ℑ" +"ⅈ", "ⅈ" +"ℐ", "ℐ" +"ℑ", "ℑ" +"ı", "ı" +"⊷", "⊷" +"Ƶ", "Ƶ" +"⇒", "⇒" +"∈", "∈" +"℅", "℅" +"∞", "∞" +"⧝", "⧝" +"ı", "ı" +"∬", "∬" +"∫", "∫" +"⊺", "⊺" +"ℤ", "ℤ" +"∫", "∫" +"⊺", "⊺" +"⋂", "⋂" +"⨗", "⨗" +"⨼", "⨼" +"⁣", "⁣" +"⁢", "⁢" +"Ё", "Ё" +"ё", "ё" +"Į", "Į" +"į", "į" +"𝕀", "𝕀" +"𝕚", "𝕚" +"Ι", "Ι" +"ι", "ι" +"⨼", "⨼" +"¿", "¿" +"ℐ", "ℐ" +"𝒾", "𝒾" +"∈", "∈" +"⋵", "⋵" +"⋹", "⋹" +"⋴", "⋴" +"⋳", "⋳" +"∈", "∈" +"⁢", "⁢" +"Ĩ", "Ĩ" +"ĩ", "ĩ" +"І", "І" +"і", "і" +"Ï", "Ï" +"ï", "ï" +"Ĵ", "Ĵ" +"ĵ", "ĵ" +"Й", "Й" +"й", "й" +"𝔍", "𝔍" +"𝔧", "𝔧" +"ȷ", "ȷ" +"𝕁", "𝕁" +"𝕛", "𝕛" +"𝒥", "𝒥" +"𝒿", "𝒿" +"Ј", "Ј" +"ј", "ј" +"Є", "Є" +"є", "є" +"Κ", "Κ" +"κ", "κ" +"ϰ", "ϰ" +"Ķ", "Ķ" +"ķ", "ķ" +"К", "К" +"к", "к" +"𝔎", "𝔎" +"𝔨", "𝔨" +"ĸ", "ĸ" +"Х", "Х" +"х", "х" +"Ќ", "Ќ" +"ќ", "ќ" +"𝕂", "𝕂" +"𝕜", "𝕜" +"𝒦", "𝒦" +"𝓀", "𝓀" +"⇚", "⇚" +"Ĺ", "Ĺ" +"ĺ", "ĺ" +"⦴", "⦴" +"ℒ", "ℒ" +"Λ", "Λ" +"λ", "λ" +"⟪", "⟪" +"⟨", "⟨" +"⦑", "⦑" +"⟨", "⟨" +"⪅", "⪅" +"ℒ", "ℒ" +"«", "«" +"↞", "↞" +"⇐", "⇐" +"←", "←" +"⇤", "⇤" +"⤟", "⤟" +"⤝", "⤝" +"↩", "↩" +"↫", "↫" +"⤹", "⤹" +"⥳", 
"⥳" +"↢", "↢" +"⪫", "⪫" +"⤛", "⤛" +"⤙", "⤙" +"⪭", "⪭" +"⪭︀", "⪭︀" +"⤎", "⤎" +"⤌", "⤌" +"❲", "❲" +"{", "{" +"[", "[" +"⦋", "⦋" +"⦏", "⦏" +"⦍", "⦍" +"Ľ", "Ľ" +"ľ", "ľ" +"Ļ", "Ļ" +"ļ", "ļ" +"⌈", "⌈" +"{", "{" +"Л", "Л" +"л", "л" +"⤶", "⤶" +"“", "“" +"„", "„" +"⥧", "⥧" +"⥋", "⥋" +"↲", "↲" +"≦", "≦" +"≤", "≤" +"⟨", "⟨" +"←", "←" +"⇐", "⇐" +"←", "←" +"⇤", "⇤" +"⇆", "⇆" +"↢", "↢" +"⌈", "⌈" +"⟦", "⟦" +"⥡", "⥡" +"⇃", "⇃" +"⥙", "⥙" +"⌊", "⌊" +"↽", "↽" +"↼", "↼" +"⇇", "⇇" +"↔", "↔" +"⇔", "⇔" +"↔", "↔" +"⇆", "⇆" +"⇋", "⇋" +"↭", "↭" +"⥎", "⥎" +"⊣", "⊣" +"↤", "↤" +"⥚", "⥚" +"⋋", "⋋" +"⊲", "⊲" +"⧏", "⧏" +"⊴", "⊴" +"⥑", "⥑" +"⥠", "⥠" +"↿", "↿" +"⥘", "⥘" +"↼", "↼" +"⥒", "⥒" +"⪋", "⪋" +"⋚", "⋚" +"≤", "≤" +"≦", "≦" +"⩽", "⩽" +"⩽", "⩽" +"⪨", "⪨" +"⩿", "⩿" +"⪁", "⪁" +"⪃", "⪃" +"⋚︀", "⋚︀" +"⪓", "⪓" +"⪅", "⪅" +"⋖", "⋖" +"⋚", "⋚" +"⪋", "⪋" +"⋚", "⋚" +"≦", "≦" +"≶", "≶" +"≶", "≶" +"⪡", "⪡" +"≲", "≲" +"⩽", "⩽" +"≲", "≲" +"⥼", "⥼" +"⌊", "⌊" +"𝔏", "𝔏" +"𝔩", "𝔩" +"≶", "≶" +"⪑", "⪑" +"⥢", "⥢" +"↽", "↽" +"↼", "↼" +"⥪", "⥪" +"▄", "▄" +"Љ", "Љ" +"љ", "љ" +"⋘", "⋘" +"≪", "≪" +"⇇", "⇇" +"⌞", "⌞" +"⇚", "⇚" +"⥫", "⥫" +"◺", "◺" +"Ŀ", "Ŀ" +"ŀ", "ŀ" +"⎰", "⎰" +"⎰", "⎰" +"⪉", "⪉" +"⪉", "⪉" +"≨", "≨" +"⪇", "⪇" +"⪇", "⪇" +"≨", "≨" +"⋦", "⋦" +"⟬", "⟬" +"⇽", "⇽" +"⟦", "⟦" +"⟵", "⟵" +"⟸", "⟸" +"⟵", "⟵" +"⟷", "⟷" +"⟺", "⟺" +"⟷", "⟷" +"⟼", "⟼" +"⟶", "⟶" +"⟹", "⟹" +"⟶", "⟶" +"↫", "↫" +"↬", "↬" +"⦅", "⦅" +"𝕃", "𝕃" +"𝕝", "𝕝" +"⨭", "⨭" +"⨴", "⨴" +"∗", "∗" +"_", "_" +"↙", "↙" +"↘", "↘" +"◊", "◊" +"◊", "◊" +"⧫", "⧫" +"(", "(" +"⦓", "⦓" +"⇆", "⇆" +"⌟", "⌟" +"⇋", "⇋" +"⥭", "⥭" +"‎", "‎" +"⊿", "⊿" +"‹", "‹" +"ℒ", "ℒ" +"𝓁", "𝓁" +"↰", "↰" +"↰", "↰" +"≲", "≲" +"⪍", "⪍" +"⪏", "⪏" +"[", "[" +"‘", "‘" +"‚", "‚" +"Ł", "Ł" +"ł", "ł" +"<", "<" +"≪", "≪" +"<", "<" +"⪦", "⪦" +"⩹", "⩹" +"⋖", "⋖" +"⋋", "⋋" +"⋉", "⋉" +"⥶", "⥶" +"⩻", "⩻" +"◃", "◃" +"⊴", "⊴" +"◂", "◂" +"⦖", "⦖" +"⥊", "⥊" +"⥦", "⥦" +"≨︀", "≨︀" +"≨︀", "≨︀" +"¯", "¯" +"♂", "♂" +"✠", "✠" +"✠", "✠" +"⤅", "⤅" +"↦", "↦" +"↦", "↦" +"↧", "↧" +"↤", "↤" +"↥", "↥" +"▮", "▮" +"⨩", "⨩" +"М", "М" +"м", "м" +"—", "—" +"∺", "∺" +"∡", "∡" +" ", " " +"ℳ", "ℳ" +"𝔐", "𝔐" +"𝔪", "𝔪" +"℧", "℧" +"µ", "µ" +"∣", "∣" +"*", "*" +"⫰", "⫰" +"·", "·" +"−", "−" +"⊟", "⊟" +"∸", "∸" +"⨪", "⨪" +"∓", "∓" +"⫛", "⫛" +"…", "…" +"∓", "∓" +"⊧", "⊧" +"𝕄", "𝕄" +"𝕞", "𝕞" +"∓", "∓" +"ℳ", "ℳ" +"𝓂", "𝓂" +"∾", "∾" +"Μ", "Μ" +"μ", "μ" +"⊸", "⊸" +"⊸", "⊸" +"∇", "∇" +"Ń", "Ń" +"ń", "ń" +"∠⃒", "∠⃒" +"≉", "≉" +"⩰̸", "⩰̸" +"≋̸", "≋̸" +"ʼn", "ʼn" +"≉", "≉" +"♮", "♮" +"♮", "♮" +"ℕ", "ℕ" +" ", " " +"≎̸", "≎̸" +"≏̸", "≏̸" +"⩃", "⩃" +"Ň", "Ň" +"ň", "ň" +"Ņ", "Ņ" +"ņ", "ņ" +"≇", "≇" +"⩭̸", "⩭̸" +"⩂", "⩂" +"Н", "Н" +"н", "н" +"–", "–" +"≠", "≠" +"⤤", "⤤" +"⇗", "⇗" +"↗", "↗" +"↗", "↗" +"≐̸", "≐̸" +"​", "​" +"​", "​" +"​", "​" +"​", "​" +"≢", "≢" +"⤨", "⤨" +"≂̸", "≂̸" +"≫", "≫" +"≪", "≪" +" ", "␊" +"∄", "∄" +"∄", "∄" +"𝔑", "𝔑" +"𝔫", "𝔫" +"≧̸", "≧̸" +"≱", "≱" +"≱", "≱" +"≧̸", "≧̸" +"⩾̸", "⩾̸" +"⩾̸", "⩾̸" +"⋙̸", "⋙̸" +"≵", "≵" +"≫⃒", "≫⃒" +"≯", "≯" +"≯", "≯" +"≫̸", "≫̸" +"⇎", "⇎" +"↮", "↮" +"⫲", "⫲" +"∋", "∋" +"⋼", "⋼" +"⋺", "⋺" +"∋", "∋" +"Њ", "Њ" +"њ", "њ" +"⇍", "⇍" +"↚", "↚" +"‥", "‥" +"≦̸", "≦̸" +"≰", "≰" +"⇍", "⇍" +"↚", "↚" +"⇎", "⇎" +"↮", "↮" +"≰", "≰" +"≦̸", "≦̸" +"⩽̸", "⩽̸" +"⩽̸", "⩽̸" +"≮", "≮" +"⋘̸", "⋘̸" +"≴", "≴" +"≪⃒", "≪⃒" +"≮", "≮" +"⋪", "⋪" +"⋬", "⋬" +"≪̸", "≪̸" +"∤", "∤" +"⁠", "⁠" +" ", " " +"ℕ", "ℕ" +"𝕟", "𝕟" +"⫬", "⫬" +"¬", "¬" +"≢", "≢" +"≭", "≭" +"∦", "∦" +"∉", "∉" +"≠", "≠" +"≂̸", "≂̸" +"∄", "∄" +"≯", "≯" +"≱", "≱" +"≧̸", "≧̸" +"≫̸", "≫̸" +"≹", "≹" +"⩾̸", "⩾̸" +"≵", "≵" +"≎̸", "≎̸" +"≏̸", "≏̸" +"∉", "∉" +"⋵̸", "⋵̸" +"⋹̸", "⋹̸" +"∉", "∉" 
+"⋷", "⋷" +"⋶", "⋶" +"⋪", "⋪" +"⧏̸", "⧏̸" +"⋬", "⋬" +"≮", "≮" +"≰", "≰" +"≸", "≸" +"≪̸", "≪̸" +"⩽̸", "⩽̸" +"≴", "≴" +"⪢̸", "⪢̸" +"⪡̸", "⪡̸" +"∌", "∌" +"∌", "∌" +"⋾", "⋾" +"⋽", "⋽" +"⊀", "⊀" +"⪯̸", "⪯̸" +"⋠", "⋠" +"∌", "∌" +"⋫", "⋫" +"⧐̸", "⧐̸" +"⋭", "⋭" +"⊏̸", "⊏̸" +"⋢", "⋢" +"⊐̸", "⊐̸" +"⋣", "⋣" +"⊂⃒", "⊂⃒" +"⊈", "⊈" +"⊁", "⊁" +"⪰̸", "⪰̸" +"⋡", "⋡" +"≿̸", "≿̸" +"⊃⃒", "⊃⃒" +"⊉", "⊉" +"≁", "≁" +"≄", "≄" +"≇", "≇" +"≉", "≉" +"∤", "∤" +"∦", "∦" +"∦", "∦" +"⫽⃥", "⫽⃥" +"∂̸", "∂̸" +"⨔", "⨔" +"⊀", "⊀" +"⋠", "⋠" +"⪯̸", "⪯̸" +"⊀", "⊀" +"⪯̸", "⪯̸" +"⇏", "⇏" +"↛", "↛" +"⤳̸", "⤳̸" +"↝̸", "↝̸" +"⇏", "⇏" +"↛", "↛" +"⋫", "⋫" +"⋭", "⋭" +"⊁", "⊁" +"⋡", "⋡" +"⪰̸", "⪰̸" +"𝒩", "𝒩" +"𝓃", "𝓃" +"∤", "∤" +"∦", "∦" +"≁", "≁" +"≄", "≄" +"≄", "≄" +"∤", "∤" +"∦", "∦" +"⋢", "⋢" +"⋣", "⋣" +"⊄", "⊄" +"⫅̸", "⫅̸" +"⊈", "⊈" +"⊂⃒", "⊂⃒" +"⊈", "⊈" +"⫅̸", "⫅̸" +"⊁", "⊁" +"⪰̸", "⪰̸" +"⊅", "⊅" +"⫆̸", "⫆̸" +"⊉", "⊉" +"⊃⃒", "⊃⃒" +"⊉", "⊉" +"⫆̸", "⫆̸" +"≹", "≹" +"Ñ", "Ñ" +"ñ", "ñ" +"≸", "≸" +"⋪", "⋪" +"⋬", "⋬" +"⋫", "⋫" +"⋭", "⋭" +"Ν", "Ν" +"ν", "ν" +"#", "#" +"№", "№" +" ", " " +"≍⃒", "≍⃒" +"⊯", "⊯" +"⊮", "⊮" +"⊭", "⊭" +"⊬", "⊬" +"≥⃒", "≥⃒" +">⃒", ">⃒" +"⤄", "⤄" +"⧞", "⧞" +"⤂", "⤂" +"≤⃒", "≤⃒" +"<⃒", "<⃒" +"⊴⃒", "⊴⃒" +"⤃", "⤃" +"⊵⃒", "⊵⃒" +"∼⃒", "∼⃒" +"⤣", "⤣" +"⇖", "⇖" +"↖", "↖" +"↖", "↖" +"⤧", "⤧" +"Ó", "Ó" +"ó", "ó" +"⊛", "⊛" +"⊚", "⊚" +"Ô", "Ô" +"ô", "ô" +"О", "О" +"о", "о" +"⊝", "⊝" +"Ő", "Ő" +"ő", "ő" +"⨸", "⨸" +"⊙", "⊙" +"⦼", "⦼" +"Œ", "Œ" +"œ", "œ" +"⦿", "⦿" +"𝔒", "𝔒" +"𝔬", "𝔬" +"˛", "˛" +"Ò", "Ò" +"ò", "ò" +"⧁", "⧁" +"⦵", "⦵" +"Ω", "Ω" +"∮", "∮" +"↺", "↺" +"⦾", "⦾" +"⦻", "⦻" +"‾", "‾" +"⧀", "⧀" +"Ō", "Ō" +"ō", "ō" +"Ω", "Ω" +"ω", "ω" +"Ο", "Ο" +"ο", "ο" +"⦶", "⦶" +"⊖", "⊖" +"𝕆", "𝕆" +"𝕠", "𝕠" +"⦷", "⦷" +"“", "“" +"‘", "‘" +"⦹", "⦹" +"⊕", "⊕" +"⩔", "⩔" +"∨", "∨" +"↻", "↻" +"⩝", "⩝" +"ℴ", "ℴ" +"ℴ", "ℴ" +"ª", "ª" +"º", "º" +"⊶", "⊶" +"⩖", "⩖" +"⩗", "⩗" +"⩛", "⩛" +"Ⓢ", "Ⓢ" +"𝒪", "𝒪" +"ℴ", "ℴ" +"Ø", "Ø" +"ø", "ø" +"⊘", "⊘" +"Õ", "Õ" +"õ", "õ" +"⨷", "⨷" +"⊗", "⊗" +"⨶", "⨶" +"Ö", "Ö" +"ö", "ö" +"⌽", "⌽" +"‾", "‾" +"⏞", "⏞" +"⎴", "⎴" +"⏜", "⏜" +"∥", "∥" +"¶", "¶" +"∥", "∥" +"⫳", "⫳" +"⫽", "⫽" +"∂", "∂" +"∂", "∂" +"П", "П" +"п", "п" +"%", "%" +".", "." +"‰", "‰" +"⊥", "⊥" +"‱", "‱" +"𝔓", "𝔓" +"𝔭", "𝔭" +"Φ", "Φ" +"φ", "φ" +"ϕ", "ϕ" +"ℳ", "ℳ" +"☎", "☎" +"Π", "Π" +"π", "π" +"⋔", "⋔" +"ϖ", "ϖ" +"ℏ", "ℏ" +"ℎ", "ℎ" +"ℏ", "ℏ" +"+", "+" +"⨣", "⨣" +"⊞", "⊞" +"⨢", "⨢" +"∔", "∔" +"⨥", "⨥" +"⩲", "⩲" +"±", "±" +"±", "±" +"⨦", "⨦" +"⨧", "⨧" +"±", "±" +"ℌ", "ℌ" +"⨕", "⨕" +"ℙ", "ℙ" +"𝕡", "𝕡" +"£", "£" +"⪻", "⪻" +"≺", "≺" +"⪷", "⪷" +"≼", "≼" +"⪳", "⪳" +"⪯", "⪯" +"≺", "≺" +"⪷", "⪷" +"≼", "≼" +"≺", "≺" +"⪯", "⪯" +"≼", "≼" +"≾", "≾" +"⪯", "⪯" +"⪹", "⪹" +"⪵", "⪵" +"⋨", "⋨" +"≾", "≾" +"″", "″" +"′", "′" +"ℙ", "ℙ" +"⪹", "⪹" +"⪵", "⪵" +"⋨", "⋨" +"∏", "∏" +"∏", "∏" +"⌮", "⌮" +"⌒", "⌒" +"⌓", "⌓" +"∝", "∝" +"∷", "∷" +"∝", "∝" +"∝", "∝" +"≾", "≾" +"⊰", "⊰" +"𝒫", "𝒫" +"𝓅", "𝓅" +"Ψ", "Ψ" +"ψ", "ψ" +" ", " " +"𝔔", "𝔔" +"𝔮", "𝔮" +"⨌", "⨌" +"ℚ", "ℚ" +"𝕢", "𝕢" +"⁗", "⁗" +"𝒬", "𝒬" +"𝓆", "𝓆" +"ℍ", "ℍ" +"⨖", "⨖" +"?", "?" 
+"≟", "≟" +""", "\"" +""", "\"" +"⇛", "⇛" +"∽̱", "∽̱" +"Ŕ", "Ŕ" +"ŕ", "ŕ" +"√", "√" +"⦳", "⦳" +"⟫", "⟫" +"⟩", "⟩" +"⦒", "⦒" +"⦥", "⦥" +"⟩", "⟩" +"»", "»" +"↠", "↠" +"⇒", "⇒" +"→", "→" +"⥵", "⥵" +"⇥", "⇥" +"⤠", "⤠" +"⤳", "⤳" +"⤞", "⤞" +"↪", "↪" +"↬", "↬" +"⥅", "⥅" +"⥴", "⥴" +"⤖", "⤖" +"↣", "↣" +"↝", "↝" +"⤜", "⤜" +"⤚", "⤚" +"∶", "∶" +"ℚ", "ℚ" +"⤐", "⤐" +"⤏", "⤏" +"⤍", "⤍" +"❳", "❳" +"}", "}" +"]", "]" +"⦌", "⦌" +"⦎", "⦎" +"⦐", "⦐" +"Ř", "Ř" +"ř", "ř" +"Ŗ", "Ŗ" +"ŗ", "ŗ" +"⌉", "⌉" +"}", "}" +"Р", "Р" +"р", "р" +"⤷", "⤷" +"⥩", "⥩" +"”", "”" +"”", "”" +"↳", "↳" +"ℜ", "ℜ" +"ℜ", "ℜ" +"ℛ", "ℛ" +"ℜ", "ℜ" +"ℝ", "ℝ" +"▭", "▭" +"®", "®" +"®", "®" +"∋", "∋" +"⇋", "⇋" +"⥯", "⥯" +"⥽", "⥽" +"⌋", "⌋" +"ℜ", "ℜ" +"𝔯", "𝔯" +"⥤", "⥤" +"⇁", "⇁" +"⇀", "⇀" +"⥬", "⥬" +"Ρ", "Ρ" +"ρ", "ρ" +"ϱ", "ϱ" +"⟩", "⟩" +"→", "→" +"⇒", "⇒" +"→", "→" +"⇥", "⇥" +"⇄", "⇄" +"↣", "↣" +"⌉", "⌉" +"⟧", "⟧" +"⥝", "⥝" +"⇂", "⇂" +"⥕", "⥕" +"⌋", "⌋" +"⇁", "⇁" +"⇀", "⇀" +"⇄", "⇄" +"⇌", "⇌" +"⇉", "⇉" +"↝", "↝" +"⊢", "⊢" +"↦", "↦" +"⥛", "⥛" +"⋌", "⋌" +"⊳", "⊳" +"⧐", "⧐" +"⊵", "⊵" +"⥏", "⥏" +"⥜", "⥜" +"↾", "↾" +"⥔", "⥔" +"⇀", "⇀" +"⥓", "⥓" +"˚", "˚" +"≓", "≓" +"⇄", "⇄" +"⇌", "⇌" +"‏", "‏" +"⎱", "⎱" +"⎱", "⎱" +"⫮", "⫮" +"⟭", "⟭" +"⇾", "⇾" +"⟧", "⟧" +"⦆", "⦆" +"ℝ", "ℝ" +"𝕣", "𝕣" +"⨮", "⨮" +"⨵", "⨵" +"⥰", "⥰" +")", ")" +"⦔", "⦔" +"⨒", "⨒" +"⇉", "⇉" +"⇛", "⇛" +"›", "›" +"ℛ", "ℛ" +"𝓇", "𝓇" +"↱", "↱" +"↱", "↱" +"]", "]" +"’", "’" +"’", "’" +"⋌", "⋌" +"⋊", "⋊" +"▹", "▹" +"⊵", "⊵" +"▸", "▸" +"⧎", "⧎" +"⧴", "⧴" +"⥨", "⥨" +"℞", "℞" +"Ś", "Ś" +"ś", "ś" +"‚", "‚" +"⪼", "⪼" +"≻", "≻" +"⪸", "⪸" +"Š", "Š" +"š", "š" +"≽", "≽" +"⪴", "⪴" +"⪰", "⪰" +"Ş", "Ş" +"ş", "ş" +"Ŝ", "Ŝ" +"ŝ", "ŝ" +"⪺", "⪺" +"⪶", "⪶" +"⋩", "⋩" +"⨓", "⨓" +"≿", "≿" +"С", "С" +"с", "с" +"⋅", "⋅" +"⊡", "⊡" +"⩦", "⩦" +"⤥", "⤥" +"⇘", "⇘" +"↘", "↘" +"↘", "↘" +"§", "§" +";", ";" +"⤩", "⤩" +"∖", "∖" +"∖", "∖" +"✶", "✶" +"𝔖", "𝔖" +"𝔰", "𝔰" +"⌢", "⌢" +"♯", "♯" +"Щ", "Щ" +"щ", "щ" +"Ш", "Ш" +"ш", "ш" +"↓", "↓" +"←", "←" +"∣", "∣" +"∥", "∥" +"→", "→" +"↑", "↑" +"­", " " +"Σ", "Σ" +"σ", "σ" +"ς", "ς" +"ς", "ς" +"∼", "∼" +"⩪", "⩪" +"≃", "≃" +"≃", "≃" +"⪞", "⪞" +"⪠", "⪠" +"⪝", "⪝" +"⪟", "⪟" +"≆", "≆" +"⨤", "⨤" +"⥲", "⥲" +"←", "←" +"∘", "∘" +"∖", "∖" +"⨳", "⨳" +"⧤", "⧤" +"∣", "∣" +"⌣", "⌣" +"⪪", "⪪" +"⪬", "⪬" +"⪬︀", "⪬︀" +"Ь", "Ь" +"ь", "ь" +"/", "/" +"⧄", "⧄" +"⌿", "⌿" +"𝕊", "𝕊" +"𝕤", "𝕤" +"♠", "♠" +"♠", "♠" +"∥", "∥" +"⊓", "⊓" +"⊓︀", "⊓︀" +"⊔", "⊔" +"⊔︀", "⊔︀" +"√", "√" +"⊏", "⊏" +"⊑", "⊑" +"⊏", "⊏" +"⊑", "⊑" +"⊐", "⊐" +"⊒", "⊒" +"⊐", "⊐" +"⊒", "⊒" +"□", "□" +"□", "□" +"□", "□" +"⊓", "⊓" +"⊏", "⊏" +"⊑", "⊑" +"⊐", "⊐" +"⊒", "⊒" +"⊔", "⊔" +"▪", "▪" +"▪", "▪" +"→", "→" +"𝒮", "𝒮" +"𝓈", "𝓈" +"∖", "∖" +"⌣", "⌣" +"⋆", "⋆" +"⋆", "⋆" +"☆", "☆" +"★", "★" +"ϵ", "ϵ" +"ϕ", "ϕ" +"¯", "¯" +"⋐", "⋐" +"⊂", "⊂" +"⪽", "⪽" +"⫅", "⫅" +"⊆", "⊆" +"⫃", "⫃" +"⫁", "⫁" +"⫋", "⫋" +"⊊", "⊊" +"⪿", "⪿" +"⥹", "⥹" +"⋐", "⋐" +"⊂", "⊂" +"⊆", "⊆" +"⫅", "⫅" +"⊆", "⊆" +"⊊", "⊊" +"⫋", "⫋" +"⫇", "⫇" +"⫕", "⫕" +"⫓", "⫓" +"≻", "≻" +"⪸", "⪸" +"≽", "≽" +"≻", "≻" +"⪰", "⪰" +"≽", "≽" +"≿", "≿" +"⪰", "⪰" +"⪺", "⪺" +"⪶", "⪶" +"⋩", "⋩" +"≿", "≿" +"∋", "∋" +"∑", "∑" +"∑", "∑" +"♪", "♪" +"⋑", "⋑" +"⊃", "⊃" +"¹", "¹" +"²", "²" +"³", "³" +"⪾", "⪾" +"⫘", "⫘" +"⫆", "⫆" +"⊇", "⊇" +"⫄", "⫄" +"⊃", "⊃" +"⊇", "⊇" +"⟉", "⟉" +"⫗", "⫗" +"⥻", "⥻" +"⫂", "⫂" +"⫌", "⫌" +"⊋", "⊋" +"⫀", "⫀" +"⋑", "⋑" +"⊃", "⊃" +"⊇", "⊇" +"⫆", "⫆" +"⊋", "⊋" +"⫌", "⫌" +"⫈", "⫈" +"⫔", "⫔" +"⫖", "⫖" +"⤦", "⤦" +"⇙", "⇙" +"↙", "↙" +"↙", "↙" +"⤪", "⤪" +"ß", "ß" +" ", "␉" +"⌖", "⌖" +"Τ", "Τ" +"τ", "τ" +"⎴", "⎴" +"Ť", "Ť" +"ť", "ť" +"Ţ", "Ţ" +"ţ", "ţ" +"Т", "Т" +"т", "т" +"⃛", "◌⃛" +"⌕", "⌕" 
+"𝔗", "𝔗" +"𝔱", "𝔱" +"∴", "∴" +"∴", "∴" +"∴", "∴" +"Θ", "Θ" +"θ", "θ" +"ϑ", "ϑ" +"ϑ", "ϑ" +"≈", "≈" +"∼", "∼" +"  ", "  " +" ", " " +" ", " " +"≈", "≈" +"∼", "∼" +"Þ", "Þ" +"þ", "þ" +"∼", "∼" +"˜", "˜" +"≃", "≃" +"≅", "≅" +"≈", "≈" +"×", "×" +"⊠", "⊠" +"⨱", "⨱" +"⨰", "⨰" +"∭", "∭" +"⤨", "⤨" +"⊤", "⊤" +"⌶", "⌶" +"⫱", "⫱" +"𝕋", "𝕋" +"𝕥", "𝕥" +"⫚", "⫚" +"⤩", "⤩" +"‴", "‴" +"™", "™" +"™", "™" +"▵", "▵" +"▿", "▿" +"◃", "◃" +"⊴", "⊴" +"≜", "≜" +"▹", "▹" +"⊵", "⊵" +"◬", "◬" +"≜", "≜" +"⨺", "⨺" +"⃛", "◌⃛" +"⨹", "⨹" +"⧍", "⧍" +"⨻", "⨻" +"⏢", "⏢" +"𝒯", "𝒯" +"𝓉", "𝓉" +"Ц", "Ц" +"ц", "ц" +"Ћ", "Ћ" +"ћ", "ћ" +"Ŧ", "Ŧ" +"ŧ", "ŧ" +"≬", "≬" +"↞", "↞" +"↠", "↠" +"Ú", "Ú" +"ú", "ú" +"↟", "↟" +"⇑", "⇑" +"↑", "↑" +"⥉", "⥉" +"Ў", "Ў" +"ў", "ў" +"Ŭ", "Ŭ" +"ŭ", "ŭ" +"Û", "Û" +"û", "û" +"У", "У" +"у", "у" +"⇅", "⇅" +"Ű", "Ű" +"ű", "ű" +"⥮", "⥮" +"⥾", "⥾" +"𝔘", "𝔘" +"𝔲", "𝔲" +"Ù", "Ù" +"ù", "ù" +"⥣", "⥣" +"↿", "↿" +"↾", "↾" +"▀", "▀" +"⌜", "⌜" +"⌜", "⌜" +"⌏", "⌏" +"◸", "◸" +"Ū", "Ū" +"ū", "ū" +"¨", "¨" +"_", "_" +"⏟", "⏟" +"⎵", "⎵" +"⏝", "⏝" +"⋃", "⋃" +"⊎", "⊎" +"Ų", "Ų" +"ų", "ų" +"𝕌", "𝕌" +"𝕦", "𝕦" +"↑", "↑" +"⇑", "⇑" +"↑", "↑" +"⤒", "⤒" +"⇅", "⇅" +"↕", "↕" +"⇕", "⇕" +"↕", "↕" +"⥮", "⥮" +"↿", "↿" +"↾", "↾" +"⊎", "⊎" +"↖", "↖" +"↗", "↗" +"ϒ", "ϒ" +"υ", "υ" +"ϒ", "ϒ" +"Υ", "Υ" +"υ", "υ" +"⊥", "⊥" +"↥", "↥" +"⇈", "⇈" +"⌝", "⌝" +"⌝", "⌝" +"⌎", "⌎" +"Ů", "Ů" +"ů", "ů" +"◹", "◹" +"𝒰", "𝒰" +"𝓊", "𝓊" +"⋰", "⋰" +"Ũ", "Ũ" +"ũ", "ũ" +"▵", "▵" +"▴", "▴" +"⇈", "⇈" +"Ü", "Ü" +"ü", "ü" +"⦧", "⦧" +"⦜", "⦜" +"ϵ", "ϵ" +"ϰ", "ϰ" +"∅", "∅" +"ϕ", "ϕ" +"ϖ", "ϖ" +"∝", "∝" +"⇕", "⇕" +"↕", "↕" +"ϱ", "ϱ" +"ς", "ς" +"⊊︀", "⊊︀" +"⫋︀", "⫋︀" +"⊋︀", "⊋︀" +"⫌︀", "⫌︀" +"ϑ", "ϑ" +"⊲", "⊲" +"⊳", "⊳" +"⫫", "⫫" +"⫨", "⫨" +"⫩", "⫩" +"В", "В" +"в", "в" +"⊫", "⊫" +"⊩", "⊩" +"⊨", "⊨" +"⊢", "⊢" +"⫦", "⫦" +"⋁", "⋁" +"∨", "∨" +"⊻", "⊻" +"≚", "≚" +"⋮", "⋮" +"‖", "‖" +"|", "|" +"‖", "‖" +"|", "|" +"∣", "∣" +"|", "|" +"❘", "❘" +"≀", "≀" +" ", " " +"𝔙", "𝔙" +"𝔳", "𝔳" +"⊲", "⊲" +"⊂⃒", "⊂⃒" +"⊃⃒", "⊃⃒" +"𝕍", "𝕍" +"𝕧", "𝕧" +"∝", "∝" +"⊳", "⊳" +"𝒱", "𝒱" +"𝓋", "𝓋" +"⫋︀", "⫋︀" +"⊊︀", "⊊︀" +"⫌︀", "⫌︀" +"⊋︀", "⊋︀" +"⊪", "⊪" +"⦚", "⦚" +"Ŵ", "Ŵ" +"ŵ", "ŵ" +"⩟", "⩟" +"⋀", "⋀" +"∧", "∧" +"≙", "≙" +"℘", "℘" +"𝔚", "𝔚" +"𝔴", "𝔴" +"𝕎", "𝕎" +"𝕨", "𝕨" +"℘", "℘" +"≀", "≀" +"≀", "≀" +"𝒲", "𝒲" +"𝓌", "𝓌" +"⋂", "⋂" +"◯", "◯" +"⋃", "⋃" +"▽", "▽" +"𝔛", "𝔛" +"𝔵", "𝔵" +"⟺", "⟺" +"⟷", "⟷" +"Ξ", "Ξ" +"ξ", "ξ" +"⟸", "⟸" +"⟵", "⟵" +"⟼", "⟼" +"⋻", "⋻" +"⨀", "⨀" +"𝕏", "𝕏" +"𝕩", "𝕩" +"⨁", "⨁" +"⨂", "⨂" +"⟹", "⟹" +"⟶", "⟶" +"𝒳", "𝒳" +"𝓍", "𝓍" +"⨆", "⨆" +"⨄", "⨄" +"△", "△" +"⋁", "⋁" +"⋀", "⋀" +"Ý", "Ý" +"ý", "ý" +"Я", "Я" +"я", "я" +"Ŷ", "Ŷ" +"ŷ", "ŷ" +"Ы", "Ы" +"ы", "ы" +"¥", "¥" +"𝔜", "𝔜" +"𝔶", "𝔶" +"Ї", "Ї" +"ї", "ї" +"𝕐", "𝕐" +"𝕪", "𝕪" +"𝒴", "𝒴" +"𝓎", "𝓎" +"Ю", "Ю" +"ю", "ю" +"Ÿ", "Ÿ" +"ÿ", "ÿ" +"Ź", "Ź" +"ź", "ź" +"Ž", "Ž" +"ž", "ž" +"З", "З" +"з", "з" +"Ż", "Ż" +"ż", "ż" +"ℨ", "ℨ" +"​", "​" +"Ζ", "Ζ" +"ζ", "ζ" +"ℨ", "ℨ" +"𝔷", "𝔷" +"Ж", "Ж" +"ж", "ж" +"⇝", "⇝" +"ℤ", "ℤ" +"𝕫", "𝕫" +"𝒵", "𝒵" +"𝓏", "𝓏" +"‍", "‍" +"‌", "‌" +%% diff --git a/src/Functions/HTMLCharacterReference.h b/src/Functions/HTMLCharacterReference.h new file mode 100644 index 00000000000..4c8514238d6 --- /dev/null +++ b/src/Functions/HTMLCharacterReference.h @@ -0,0 +1,7141 @@ +/* C++ code produced by gperf version 3.1 */ +/* Command-line: gperf -t --output-file=HTMLCharacterReference.h HTMLCharacterReference.gperf */ +/* Computed positions: -k'2-9,13,15' */ + +#if !((' ' == 32) && ('!' 
== 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to ." +#endif + +#line 7 "HTMLCharacterReference.gperf" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant" +#pragma GCC diagnostic ignored "-Wunused-macros" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wshorten-64-to-32" +#line 15 "HTMLCharacterReference.gperf" +struct NameAndGlyph { +const char *name; +const char *glyph; +}; +#include + +#define TOTAL_KEYWORDS 2125 +#define MIN_WORD_LENGTH 4 +#define MAX_WORD_LENGTH 33 +#define MIN_HASH_VALUE 4 +#define MAX_HASH_VALUE 17865 +/* maximum key range = 17862, duplicates = 0 */ + +class HTMLCharacterHash +{ +private: + static inline unsigned int hash (const char *str, size_t len); +public: + static const struct NameAndGlyph *Lookup (const char *str, size_t len); +}; + +inline unsigned int +HTMLCharacterHash::hash (const char *str, size_t len) +{ + static const unsigned short asso_values[] = + { + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 20, 45, 0, 15, 55, 17866, 80, 180, + 0, 0, 0, 17866, 45, 240, 2335, 0, 310, 1635, + 390, 135, 465, 1100, 1480, 30, 900, 1185, 455, 650, + 970, 75, 1820, 305, 240, 460, 555, 25, 2185, 155, + 390, 385, 280, 530, 115, 210, 250, 3440, 130, 1370, + 770, 565, 160, 595, 260, 1695, 995, 5681, 1250, 3865, + 30, 45, 80, 15, 10, 40, 0, 60, 200, 135, + 2065, 5, 2765, 340, 3506, 3449, 355, 3895, 1620, 3195, + 2754, 1270, 295, 1075, 180, 55, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 
17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, + 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866, 17866 + }; + unsigned int hval = len; + + switch (hval) + { + default: + hval += asso_values[static_cast(str[14])]; + /*FALLTHROUGH*/ + case 14: + case 13: + hval += asso_values[static_cast(str[12])]; + /*FALLTHROUGH*/ + case 12: + case 11: + case 10: + case 9: + hval += asso_values[static_cast(str[8])]; + /*FALLTHROUGH*/ + case 8: + hval += asso_values[static_cast(str[7]+1)]; + /*FALLTHROUGH*/ + case 7: + hval += asso_values[static_cast(str[6]+2)]; + /*FALLTHROUGH*/ + case 6: + hval += asso_values[static_cast(str[5]+3)]; + /*FALLTHROUGH*/ + case 5: + hval += asso_values[static_cast(str[4]+5)]; + /*FALLTHROUGH*/ + case 4: + hval += asso_values[static_cast(str[3]+1)]; + /*FALLTHROUGH*/ + case 3: + hval += asso_values[static_cast(str[2])]; + /*FALLTHROUGH*/ + case 2: + hval += asso_values[static_cast(str[1]+13)]; + break; + } + return hval; +} + +const struct NameAndGlyph * +HTMLCharacterHash::Lookup (const char *str, size_t len) +{ + static const struct NameAndGlyph wordlist[] = + { + {""}, {""}, {""}, {""}, +#line 668 "HTMLCharacterReference.gperf" + {">", ">"}, + {""}, {""}, {""}, {""}, +#line 1026 "HTMLCharacterReference.gperf" + {"<", "<"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1034 "HTMLCharacterReference.gperf" + {"◃", "◃"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 464 "HTMLCharacterReference.gperf" + {"▿", "▿"}, + {""}, {""}, {""}, +#line 1009 "HTMLCharacterReference.gperf" + {"‎", "‎"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 514 "HTMLCharacterReference.gperf" + {" ", " "}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 76 "HTMLCharacterReference.gperf" + {"≈", "≈"}, + {""}, +#line 552 "HTMLCharacterReference.gperf" + {"€", "€"}, + {""}, {""}, {""}, {""}, +#line 551 "HTMLCharacterReference.gperf" + {"ë", "ë"}, + {""}, {""}, {""}, {""}, +#line 522 "HTMLCharacterReference.gperf" + {"ε", "ε"}, + {""}, {""}, {""}, {""}, {""}, +#line 525 "HTMLCharacterReference.gperf" + {"ϵ", "ϵ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 96 "HTMLCharacterReference.gperf" + {"ä", "ä"}, +#line 528 "HTMLCharacterReference.gperf" + {"≂", "≂"}, + {""}, {""}, {""}, {""}, +#line 650 "HTMLCharacterReference.gperf" + {"⋧", "⋧"}, + {""}, {""}, {""}, {""}, +#line 975 "HTMLCharacterReference.gperf" + {"⋦", "⋦"}, + {""}, 
{""}, {""}, +#line 460 "HTMLCharacterReference.gperf" + {"⧶", "⧶"}, + {""}, {""}, +#line 1025 "HTMLCharacterReference.gperf" + {"≪", "≪"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 157 "HTMLCharacterReference.gperf" + {"⌐", "⌐"}, + {""}, {""}, {""}, +#line 412 "HTMLCharacterReference.gperf" + {"˙", "˙"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 90 "HTMLCharacterReference.gperf" + {"*", "*"}, + {""}, {""}, {""}, {""}, {""}, +#line 216 "HTMLCharacterReference.gperf" + {"\", "\\"}, + {""}, {""}, {""}, +#line 547 "HTMLCharacterReference.gperf" + {"η", "η"}, + {""}, {""}, {""}, +#line 34 "HTMLCharacterReference.gperf" + {"⁡", "⁡"}, + {""}, {""}, {""}, {""}, {""}, +#line 160 "HTMLCharacterReference.gperf" + {"⊥", "⊥"}, +#line 221 "HTMLCharacterReference.gperf" + {"≎", "≎"}, + {""}, {""}, +#line 791 "HTMLCharacterReference.gperf" + {"⁢", "⁢"}, +#line 332 "HTMLCharacterReference.gperf" + {"∪", "∪"}, + {""}, {""}, {""}, {""}, {""}, +#line 580 "HTMLCharacterReference.gperf" + {"⋔", "⋔"}, + {""}, {""}, {""}, {""}, {""}, +#line 581 "HTMLCharacterReference.gperf" + {"⫙", "⫙"}, +#line 161 "HTMLCharacterReference.gperf" + {"⊥", "⊥"}, + {""}, {""}, +#line 297 "HTMLCharacterReference.gperf" + {"∁", "∁"}, +#line 338 "HTMLCharacterReference.gperf" + {"⩅", "⩅"}, + {""}, {""}, {""}, +#line 644 "HTMLCharacterReference.gperf" + {"⪊", "⪊"}, + {""}, {""}, {""}, +#line 411 "HTMLCharacterReference.gperf" + {"¨", "¨"}, +#line 969 "HTMLCharacterReference.gperf" + {"⪉", "⪉"}, + {""}, {""}, +#line 757 "HTMLCharacterReference.gperf" + {"∈", "∈"}, + {""}, {""}, {""}, {""}, {""}, +#line 647 "HTMLCharacterReference.gperf" + {"⪈", "⪈"}, +#line 1995 "HTMLCharacterReference.gperf" + {"Ü", "Ü"}, + {""}, {""}, +#line 667 "HTMLCharacterReference.gperf" + {"≫", "≫"}, +#line 972 "HTMLCharacterReference.gperf" + {"⪇", "⪇"}, +#line 1973 "HTMLCharacterReference.gperf" + {"ϒ", "ϒ"}, + {""}, {""}, {""}, +#line 633 "HTMLCharacterReference.gperf" + {"𝔤", "𝔤"}, + {""}, {""}, {""}, {""}, +#line 948 "HTMLCharacterReference.gperf" + {"𝔩", "𝔩"}, + {""}, {""}, {""}, {""}, +#line 490 "HTMLCharacterReference.gperf" + {"𝔢", "𝔢"}, + {""}, {""}, {""}, {""}, +#line 382 "HTMLCharacterReference.gperf" + {"𝔡", "𝔡"}, + {""}, +#line 536 "HTMLCharacterReference.gperf" + {"≡", "≡"}, +#line 675 "HTMLCharacterReference.gperf" + {"⥸", "⥸"}, + {""}, {""}, +#line 2120 "HTMLCharacterReference.gperf" + {"Ÿ", "Ÿ"}, + {""}, {""}, {""}, +#line 802 "HTMLCharacterReference.gperf" + {"𝔍", "𝔍"}, + {""}, {""}, {""}, {""}, +#line 36 "HTMLCharacterReference.gperf" + {"𝔞", "𝔞"}, + {""}, {""}, {""}, {""}, +#line 154 "HTMLCharacterReference.gperf" + {"=⃥", "=⃥"}, +#line 2144 "HTMLCharacterReference.gperf" + {"‌", "‌"}, + {""}, {""}, {""}, +#line 567 "HTMLCharacterReference.gperf" + {"𝔣", "𝔣"}, +#line 2121 "HTMLCharacterReference.gperf" + {"ÿ", "ÿ"}, +#line 991 "HTMLCharacterReference.gperf" + {"⦅", "⦅"}, + {""}, +#line 1370 "HTMLCharacterReference.gperf" + {"⩔", "⩔"}, +#line 128 "HTMLCharacterReference.gperf" + {"𝔟", "𝔟"}, + {""}, {""}, {""}, {""}, {""}, +#line 110 "HTMLCharacterReference.gperf" + {"⎵", "⎵"}, + {""}, {""}, {""}, +#line 2135 "HTMLCharacterReference.gperf" + {"𝔷", "𝔷"}, + {""}, {""}, {""}, +#line 2069 "HTMLCharacterReference.gperf" + {"≀", "≀"}, +#line 702 "HTMLCharacterReference.gperf" + {"𝔥", "𝔥"}, +#line 797 "HTMLCharacterReference.gperf" + {"ï", "ï"}, + {""}, {""}, {""}, {""}, +#line 1734 "HTMLCharacterReference.gperf" + {"√", "√"}, + {""}, {""}, {""}, +#line 79 "HTMLCharacterReference.gperf" + {"≊", "≊"}, + {""}, {""}, {""}, {""}, 
+#line 381 "HTMLCharacterReference.gperf" + {"𝔇", "𝔇"}, + {""}, {""}, {""}, {""}, +#line 259 "HTMLCharacterReference.gperf" + {"𝔠", "𝔠"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 516 "HTMLCharacterReference.gperf" + {"ę", "ę"}, + {""}, {""}, +#line 763 "HTMLCharacterReference.gperf" + {"∫", "∫"}, + {""}, +#line 1035 "HTMLCharacterReference.gperf" + {"⊴", "⊴"}, + {""}, {""}, {""}, +#line 674 "HTMLCharacterReference.gperf" + {"⪆", "⪆"}, +#line 1005 "HTMLCharacterReference.gperf" + {"⇆", "⇆"}, + {""}, {""}, +#line 513 "HTMLCharacterReference.gperf" + {"ŋ", "ŋ"}, + {""}, +#line 539 "HTMLCharacterReference.gperf" + {"⥱", "⥱"}, + {""}, {""}, {""}, +#line 1003 "HTMLCharacterReference.gperf" + {"(", "("}, +#line 73 "HTMLCharacterReference.gperf" + {"ą", "ą"}, + {""}, {""}, {""}, +#line 519 "HTMLCharacterReference.gperf" + {"⋕", "⋕"}, + {""}, {""}, {""}, +#line 1485 "HTMLCharacterReference.gperf" + {"𝔔", "𝔔"}, + {""}, {""}, +#line 579 "HTMLCharacterReference.gperf" + {"∀", "∀"}, + {""}, +#line 54 "HTMLCharacterReference.gperf" + {"∠", "∠"}, + {""}, {""}, {""}, {""}, +#line 1803 "HTMLCharacterReference.gperf" + {"⋑", "⋑"}, + {""}, {""}, {""}, +#line 2068 "HTMLCharacterReference.gperf" + {"℘", "℘"}, +#line 1934 "HTMLCharacterReference.gperf" + {"𝔘", "𝔘"}, + {""}, +#line 977 "HTMLCharacterReference.gperf" + {"⇽", "⇽"}, + {""}, {""}, +#line 803 "HTMLCharacterReference.gperf" + {"𝔧", "𝔧"}, +#line 322 "HTMLCharacterReference.gperf" + {"⫐", "⫐"}, + {""}, {""}, +#line 369 "HTMLCharacterReference.gperf" + {"ⅅ", "ⅅ"}, +#line 1800 "HTMLCharacterReference.gperf" + {"∑", "∑"}, +#line 1393 "HTMLCharacterReference.gperf" + {"Ö", "Ö"}, + {""}, {""}, {""}, {""}, +#line 648 "HTMLCharacterReference.gperf" + {"⪈", "⪈"}, +#line 649 "HTMLCharacterReference.gperf" + {"≩", "≩"}, + {""}, +#line 1024 "HTMLCharacterReference.gperf" + {"<", "<"}, + {""}, +#line 973 "HTMLCharacterReference.gperf" + {"⪇", "⪇"}, +#line 974 "HTMLCharacterReference.gperf" + {"≨", "≨"}, + {""}, +#line 1299 "HTMLCharacterReference.gperf" + {"ν", "ν"}, +#line 947 "HTMLCharacterReference.gperf" + {"𝔏", "𝔏"}, + {""}, +#line 466 "HTMLCharacterReference.gperf" + {"⇵", "⇵"}, +#line 340 "HTMLCharacterReference.gperf" + {"↷", "↷"}, + {""}, +#line 2110 "HTMLCharacterReference.gperf" + {"𝔜", "𝔜"}, +#line 485 "HTMLCharacterReference.gperf" + {"≑", "≑"}, + {""}, {""}, +#line 524 "HTMLCharacterReference.gperf" + {"ε", "ε"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 315 "HTMLCharacterReference.gperf" + {"↵", "↵"}, +#line 305 "HTMLCharacterReference.gperf" + {"∮", "∮"}, + {""}, {""}, +#line 846 "HTMLCharacterReference.gperf" + {"⇐", "⇐"}, +#line 327 "HTMLCharacterReference.gperf" + {"⋞", "⋞"}, + {""}, {""}, +#line 2111 "HTMLCharacterReference.gperf" + {"𝔶", "𝔶"}, + {""}, {""}, {""}, {""}, {""}, +#line 358 "HTMLCharacterReference.gperf" + {"⇓", "⇓"}, +#line 706 "HTMLCharacterReference.gperf" + {"⇿", "⇿"}, + {""}, {""}, +#line 1000 "HTMLCharacterReference.gperf" + {"◊", "◊"}, +#line 194 "HTMLCharacterReference.gperf" + {"│", "│"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 592 "HTMLCharacterReference.gperf" + {"¾", "¾"}, +#line 1298 "HTMLCharacterReference.gperf" + {"Ν", "Ν"}, +#line 736 "HTMLCharacterReference.gperf" + {"𝔦", "𝔦"}, + {""}, {""}, +#line 309 "HTMLCharacterReference.gperf" + {"∐", "∐"}, +#line 1084 "HTMLCharacterReference.gperf" + {"Μ", "Μ"}, + {""}, {""}, +#line 1955 "HTMLCharacterReference.gperf" + {"Ų", "Ų"}, + {""}, {""}, +#line 1565 "HTMLCharacterReference.gperf" + {"ℜ", "ℜ"}, +#line 694 "HTMLCharacterReference.gperf" + {"ℏ", "ℏ"}, 
+#line 223 "HTMLCharacterReference.gperf" + {"≏", "≏"}, +#line 593 "HTMLCharacterReference.gperf" + {"⅗", "⅗"}, + {""}, {""}, {""}, +#line 862 "HTMLCharacterReference.gperf" + {"⤌", "⤌"}, +#line 586 "HTMLCharacterReference.gperf" + {"¼", "¼"}, + {""}, +#line 646 "HTMLCharacterReference.gperf" + {"≩", "≩"}, +#line 780 "HTMLCharacterReference.gperf" + {"ι", "ι"}, +#line 197 "HTMLCharacterReference.gperf" + {"╪", "╪"}, + {""}, {""}, +#line 971 "HTMLCharacterReference.gperf" + {"≨", "≨"}, + {""}, {""}, +#line 595 "HTMLCharacterReference.gperf" + {"⅘", "⅘"}, + {""}, +#line 1300 "HTMLCharacterReference.gperf" + {"#", "#"}, +#line 689 "HTMLCharacterReference.gperf" + {"⇔", "⇔"}, + {""}, +#line 587 "HTMLCharacterReference.gperf" + {"⅕", "⅕"}, +#line 666 "HTMLCharacterReference.gperf" + {">", ">"}, + {""}, +#line 193 "HTMLCharacterReference.gperf" + {"║", "║"}, +#line 202 "HTMLCharacterReference.gperf" + {"┤", "┤"}, +#line 584 "HTMLCharacterReference.gperf" + {"½", "½"}, + {""}, +#line 632 "HTMLCharacterReference.gperf" + {"𝔊", "𝔊"}, + {""}, {""}, {""}, {""}, +#line 1182 "HTMLCharacterReference.gperf" + {"¬", "¬"}, + {""}, +#line 1199 "HTMLCharacterReference.gperf" + {"∉", "∉"}, +#line 532 "HTMLCharacterReference.gperf" + {"=", "="}, + {""}, +#line 1680 "HTMLCharacterReference.gperf" + {"𝔖", "𝔖"}, +#line 312 "HTMLCharacterReference.gperf" + {"©", "©"}, +#line 206 "HTMLCharacterReference.gperf" + {"├", "├"}, +#line 225 "HTMLCharacterReference.gperf" + {"≏", "≏"}, + {""}, {""}, +#line 1498 "HTMLCharacterReference.gperf" + {""", "\""}, + {""}, +#line 591 "HTMLCharacterReference.gperf" + {"⅖", "⅖"}, + {""}, +#line 1249 "HTMLCharacterReference.gperf" + {"⊀", "⊀"}, + {""}, {""}, +#line 585 "HTMLCharacterReference.gperf" + {"⅓", "⅓"}, + {""}, {""}, {""}, +#line 195 "HTMLCharacterReference.gperf" + {"╬", "╬"}, + {""}, {""}, {""}, +#line 1760 "HTMLCharacterReference.gperf" + {"⋆", "⋆"}, +#line 866 "HTMLCharacterReference.gperf" + {"⦋", "⦋"}, +#line 588 "HTMLCharacterReference.gperf" + {"⅙", "⅙"}, +#line 183 "HTMLCharacterReference.gperf" + {"⊞", "⊞"}, + {""}, {""}, +#line 776 "HTMLCharacterReference.gperf" + {"į", "į"}, +#line 594 "HTMLCharacterReference.gperf" + {"⅜", "⅜"}, + {""}, +#line 1341 "HTMLCharacterReference.gperf" + {"𝔒", "𝔒"}, + {""}, +#line 200 "HTMLCharacterReference.gperf" + {"╢", "╢"}, + {""}, +#line 1976 "HTMLCharacterReference.gperf" + {"Υ", "Υ"}, + {""}, +#line 1100 "HTMLCharacterReference.gperf" + {" ", " "}, +#line 186 "HTMLCharacterReference.gperf" + {"╜", "╜"}, +#line 590 "HTMLCharacterReference.gperf" + {"⅔", "⅔"}, + {""}, +#line 1181 "HTMLCharacterReference.gperf" + {"⫬", "⫬"}, + {""}, {""}, {""}, +#line 1445 "HTMLCharacterReference.gperf" + {"⪻", "⪻"}, +#line 2065 "HTMLCharacterReference.gperf" + {"𝔴", "𝔴"}, + {""}, +#line 204 "HTMLCharacterReference.gperf" + {"╟", "╟"}, +#line 589 "HTMLCharacterReference.gperf" + {"⅛", "⅛"}, +#line 418 "HTMLCharacterReference.gperf" + {"∔", "∔"}, +#line 78 "HTMLCharacterReference.gperf" + {"⩰", "⩰"}, + {""}, +#line 190 "HTMLCharacterReference.gperf" + {"╙", "╙"}, + {""}, {""}, +#line 566 "HTMLCharacterReference.gperf" + {"𝔉", "𝔉"}, + {""}, {""}, +#line 596 "HTMLCharacterReference.gperf" + {"⅚", "⅚"}, + {""}, {""}, {""}, {""}, {""}, +#line 679 "HTMLCharacterReference.gperf" + {"≷", "≷"}, + {""}, +#line 95 "HTMLCharacterReference.gperf" + {"Ä", "Ä"}, + {""}, {""}, +#line 619 "HTMLCharacterReference.gperf" + {"≥", "≥"}, + {""}, {""}, {""}, {""}, +#line 884 "HTMLCharacterReference.gperf" + {"≤", "≤"}, + {""}, {""}, +#line 831 
"HTMLCharacterReference.gperf" + {"⇚", "⇚"}, + {""}, +#line 487 "HTMLCharacterReference.gperf" + {"ⅇ", "ⅇ"}, + {""}, {""}, {""}, +#line 597 "HTMLCharacterReference.gperf" + {"⅝", "⅝"}, + {""}, +#line 1142 "HTMLCharacterReference.gperf" + {"≫⃒", "≫⃒"}, +#line 630 "HTMLCharacterReference.gperf" + {"⋛︀", "⋛︀"}, + {""}, {""}, {""}, +#line 1133 "HTMLCharacterReference.gperf" + {"𝔫", "𝔫"}, +#line 156 "HTMLCharacterReference.gperf" + {"⫭", "⫭"}, +#line 384 "HTMLCharacterReference.gperf" + {"⇃", "⇃"}, +#line 520 "HTMLCharacterReference.gperf" + {"⧣", "⧣"}, + {""}, {""}, {""}, +#line 66 "HTMLCharacterReference.gperf" + {"∟", "∟"}, + {""}, +#line 635 "HTMLCharacterReference.gperf" + {"≫", "≫"}, +#line 1571 "HTMLCharacterReference.gperf" + {"Ρ", "Ρ"}, + {""}, +#line 56 "HTMLCharacterReference.gperf" + {"∠", "∠"}, +#line 1004 "HTMLCharacterReference.gperf" + {"⦓", "⦓"}, +#line 949 "HTMLCharacterReference.gperf" + {"≶", "≶"}, +#line 1486 "HTMLCharacterReference.gperf" + {"𝔮", "𝔮"}, + {""}, +#line 385 "HTMLCharacterReference.gperf" + {"⇂", "⇂"}, +#line 598 "HTMLCharacterReference.gperf" + {"⅞", "⅞"}, +#line 491 "HTMLCharacterReference.gperf" + {"⪚", "⪚"}, + {""}, {""}, {""}, {""}, {""}, +#line 625 "HTMLCharacterReference.gperf" + {"⩾", "⩾"}, + {""}, {""}, {""}, +#line 341 "HTMLCharacterReference.gperf" + {"⤼", "⤼"}, +#line 926 "HTMLCharacterReference.gperf" + {"⩽", "⩽"}, +#line 1290 "HTMLCharacterReference.gperf" + {"≹", "≹"}, +#line 1273 "HTMLCharacterReference.gperf" + {"∦", "∦"}, + {""}, {""}, +#line 622 "HTMLCharacterReference.gperf" + {"≥", "≥"}, + {""}, {""}, {""}, {""}, +#line 923 "HTMLCharacterReference.gperf" + {"≤", "≤"}, + {""}, {""}, +#line 1201 "HTMLCharacterReference.gperf" + {"⋹̸", "⋹̸"}, + {""}, +#line 1132 "HTMLCharacterReference.gperf" + {"𝔑", "𝔑"}, + {""}, {""}, {""}, {""}, +#line 1061 "HTMLCharacterReference.gperf" + {"𝔐", "𝔐"}, + {""}, {""}, {""}, {""}, {""}, +#line 1284 "HTMLCharacterReference.gperf" + {"⊅", "⊅"}, +#line 323 "HTMLCharacterReference.gperf" + {"⫒", "⫒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 494 "HTMLCharacterReference.gperf" + {"⪖", "⪖"}, +#line 1019 "HTMLCharacterReference.gperf" + {"[", "["}, +#line 1318 "HTMLCharacterReference.gperf" + {"∼⃒", "∼⃒"}, + {""}, {""}, {""}, {""}, {""}, +#line 346 "HTMLCharacterReference.gperf" + {"¤", "¤"}, + {""}, {""}, +#line 796 "HTMLCharacterReference.gperf" + {"Ï", "Ï"}, +#line 222 "HTMLCharacterReference.gperf" + {"⪮", "⪮"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 49 "HTMLCharacterReference.gperf" + {"∧", "∧"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1255 "HTMLCharacterReference.gperf" + {"↛", "↛"}, + {""}, {""}, {""}, {""}, {""}, +#line 1257 "HTMLCharacterReference.gperf" + {"↝̸", "↝̸"}, + {""}, {""}, +#line 1244 "HTMLCharacterReference.gperf" + {"∦", "∦"}, + {""}, {""}, {""}, +#line 762 "HTMLCharacterReference.gperf" + {"∬", "∬"}, + {""}, {""}, +#line 57 "HTMLCharacterReference.gperf" + {"∡", "∡"}, + {""}, {""}, +#line 255 "HTMLCharacterReference.gperf" + {"¢", "¢"}, + {""}, +#line 313 "HTMLCharacterReference.gperf" + {"℗", "℗"}, + {""}, +#line 35 "HTMLCharacterReference.gperf" + {"𝔄", "𝔄"}, + {""}, +#line 671 "HTMLCharacterReference.gperf" + {"⋗", "⋗"}, + {""}, {""}, +#line 701 "HTMLCharacterReference.gperf" + {"ℌ", "ℌ"}, + {""}, +#line 1029 "HTMLCharacterReference.gperf" + {"⋖", "⋖"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 463 "HTMLCharacterReference.gperf" + {"⋱", "⋱"}, + {""}, {""}, {""}, {""}, +#line 1101 "HTMLCharacterReference.gperf" + {"≎̸", "≎̸"}, + {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, +#line 1303 "HTMLCharacterReference.gperf" + {"≍⃒", "≍⃒"}, + {""}, {""}, {""}, +#line 1200 "HTMLCharacterReference.gperf" + {"⋵̸", "⋵̸"}, +#line 951 "HTMLCharacterReference.gperf" + {"⥢", "⥢"}, + {""}, {""}, {""}, {""}, {""}, +#line 653 "HTMLCharacterReference.gperf" + {"`", "`"}, + {""}, {""}, {""}, +#line 383 "HTMLCharacterReference.gperf" + {"⥥", "⥥"}, +#line 217 "HTMLCharacterReference.gperf" + {"⧅", "⧅"}, + {""}, {""}, {""}, {""}, +#line 543 "HTMLCharacterReference.gperf" + {"≐", "≐"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1552 "HTMLCharacterReference.gperf" + {"ℜ", "ℜ"}, +#line 1414 "HTMLCharacterReference.gperf" + {"𝔓", "𝔓"}, +#line 623 "HTMLCharacterReference.gperf" + {"≧", "≧"}, +#line 72 "HTMLCharacterReference.gperf" + {"Ą", "Ą"}, + {""}, {""}, +#line 1140 "HTMLCharacterReference.gperf" + {"⋙̸", "⋙̸"}, +#line 924 "HTMLCharacterReference.gperf" + {"≦", "≦"}, +#line 324 "HTMLCharacterReference.gperf" + {"⋯", "⋯"}, + {""}, +#line 370 "HTMLCharacterReference.gperf" + {"ⅆ", "ⅆ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 262 "HTMLCharacterReference.gperf" + {"✓", "✓"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 735 "HTMLCharacterReference.gperf" + {"ℑ", "ℑ"}, + {""}, +#line 1321 "HTMLCharacterReference.gperf" + {"↖", "↖"}, + {""}, {""}, {""}, +#line 1309 "HTMLCharacterReference.gperf" + {">⃒", ">⃒"}, + {""}, {""}, {""}, +#line 2077 "HTMLCharacterReference.gperf" + {"𝔛", "𝔛"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 209 "HTMLCharacterReference.gperf" + {"˘", "˘"}, + {""}, {""}, {""}, +#line 779 "HTMLCharacterReference.gperf" + {"Ι", "Ι"}, +#line 1247 "HTMLCharacterReference.gperf" + {"∂̸", "∂̸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 124 "HTMLCharacterReference.gperf" + {"β", "β"}, + {""}, {""}, +#line 634 "HTMLCharacterReference.gperf" + {"⋙", "⋙"}, +#line 2109 "HTMLCharacterReference.gperf" + {"¥", "¥"}, + {""}, +#line 1148 "HTMLCharacterReference.gperf" + {"⫲", "⫲"}, +#line 121 "HTMLCharacterReference.gperf" + {"ℬ", "ℬ"}, + {""}, +#line 2134 "HTMLCharacterReference.gperf" + {"ℨ", "ℨ"}, +#line 2133 "HTMLCharacterReference.gperf" + {"ζ", "ζ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 617 "HTMLCharacterReference.gperf" + {"ġ", "ġ"}, + {""}, +#line 578 "HTMLCharacterReference.gperf" + {"∀", "∀"}, + {""}, {""}, {""}, +#line 670 "HTMLCharacterReference.gperf" + {"⩺", "⩺"}, +#line 809 "HTMLCharacterReference.gperf" + {"Ј", "Ј"}, + {""}, {""}, +#line 486 "HTMLCharacterReference.gperf" + {"ė", "ė"}, +#line 1028 "HTMLCharacterReference.gperf" + {"⩹", "⩹"}, + {""}, +#line 868 "HTMLCharacterReference.gperf" + {"⦍", "⦍"}, + {""}, {""}, +#line 759 "HTMLCharacterReference.gperf" + {"∞", "∞"}, +#line 207 "HTMLCharacterReference.gperf" + {"‵", "‵"}, + {""}, +#line 922 "HTMLCharacterReference.gperf" + {"⋚", "⋚"}, +#line 1145 "HTMLCharacterReference.gperf" + {"≫̸", "≫̸"}, +#line 775 "HTMLCharacterReference.gperf" + {"Į", "Į"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 375 "HTMLCharacterReference.gperf" + {"°", "°"}, + {""}, {""}, {""}, {""}, {""}, +#line 681 "HTMLCharacterReference.gperf" + {"≩︀", "≩︀"}, + {""}, {""}, {""}, +#line 127 "HTMLCharacterReference.gperf" + {"𝔅", "𝔅"}, +#line 1040 "HTMLCharacterReference.gperf" + {"≨︀", "≨︀"}, +#line 205 "HTMLCharacterReference.gperf" + {"╞", "╞"}, + {""}, {""}, +#line 636 "HTMLCharacterReference.gperf" + {"⋙", "⋙"}, + 
{""}, +#line 1286 "HTMLCharacterReference.gperf" + {"⊉", "⊉"}, +#line 631 "HTMLCharacterReference.gperf" + {"⪔", "⪔"}, +#line 1114 "HTMLCharacterReference.gperf" + {"≠", "≠"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 2129 "HTMLCharacterReference.gperf" + {"ż", "ż"}, + {""}, {""}, {""}, {""}, +#line 320 "HTMLCharacterReference.gperf" + {"⫏", "⫏"}, +#line 781 "HTMLCharacterReference.gperf" + {"⨼", "⨼"}, +#line 1246 "HTMLCharacterReference.gperf" + {"⫽⃥", "⫽⃥"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1147 "HTMLCharacterReference.gperf" + {"↮", "↮"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 251 "HTMLCharacterReference.gperf" + {"ċ", "ċ"}, +#line 295 "HTMLCharacterReference.gperf" + {",", ","}, +#line 454 "HTMLCharacterReference.gperf" + {"⌟", "⌟"}, + {""}, {""}, {""}, +#line 203 "HTMLCharacterReference.gperf" + {"╠", "╠"}, + {""}, {""}, {""}, +#line 662 "HTMLCharacterReference.gperf" + {"ℊ", "ℊ"}, +#line 189 "HTMLCharacterReference.gperf" + {"╚", "╚"}, + {""}, {""}, {""}, +#line 1013 "HTMLCharacterReference.gperf" + {"𝓁", "𝓁"}, + {""}, +#line 455 "HTMLCharacterReference.gperf" + {"⌌", "⌌"}, + {""}, +#line 59 "HTMLCharacterReference.gperf" + {"⦩", "⦩"}, +#line 542 "HTMLCharacterReference.gperf" + {"ℯ", "ℯ"}, + {""}, {""}, {""}, {""}, +#line 457 "HTMLCharacterReference.gperf" + {"𝒹", "𝒹"}, + {""}, +#line 50 "HTMLCharacterReference.gperf" + {"⩕", "⩕"}, + {""}, {""}, +#line 53 "HTMLCharacterReference.gperf" + {"⩚", "⩚"}, + {""}, {""}, {""}, {""}, +#line 807 "HTMLCharacterReference.gperf" + {"𝒥", "𝒥"}, + {""}, +#line 810 "HTMLCharacterReference.gperf" + {"ј", "ј"}, + {""}, {""}, +#line 88 "HTMLCharacterReference.gperf" + {"𝒶", "𝒶"}, + {""}, {""}, {""}, {""}, +#line 263 "HTMLCharacterReference.gperf" + {"✓", "✓"}, + {""}, {""}, {""}, +#line 63 "HTMLCharacterReference.gperf" + {"⦭", "⦭"}, +#line 602 "HTMLCharacterReference.gperf" + {"𝒻", "𝒻"}, +#line 626 "HTMLCharacterReference.gperf" + {"⪩", "⪩"}, + {""}, {""}, {""}, +#line 212 "HTMLCharacterReference.gperf" + {"𝒷", "𝒷"}, +#line 927 "HTMLCharacterReference.gperf" + {"⪨", "⪨"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 995 "HTMLCharacterReference.gperf" + {"⨴", "⨴"}, +#line 734 "HTMLCharacterReference.gperf" + {"⇔", "⇔"}, +#line 2142 "HTMLCharacterReference.gperf" + {"𝓏", "𝓏"}, +#line 1282 "HTMLCharacterReference.gperf" + {"⊁", "⊁"}, + {""}, {""}, {""}, +#line 715 "HTMLCharacterReference.gperf" + {"𝒽", "𝒽"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2064 "HTMLCharacterReference.gperf" + {"𝔚", "𝔚"}, + {""}, +#line 1126 "HTMLCharacterReference.gperf" + {"≂̸", "≂̸"}, +#line 700 "HTMLCharacterReference.gperf" + {"⊹", "⊹"}, + {""}, {""}, +#line 456 "HTMLCharacterReference.gperf" + {"𝒟", "𝒟"}, + {""}, {""}, {""}, {""}, +#line 319 "HTMLCharacterReference.gperf" + {"𝒸", "𝒸"}, + {""}, {""}, {""}, {""}, +#line 669 "HTMLCharacterReference.gperf" + {"⪧", "⪧"}, + {""}, +#line 1316 "HTMLCharacterReference.gperf" + {"⤃", "⤃"}, + {""}, +#line 950 "HTMLCharacterReference.gperf" + {"⪑", "⪑"}, +#line 1027 "HTMLCharacterReference.gperf" + {"⪦", "⪦"}, +#line 1272 "HTMLCharacterReference.gperf" + {"∤", "∤"}, + {""}, {""}, +#line 1143 "HTMLCharacterReference.gperf" + {"≯", "≯"}, +#line 1210 "HTMLCharacterReference.gperf" + {"≸", "≸"}, + {""}, {""}, {""}, {""}, +#line 663 "HTMLCharacterReference.gperf" + {"≳", "≳"}, +#line 1141 "HTMLCharacterReference.gperf" + {"≵", "≵"}, + {""}, {""}, {""}, +#line 1016 "HTMLCharacterReference.gperf" + {"≲", "≲"}, + {""}, {""}, {""}, {""}, +#line 545 "HTMLCharacterReference.gperf" + 
{"≂", "≂"}, + {""}, {""}, {""}, {""}, +#line 1491 "HTMLCharacterReference.gperf" + {"𝒬", "𝒬"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1208 "HTMLCharacterReference.gperf" + {"≮", "≮"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1987 "HTMLCharacterReference.gperf" + {"𝒰", "𝒰"}, +#line 733 "HTMLCharacterReference.gperf" + {"¡", "¡"}, + {""}, {""}, {""}, +#line 808 "HTMLCharacterReference.gperf" + {"𝒿", "𝒿"}, + {""}, {""}, +#line 527 "HTMLCharacterReference.gperf" + {"≕", "≕"}, +#line 65 "HTMLCharacterReference.gperf" + {"⦯", "⦯"}, + {""}, +#line 201 "HTMLCharacterReference.gperf" + {"╡", "╡"}, +#line 697 "HTMLCharacterReference.gperf" + {"♥", "♥"}, + {""}, +#line 331 "HTMLCharacterReference.gperf" + {"⋓", "⋓"}, +#line 214 "HTMLCharacterReference.gperf" + {"∽", "∽"}, +#line 665 "HTMLCharacterReference.gperf" + {"⪐", "⪐"}, +#line 864 "HTMLCharacterReference.gperf" + {"{", "{"}, + {""}, +#line 48 "HTMLCharacterReference.gperf" + {"⩓", "⩓"}, +#line 616 "HTMLCharacterReference.gperf" + {"Ġ", "Ġ"}, + {""}, {""}, {""}, +#line 1006 "HTMLCharacterReference.gperf" + {"⌟", "⌟"}, +#line 1012 "HTMLCharacterReference.gperf" + {"ℒ", "ℒ"}, + {""}, +#line 224 "HTMLCharacterReference.gperf" + {"≎", "≎"}, + {""}, {""}, +#line 2116 "HTMLCharacterReference.gperf" + {"𝒴", "𝒴"}, +#line 1285 "HTMLCharacterReference.gperf" + {"⫆̸", "⫆̸"}, +#line 2130 "HTMLCharacterReference.gperf" + {"ℨ", "ℨ"}, + {""}, {""}, {""}, +#line 372 "HTMLCharacterReference.gperf" + {"⇊", "⇊"}, + {""}, {""}, {""}, {""}, +#line 414 "HTMLCharacterReference.gperf" + {"≐", "≐"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2117 "HTMLCharacterReference.gperf" + {"𝓎", "𝓎"}, +#line 952 "HTMLCharacterReference.gperf" + {"↽", "↽"}, + {""}, {""}, {""}, +#line 1144 "HTMLCharacterReference.gperf" + {"≯", "≯"}, +#line 199 "HTMLCharacterReference.gperf" + {"╣", "╣"}, + {""}, {""}, {""}, {""}, +#line 185 "HTMLCharacterReference.gperf" + {"╝", "╝"}, +#line 296 "HTMLCharacterReference.gperf" + {"@", "@"}, + {""}, +#line 1135 "HTMLCharacterReference.gperf" + {"≱", "≱"}, + {""}, {""}, {""}, {""}, {""}, +#line 784 "HTMLCharacterReference.gperf" + {"𝒾", "𝒾"}, + {""}, {""}, +#line 326 "HTMLCharacterReference.gperf" + {"⤵", "⤵"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1629 "HTMLCharacterReference.gperf" + {"ℛ", "ℛ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1953 "HTMLCharacterReference.gperf" + {"⋃", "⋃"}, + {""}, +#line 770 "HTMLCharacterReference.gperf" + {"⨼", "⨼"}, + {""}, +#line 1276 "HTMLCharacterReference.gperf" + {"⊄", "⊄"}, +#line 321 "HTMLCharacterReference.gperf" + {"⫑", "⫑"}, + {""}, {""}, {""}, +#line 459 "HTMLCharacterReference.gperf" + {"ѕ", "ѕ"}, + {""}, +#line 1490 "HTMLCharacterReference.gperf" + {"⁗", "⁗"}, + {""}, {""}, +#line 661 "HTMLCharacterReference.gperf" + {"𝒢", "𝒢"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1262 "HTMLCharacterReference.gperf" + {"⊁", "⊁"}, +#line 1755 "HTMLCharacterReference.gperf" + {"𝒮", "𝒮"}, + {""}, +#line 335 "HTMLCharacterReference.gperf" + {"⩆", "⩆"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 258 "HTMLCharacterReference.gperf" + {"ℭ", "ℭ"}, + {""}, +#line 976 "HTMLCharacterReference.gperf" + {"⟬", "⟬"}, + {""}, +#line 1212 "HTMLCharacterReference.gperf" + {"⩽̸", "⩽̸"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1383 "HTMLCharacterReference.gperf" + {"𝒪", "𝒪"}, + {""}, {""}, {""}, {""}, {""}, +#line 1117 "HTMLCharacterReference.gperf" + {"↗", 
"↗"}, + {""}, +#line 867 "HTMLCharacterReference.gperf" + {"⦏", "⦏"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 2072 "HTMLCharacterReference.gperf" + {"𝓌", "𝓌"}, +#line 165 "HTMLCharacterReference.gperf" + {"╖", "╖"}, +#line 1256 "HTMLCharacterReference.gperf" + {"⤳̸", "⤳̸"}, + {""}, {""}, {""}, {""}, {""}, +#line 640 "HTMLCharacterReference.gperf" + {"≷", "≷"}, + {""}, +#line 601 "HTMLCharacterReference.gperf" + {"ℱ", "ℱ"}, + {""}, {""}, +#line 959 "HTMLCharacterReference.gperf" + {"≪", "≪"}, + {""}, {""}, +#line 169 "HTMLCharacterReference.gperf" + {"╓", "╓"}, + {""}, +#line 496 "HTMLCharacterReference.gperf" + {"⪙", "⪙"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 677 "HTMLCharacterReference.gperf" + {"⋛", "⋛"}, + {""}, {""}, {""}, {""}, +#line 785 "HTMLCharacterReference.gperf" + {"∈", "∈"}, + {""}, +#line 1037 "HTMLCharacterReference.gperf" + {"⦖", "⦖"}, +#line 1622 "HTMLCharacterReference.gperf" + {"⥰", "⥰"}, + {""}, +#line 172 "HTMLCharacterReference.gperf" + {"═", "═"}, +#line 790 "HTMLCharacterReference.gperf" + {"∈", "∈"}, +#line 1744 "HTMLCharacterReference.gperf" + {"□", "□"}, +#line 1693 "HTMLCharacterReference.gperf" + {"↑", "↑"}, + {""}, +#line 1266 "HTMLCharacterReference.gperf" + {"𝓃", "𝓃"}, +#line 252 "HTMLCharacterReference.gperf" + {"¸", "¸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1136 "HTMLCharacterReference.gperf" + {"≱", "≱"}, +#line 1137 "HTMLCharacterReference.gperf" + {"≧̸", "≧̸"}, + {""}, {""}, {""}, +#line 1492 "HTMLCharacterReference.gperf" + {"𝓆", "𝓆"}, + {""}, {""}, {""}, +#line 500 "HTMLCharacterReference.gperf" + {"⪕", "⪕"}, + {""}, +#line 664 "HTMLCharacterReference.gperf" + {"⪎", "⪎"}, +#line 304 "HTMLCharacterReference.gperf" + {"∯", "∯"}, + {""}, {""}, {""}, +#line 1017 "HTMLCharacterReference.gperf" + {"⪍", "⪍"}, + {""}, {""}, {""}, +#line 55 "HTMLCharacterReference.gperf" + {"⦤", "⦤"}, + {""}, +#line 1102 "HTMLCharacterReference.gperf" + {"≏̸", "≏̸"}, + {""}, +#line 2078 "HTMLCharacterReference.gperf" + {"𝔵", "𝔵"}, + {""}, {""}, {""}, +#line 1183 "HTMLCharacterReference.gperf" + {"≢", "≢"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1265 "HTMLCharacterReference.gperf" + {"𝒩", "𝒩"}, + {""}, +#line 1031 "HTMLCharacterReference.gperf" + {"⋉", "⋉"}, + {""}, {""}, +#line 1081 "HTMLCharacterReference.gperf" + {"ℳ", "ℳ"}, +#line 98 "HTMLCharacterReference.gperf" + {"⨑", "⨑"}, + {""}, {""}, {""}, {""}, {""}, +#line 1130 "HTMLCharacterReference.gperf" + {"∄", "∄"}, + {""}, +#line 1171 "HTMLCharacterReference.gperf" + {"≪⃒", "≪⃒"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 215 "HTMLCharacterReference.gperf" + {"⋍", "⋍"}, +#line 764 "HTMLCharacterReference.gperf" + {"⊺", "⊺"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 415 "HTMLCharacterReference.gperf" + {"≑", "≑"}, + {""}, {""}, {""}, {""}, {""}, +#line 2119 "HTMLCharacterReference.gperf" + {"ю", "ю"}, + {""}, {""}, {""}, +#line 1134 "HTMLCharacterReference.gperf" + {"≧̸", "≧̸"}, +#line 774 "HTMLCharacterReference.gperf" + {"ё", "ё"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 265 "HTMLCharacterReference.gperf" + {"χ", "χ"}, +#line 2132 "HTMLCharacterReference.gperf" + {"Ζ", "Ζ"}, + {""}, {""}, {""}, {""}, +#line 1269 "HTMLCharacterReference.gperf" + {"≁", "≁"}, +#line 352 "HTMLCharacterReference.gperf" + {"∱", "∱"}, + {""}, {""}, +#line 62 "HTMLCharacterReference.gperf" + {"⦬", "⦬"}, + {""}, {""}, {""}, {""}, +#line 989 "HTMLCharacterReference.gperf" + {"↫", "↫"}, + {""}, {""}, {""}, +#line 24 "HTMLCharacterReference.gperf" + {"∾", 
"∾"}, + {""}, +#line 87 "HTMLCharacterReference.gperf" + {"𝒜", "𝒜"}, + {""}, {""}, +#line 958 "HTMLCharacterReference.gperf" + {"⋘", "⋘"}, + {""}, +#line 714 "HTMLCharacterReference.gperf" + {"ℋ", "ℋ"}, + {""}, +#line 493 "HTMLCharacterReference.gperf" + {"è", "è"}, + {""}, {""}, +#line 730 "HTMLCharacterReference.gperf" + {"İ", "İ"}, +#line 2093 "HTMLCharacterReference.gperf" + {"⟶", "⟶"}, + {""}, {""}, {""}, {""}, +#line 174 "HTMLCharacterReference.gperf" + {"╦", "╦"}, + {""}, {""}, +#line 64 "HTMLCharacterReference.gperf" + {"⦮", "⦮"}, +#line 301 "HTMLCharacterReference.gperf" + {"≅", "≅"}, +#line 208 "HTMLCharacterReference.gperf" + {"˘", "˘"}, + {""}, {""}, +#line 641 "HTMLCharacterReference.gperf" + {"⪥", "⪥"}, +#line 572 "HTMLCharacterReference.gperf" + {"♭", "♭"}, + {""}, +#line 38 "HTMLCharacterReference.gperf" + {"à", "à"}, + {""}, {""}, +#line 300 "HTMLCharacterReference.gperf" + {"ℂ", "ℂ"}, +#line 2047 "HTMLCharacterReference.gperf" + {"∝", "∝"}, + {""}, {""}, {""}, +#line 123 "HTMLCharacterReference.gperf" + {"Β", "Β"}, +#line 1278 "HTMLCharacterReference.gperf" + {"⊈", "⊈"}, +#line 562 "HTMLCharacterReference.gperf" + {"♀", "♀"}, + {""}, {""}, +#line 2137 "HTMLCharacterReference.gperf" + {"ж", "ж"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2128 "HTMLCharacterReference.gperf" + {"Ż", "Ż"}, + {""}, +#line 526 "HTMLCharacterReference.gperf" + {"≖", "≖"}, + {""}, {""}, +#line 1251 "HTMLCharacterReference.gperf" + {"⪯̸", "⪯̸"}, +#line 299 "HTMLCharacterReference.gperf" + {"∁", "∁"}, + {""}, {""}, {""}, {""}, +#line 1010 "HTMLCharacterReference.gperf" + {"⊿", "⊿"}, + {""}, +#line 379 "HTMLCharacterReference.gperf" + {"⦱", "⦱"}, + {""}, +#line 261 "HTMLCharacterReference.gperf" + {"ч", "ч"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1480 "HTMLCharacterReference.gperf" + {"𝒫", "𝒫"}, +#line 788 "HTMLCharacterReference.gperf" + {"⋴", "⋴"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 413 "HTMLCharacterReference.gperf" + {"⃜", "◌⃜"}, +#line 119 "HTMLCharacterReference.gperf" + {"⦰", "⦰"}, + {""}, +#line 657 "HTMLCharacterReference.gperf" + {"⪢", "⪢"}, +#line 149 "HTMLCharacterReference.gperf" + {"␣", "␣"}, + {""}, {""}, {""}, +#line 783 "HTMLCharacterReference.gperf" + {"ℐ", "ℐ"}, + {""}, +#line 1301 "HTMLCharacterReference.gperf" + {"№", "№"}, + {""}, {""}, +#line 458 "HTMLCharacterReference.gperf" + {"Ѕ", "Ѕ"}, +#line 213 "HTMLCharacterReference.gperf" + {"⁏", "⁏"}, + {""}, {""}, {""}, +#line 2094 "HTMLCharacterReference.gperf" + {"𝒳", "𝒳"}, + {""}, +#line 712 "HTMLCharacterReference.gperf" + {"―", "―"}, + {""}, +#line 2041 "HTMLCharacterReference.gperf" + {"𝔳", "𝔳"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1131 "HTMLCharacterReference.gperf" + {"∄", "∄"}, + {""}, +#line 2103 "HTMLCharacterReference.gperf" + {"Я", "Я"}, + {""}, +#line 1936 "HTMLCharacterReference.gperf" + {"Ù", "Ù"}, +#line 254 "HTMLCharacterReference.gperf" + {"⦲", "⦲"}, + {""}, {""}, +#line 349 "HTMLCharacterReference.gperf" + {"⋎", "⋎"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2141 "HTMLCharacterReference.gperf" + {"𝒵", "𝒵"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 529 "HTMLCharacterReference.gperf" + {"⪖", "⪖"}, + {""}, {""}, {""}, {""}, {""}, +#line 210 "HTMLCharacterReference.gperf" + {"¦", "¦"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 480 "HTMLCharacterReference.gperf" + {"≕", "≕"}, +#line 725 "HTMLCharacterReference.gperf" + 
{"⁣", "⁣"}, + {""}, +#line 257 "HTMLCharacterReference.gperf" + {"·", "·"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 960 "HTMLCharacterReference.gperf" + {"⇇", "⇇"}, + {""}, {""}, +#line 1454 "HTMLCharacterReference.gperf" + {"≺", "≺"}, + {""}, +#line 978 "HTMLCharacterReference.gperf" + {"⟦", "⟦"}, + {""}, +#line 1747 "HTMLCharacterReference.gperf" + {"⊏", "⊏"}, +#line 333 "HTMLCharacterReference.gperf" + {"⩈", "⩈"}, +#line 469 "HTMLCharacterReference.gperf" + {"Џ", "Џ"}, +#line 574 "HTMLCharacterReference.gperf" + {"▱", "▱"}, + {""}, {""}, +#line 61 "HTMLCharacterReference.gperf" + {"⦫", "⦫"}, +#line 211 "HTMLCharacterReference.gperf" + {"ℬ", "ℬ"}, + {""}, +#line 738 "HTMLCharacterReference.gperf" + {"ì", "ì"}, + {""}, +#line 184 "HTMLCharacterReference.gperf" + {"⊠", "⊠"}, + {""}, {""}, +#line 89 "HTMLCharacterReference.gperf" + {"≔", "≔"}, + {""}, +#line 1813 "HTMLCharacterReference.gperf" + {"⊃", "⊃"}, + {""}, +#line 1119 "HTMLCharacterReference.gperf" + {"≐̸", "≐̸"}, +#line 1390 "HTMLCharacterReference.gperf" + {"⨷", "⨷"}, +#line 1748 "HTMLCharacterReference.gperf" + {"⊑", "⊑"}, +#line 1482 "HTMLCharacterReference.gperf" + {"Ψ", "Ψ"}, + {""}, +#line 873 "HTMLCharacterReference.gperf" + {"⌈", "⌈"}, + {""}, {""}, +#line 2040 "HTMLCharacterReference.gperf" + {"𝔙", "𝔙"}, + {""}, +#line 1252 "HTMLCharacterReference.gperf" + {"⊀", "⊀"}, + {""}, +#line 1648 "HTMLCharacterReference.gperf" + {"⪼", "⪼"}, + {""}, +#line 1688 "HTMLCharacterReference.gperf" + {"↓", "↓"}, + {""}, +#line 534 "HTMLCharacterReference.gperf" + {"≟", "≟"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1749 "HTMLCharacterReference.gperf" + {"⊐", "⊐"}, + {""}, {""}, +#line 618 "HTMLCharacterReference.gperf" + {"≧", "≧"}, + {""}, +#line 1750 "HTMLCharacterReference.gperf" + {"⊒", "⊒"}, + {""}, {""}, +#line 883 "HTMLCharacterReference.gperf" + {"≦", "≦"}, + {""}, +#line 1175 "HTMLCharacterReference.gperf" + {"≪̸", "≪̸"}, +#line 1277 "HTMLCharacterReference.gperf" + {"⫅̸", "⫅̸"}, + {""}, {""}, {""}, {""}, +#line 1270 "HTMLCharacterReference.gperf" + {"≄", "≄"}, + {""}, {""}, {""}, +#line 990 "HTMLCharacterReference.gperf" + {"↬", "↬"}, + {""}, {""}, {""}, {""}, {""}, +#line 168 "HTMLCharacterReference.gperf" + {"╔", "╔"}, + {""}, {""}, {""}, {""}, +#line 2080 "HTMLCharacterReference.gperf" + {"⟷", "⟷"}, + {""}, {""}, {""}, {""}, +#line 153 "HTMLCharacterReference.gperf" + {"█", "█"}, + {""}, {""}, {""}, {""}, +#line 863 "HTMLCharacterReference.gperf" + {"❲", "❲"}, + {""}, +#line 659 "HTMLCharacterReference.gperf" + {"⩾", "⩾"}, + {""}, {""}, {""}, +#line 1344 "HTMLCharacterReference.gperf" + {"Ò", "Ò"}, +#line 1287 "HTMLCharacterReference.gperf" + {"⊃⃒", "⊃⃒"}, +#line 182 "HTMLCharacterReference.gperf" + {"⊟", "⊟"}, + {""}, +#line 955 "HTMLCharacterReference.gperf" + {"▄", "▄"}, + {""}, {""}, +#line 642 "HTMLCharacterReference.gperf" + {"⪒", "⪒"}, +#line 2005 "HTMLCharacterReference.gperf" + {"⇕", "⇕"}, + {""}, +#line 1271 "HTMLCharacterReference.gperf" + {"≄", "≄"}, +#line 468 "HTMLCharacterReference.gperf" + {"⦦", "⦦"}, + {""}, +#line 1138 "HTMLCharacterReference.gperf" + {"⩾̸", "⩾̸"}, + {""}, +#line 1720 "HTMLCharacterReference.gperf" + {"Ь", "Ь"}, + {""}, +#line 1172 "HTMLCharacterReference.gperf" + {"≮", "≮"}, +#line 1805 "HTMLCharacterReference.gperf" + {"¹", "¹"}, + {""}, {""}, +#line 1213 "HTMLCharacterReference.gperf" + {"≴", "≴"}, + {""}, +#line 1497 "HTMLCharacterReference.gperf" + {""", "\""}, +#line 1170 "HTMLCharacterReference.gperf" + {"≴", "≴"}, + {""}, {""}, +#line 417 
"HTMLCharacterReference.gperf" + {"∸", "∸"}, +#line 877 "HTMLCharacterReference.gperf" + {"⤶", "⤶"}, + {""}, {""}, {""}, {""}, +#line 1806 "HTMLCharacterReference.gperf" + {"²", "²"}, + {""}, {""}, {""}, {""}, +#line 2071 "HTMLCharacterReference.gperf" + {"𝒲", "𝒲"}, + {""}, {""}, {""}, {""}, +#line 886 "HTMLCharacterReference.gperf" + {"←", "←"}, + {""}, +#line 2053 "HTMLCharacterReference.gperf" + {"⫌︀", "⫌︀"}, + {""}, +#line 1766 "HTMLCharacterReference.gperf" + {"⋐", "⋐"}, +#line 682 "HTMLCharacterReference.gperf" + {"≩︀", "≩︀"}, + {""}, {""}, {""}, {""}, +#line 1041 "HTMLCharacterReference.gperf" + {"≨︀", "≨︀"}, + {""}, +#line 870 "HTMLCharacterReference.gperf" + {"ľ", "ľ"}, +#line 889 "HTMLCharacterReference.gperf" + {"⇤", "⇤"}, +#line 708 "HTMLCharacterReference.gperf" + {"↩", "↩"}, + {""}, +#line 1216 "HTMLCharacterReference.gperf" + {"∌", "∌"}, +#line 476 "HTMLCharacterReference.gperf" + {"ě", "ě"}, + {""}, +#line 1804 "HTMLCharacterReference.gperf" + {"⊃", "⊃"}, + {""}, +#line 787 "HTMLCharacterReference.gperf" + {"⋹", "⋹"}, +#line 366 "HTMLCharacterReference.gperf" + {"ď", "ď"}, + {""}, {""}, +#line 2118 "HTMLCharacterReference.gperf" + {"Ю", "Ю"}, + {""}, +#line 857 "HTMLCharacterReference.gperf" + {"⤛", "⤛"}, + {""}, +#line 549 "HTMLCharacterReference.gperf" + {"ð", "ð"}, +#line 1807 "HTMLCharacterReference.gperf" + {"³", "³"}, + {""}, +#line 77 "HTMLCharacterReference.gperf" + {"⩯", "⩯"}, + {""}, +#line 1801 "HTMLCharacterReference.gperf" + {"∑", "∑"}, + {""}, +#line 399 "HTMLCharacterReference.gperf" + {"⋲", "⋲"}, +#line 2070 "HTMLCharacterReference.gperf" + {"≀", "≀"}, + {""}, +#line 46 "HTMLCharacterReference.gperf" + {"&", "&"}, + {""}, {""}, {""}, {""}, {""}, +#line 2016 "HTMLCharacterReference.gperf" + {"⫫", "⫫"}, + {""}, +#line 610 "HTMLCharacterReference.gperf" + {"ğ", "ğ"}, + {""}, {""}, {""}, +#line 1036 "HTMLCharacterReference.gperf" + {"◂", "◂"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 465 "HTMLCharacterReference.gperf" + {"▾", "▾"}, +#line 2125 "HTMLCharacterReference.gperf" + {"ž", "ž"}, + {""}, +#line 1015 "HTMLCharacterReference.gperf" + {"↰", "↰"}, + {""}, {""}, {""}, +#line 1283 "HTMLCharacterReference.gperf" + {"⪰̸", "⪰̸"}, + {""}, +#line 1689 "HTMLCharacterReference.gperf" + {"←", "←"}, +#line 540 "HTMLCharacterReference.gperf" + {"≓", "≓"}, + {""}, {""}, +#line 1159 "HTMLCharacterReference.gperf" + {"≰", "≰"}, +#line 1308 "HTMLCharacterReference.gperf" + {"≥⃒", "≥⃒"}, +#line 1260 "HTMLCharacterReference.gperf" + {"⋫", "⋫"}, +#line 23 "HTMLCharacterReference.gperf" + {"ă", "ă"}, + {""}, {""}, {""}, {""}, +#line 365 "HTMLCharacterReference.gperf" + {"Ď", "Ď"}, + {""}, {""}, {""}, +#line 1463 "HTMLCharacterReference.gperf" + {"″", "″"}, +#line 242 "HTMLCharacterReference.gperf" + {"č", "č"}, + {""}, {""}, +#line 311 "HTMLCharacterReference.gperf" + {"©", "©"}, + {""}, +#line 782 "HTMLCharacterReference.gperf" + {"¿", "¿"}, + {""}, {""}, {""}, {""}, +#line 758 "HTMLCharacterReference.gperf" + {"℅", "℅"}, + {""}, {""}, +#line 931 "HTMLCharacterReference.gperf" + {"⋚︀", "⋚︀"}, +#line 2087 "HTMLCharacterReference.gperf" + {"⨀", "⨀"}, + {""}, {""}, {""}, +#line 250 "HTMLCharacterReference.gperf" + {"Ċ", "Ċ"}, + {""}, {""}, {""}, +#line 1416 "HTMLCharacterReference.gperf" + {"Φ", "Φ"}, + {""}, {""}, {""}, {""}, +#line 26 "HTMLCharacterReference.gperf" + {"∾̳", "∾̳"}, + {""}, +#line 167 "HTMLCharacterReference.gperf" + {"┐", "┐"}, + {""}, {""}, {""}, +#line 1686 "HTMLCharacterReference.gperf" + {"Ш", "Ш"}, +#line 240 
"HTMLCharacterReference.gperf" + {"⩍", "⩍"}, + {""}, {""}, {""}, {""}, +#line 164 "HTMLCharacterReference.gperf" + {"╗", "╗"}, + {""}, {""}, {""}, +#line 391 "HTMLCharacterReference.gperf" + {"⋄", "⋄"}, +#line 171 "HTMLCharacterReference.gperf" + {"┌", "┌"}, +#line 37 "HTMLCharacterReference.gperf" + {"À", "À"}, + {""}, {""}, {""}, {""}, {""}, +#line 374 "HTMLCharacterReference.gperf" + {"⩷", "⩷"}, +#line 266 "HTMLCharacterReference.gperf" + {"○", "○"}, + {""}, {""}, +#line 1310 "HTMLCharacterReference.gperf" + {"⤄", "⤄"}, + {""}, +#line 1681 "HTMLCharacterReference.gperf" + {"𝔰", "𝔰"}, + {""}, +#line 29 "HTMLCharacterReference.gperf" + {"´", "´"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 175 "HTMLCharacterReference.gperf" + {"╤", "╤"}, + {""}, {""}, +#line 1743 "HTMLCharacterReference.gperf" + {"□", "□"}, +#line 2034 "HTMLCharacterReference.gperf" + {"|", "|"}, + {""}, +#line 869 "HTMLCharacterReference.gperf" + {"Ľ", "Ľ"}, + {""}, {""}, +#line 1761 "HTMLCharacterReference.gperf" + {"☆", "☆"}, + {""}, +#line 2091 "HTMLCharacterReference.gperf" + {"⨂", "⨂"}, + {""}, +#line 400 "HTMLCharacterReference.gperf" + {"÷", "÷"}, +#line 732 "HTMLCharacterReference.gperf" + {"е", "е"}, + {""}, {""}, +#line 739 "HTMLCharacterReference.gperf" + {"ⅈ", "ⅈ"}, + {""}, {""}, {""}, +#line 1923 "HTMLCharacterReference.gperf" + {"Ŭ", "Ŭ"}, + {""}, {""}, +#line 1103 "HTMLCharacterReference.gperf" + {"⩃", "⩃"}, +#line 1765 "HTMLCharacterReference.gperf" + {"¯", "¯"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 248 "HTMLCharacterReference.gperf" + {"⩌", "⩌"}, + {""}, {""}, +#line 396 "HTMLCharacterReference.gperf" + {"¨", "¨"}, +#line 318 "HTMLCharacterReference.gperf" + {"𝒞", "𝒞"}, +#line 1156 "HTMLCharacterReference.gperf" + {"↚", "↚"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 550 "HTMLCharacterReference.gperf" + {"Ë", "Ë"}, +#line 1978 "HTMLCharacterReference.gperf" + {"⊥", "⊥"}, + {""}, {""}, {""}, {""}, +#line 1007 "HTMLCharacterReference.gperf" + {"⇋", "⇋"}, + {""}, {""}, +#line 1014 "HTMLCharacterReference.gperf" + {"↰", "↰"}, + {""}, {""}, +#line 1539 "HTMLCharacterReference.gperf" + {"Ř", "Ř"}, + {""}, {""}, {""}, +#line 488 "HTMLCharacterReference.gperf" + {"≒", "≒"}, + {""}, {""}, +#line 921 "HTMLCharacterReference.gperf" + {"⪋", "⪋"}, + {""}, {""}, +#line 676 "HTMLCharacterReference.gperf" + {"⋗", "⋗"}, + {""}, {""}, +#line 1164 "HTMLCharacterReference.gperf" + {"≰", "≰"}, +#line 1165 "HTMLCharacterReference.gperf" + {"≦̸", "≦̸"}, +#line 737 "HTMLCharacterReference.gperf" + {"Ì", "Ì"}, + {""}, {""}, {""}, +#line 1754 "HTMLCharacterReference.gperf" + {"→", "→"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1729 "HTMLCharacterReference.gperf" + {"∥", "∥"}, + {""}, {""}, {""}, {""}, +#line 1110 "HTMLCharacterReference.gperf" + {"⩂", "⩂"}, + {""}, +#line 1211 "HTMLCharacterReference.gperf" + {"≪̸", "≪̸"}, + {""}, {""}, {""}, {""}, +#line 1651 "HTMLCharacterReference.gperf" + {"Š", "Š"}, + {""}, {""}, +#line 2033 "HTMLCharacterReference.gperf" + {"‖", "‖"}, +#line 1979 "HTMLCharacterReference.gperf" + {"↥", "↥"}, + {""}, {""}, {""}, {""}, {""}, +#line 1757 "HTMLCharacterReference.gperf" + {"∖", "∖"}, + {""}, +#line 1631 "HTMLCharacterReference.gperf" + {"↱", "↱"}, + {""}, +#line 467 "HTMLCharacterReference.gperf" + {"⥯", "⥯"}, +#line 298 "HTMLCharacterReference.gperf" + {"∘", "∘"}, + {""}, +#line 546 "HTMLCharacterReference.gperf" + {"Η", "Η"}, + {""}, +#line 1820 "HTMLCharacterReference.gperf" + {"⊋", "⊋"}, + {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, + {""}, +#line 609 "HTMLCharacterReference.gperf" + {"Ğ", "Ğ"}, + {""}, +#line 25 "HTMLCharacterReference.gperf" + {"∿", "∿"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 2095 "HTMLCharacterReference.gperf" + {"𝓍", "𝓍"}, + {""}, {""}, {""}, +#line 1158 "HTMLCharacterReference.gperf" + {"≦̸", "≦̸"}, + {""}, {""}, {""}, {""}, {""}, +#line 888 "HTMLCharacterReference.gperf" + {"←", "←"}, + {""}, +#line 144 "HTMLCharacterReference.gperf" + {"▪", "▪"}, +#line 1149 "HTMLCharacterReference.gperf" + {"∋", "∋"}, + {""}, {""}, {""}, {""}, {""}, +#line 2027 "HTMLCharacterReference.gperf" + {"∨", "∨"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1018 "HTMLCharacterReference.gperf" + {"⪏", "⪏"}, + {""}, {""}, {""}, {""}, +#line 395 "HTMLCharacterReference.gperf" + {"♦", "♦"}, +#line 1751 "HTMLCharacterReference.gperf" + {"⊔", "⊔"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 337 "HTMLCharacterReference.gperf" + {"⊍", "⊍"}, + {""}, +#line 489 "HTMLCharacterReference.gperf" + {"𝔈", "𝔈"}, +#line 1288 "HTMLCharacterReference.gperf" + {"⊉", "⊉"}, +#line 1289 "HTMLCharacterReference.gperf" + {"⫆̸", "⫆̸"}, +#line 1105 "HTMLCharacterReference.gperf" + {"ň", "ň"}, + {""}, {""}, {""}, +#line 86 "HTMLCharacterReference.gperf" + {"å", "å"}, + {""}, {""}, {""}, +#line 267 "HTMLCharacterReference.gperf" + {"ˆ", "ˆ"}, + {""}, {""}, {""}, +#line 1150 "HTMLCharacterReference.gperf" + {"⋼", "⋼"}, + {""}, +#line 1833 "HTMLCharacterReference.gperf" + {"↙", "↙"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1248 "HTMLCharacterReference.gperf" + {"⨔", "⨔"}, + {""}, {""}, +#line 483 "HTMLCharacterReference.gperf" + {"⩷", "⩷"}, +#line 1008 "HTMLCharacterReference.gperf" + {"⥭", "⥭"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1487 "HTMLCharacterReference.gperf" + {"⨌", "⨌"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1104 "HTMLCharacterReference.gperf" + {"Ň", "Ň"}, + {""}, {""}, {""}, {""}, {""}, +#line 1821 "HTMLCharacterReference.gperf" + {"⫀", "⫀"}, + {""}, +#line 639 "HTMLCharacterReference.gperf" + {"ѓ", "ѓ"}, + {""}, {""}, +#line 1494 "HTMLCharacterReference.gperf" + {"⨖", "⨖"}, + {""}, +#line 957 "HTMLCharacterReference.gperf" + {"љ", "љ"}, + {""}, {""}, +#line 786 "HTMLCharacterReference.gperf" + {"⋵", "⋵"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 405 "HTMLCharacterReference.gperf" + {"ђ", "ђ"}, + {""}, {""}, {""}, +#line 2026 "HTMLCharacterReference.gperf" + {"⋁", "⋁"}, + {""}, +#line 515 "HTMLCharacterReference.gperf" + {"Ę", "Ę"}, +#line 932 "HTMLCharacterReference.gperf" + {"⪓", "⪓"}, + {""}, {""}, +#line 1264 "HTMLCharacterReference.gperf" + {"⪰̸", "⪰̸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 660 "HTMLCharacterReference.gperf" + {"≳", "≳"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 874 "HTMLCharacterReference.gperf" + {"{", "{"}, +#line 1984 "HTMLCharacterReference.gperf" + {"Ů", "Ů"}, + {""}, {""}, {""}, {""}, {""}, +#line 406 "HTMLCharacterReference.gperf" + {"⌞", "⌞"}, + {""}, {""}, +#line 887 "HTMLCharacterReference.gperf" + {"⇐", "⇐"}, +#line 1190 "HTMLCharacterReference.gperf" + {"≯", "≯"}, +#line 2054 "HTMLCharacterReference.gperf" + {"⊋︀", "⊋︀"}, +#line 249 "HTMLCharacterReference.gperf" + {"⩐", "⩐"}, + {""}, +#line 2136 "HTMLCharacterReference.gperf" + {"Ж", "Ж"}, +#line 1683 "HTMLCharacterReference.gperf" + {"♯", "♯"}, + {""}, {""}, {""}, {""}, {""}, +#line 407 "HTMLCharacterReference.gperf" + {"⌍", 
"⌍"}, + {""}, +#line 60 "HTMLCharacterReference.gperf" + {"⦪", "⦪"}, +#line 1679 "HTMLCharacterReference.gperf" + {"✶", "✶"}, + {""}, {""}, {""}, {""}, {""}, +#line 1819 "HTMLCharacterReference.gperf" + {"⫌", "⫌"}, +#line 1759 "HTMLCharacterReference.gperf" + {"⋆", "⋆"}, + {""}, {""}, {""}, {""}, +#line 994 "HTMLCharacterReference.gperf" + {"⨭", "⨭"}, + {""}, {""}, +#line 1971 "HTMLCharacterReference.gperf" + {"↖", "↖"}, + {""}, {""}, +#line 1279 "HTMLCharacterReference.gperf" + {"⊂⃒", "⊂⃒"}, +#line 1152 "HTMLCharacterReference.gperf" + {"∋", "∋"}, +#line 2050 "HTMLCharacterReference.gperf" + {"𝓋", "𝓋"}, + {""}, +#line 761 "HTMLCharacterReference.gperf" + {"ı", "ı"}, +#line 523 "HTMLCharacterReference.gperf" + {"Ε", "Ε"}, + {""}, +#line 339 "HTMLCharacterReference.gperf" + {"∪︀", "∪︀"}, + {""}, +#line 22 "HTMLCharacterReference.gperf" + {"Ă", "Ă"}, +#line 1421 "HTMLCharacterReference.gperf" + {"Π", "Π"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 94 "HTMLCharacterReference.gperf" + {"ã", "ã"}, + {""}, +#line 1949 "HTMLCharacterReference.gperf" + {"_", "_"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 531 "HTMLCharacterReference.gperf" + {"⩵", "⩵"}, + {""}, +#line 433 "HTMLCharacterReference.gperf" + {"⇕", "⇕"}, +#line 538 "HTMLCharacterReference.gperf" + {"⧥", "⧥"}, +#line 1675 "HTMLCharacterReference.gperf" + {";", ";"}, + {""}, {""}, {""}, {""}, {""}, +#line 170 "HTMLCharacterReference.gperf" + {"╒", "╒"}, + {""}, {""}, {""}, +#line 81 "HTMLCharacterReference.gperf" + {"'", "'"}, + {""}, +#line 2051 "HTMLCharacterReference.gperf" + {"⫋︀", "⫋︀"}, +#line 2081 "HTMLCharacterReference.gperf" + {"Ξ", "Ξ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 264 "HTMLCharacterReference.gperf" + {"Χ", "Χ"}, + {""}, +#line 1678 "HTMLCharacterReference.gperf" + {"∖", "∖"}, + {""}, +#line 302 "HTMLCharacterReference.gperf" + {"⩭", "⩭"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 908 "HTMLCharacterReference.gperf" + {"⊣", "⊣"}, + {""}, {""}, {""}, +#line 1323 "HTMLCharacterReference.gperf" + {"⤧", "⤧"}, + {""}, {""}, +#line 1157 "HTMLCharacterReference.gperf" + {"‥", "‥"}, +#line 599 "HTMLCharacterReference.gperf" + {"⁄", "⁄"}, +#line 334 "HTMLCharacterReference.gperf" + {"≍", "≍"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 821 "HTMLCharacterReference.gperf" + {"𝔨", "𝔨"}, + {""}, {""}, {""}, {""}, +#line 133 "HTMLCharacterReference.gperf" + {"⨁", "⨁"}, +#line 2049 "HTMLCharacterReference.gperf" + {"𝒱", "𝒱"}, + {""}, +#line 1752 "HTMLCharacterReference.gperf" + {"▪", "▪"}, + {""}, {""}, +#line 51 "HTMLCharacterReference.gperf" + {"⩜", "⩜"}, + {""}, +#line 2124 "HTMLCharacterReference.gperf" + {"Ž", "Ž"}, + {""}, +#line 1214 "HTMLCharacterReference.gperf" + {"⪢̸", "⪢̸"}, +#line 890 "HTMLCharacterReference.gperf" + {"⇆", "⇆"}, +#line 288 "HTMLCharacterReference.gperf" + {"♣", "♣"}, + {""}, {""}, {""}, +#line 773 "HTMLCharacterReference.gperf" + {"Ё", "Ё"}, + {""}, +#line 1990 "HTMLCharacterReference.gperf" + {"Ũ", "Ũ"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 961 "HTMLCharacterReference.gperf" + {"⌞", "⌞"}, + {""}, {""}, {""}, {""}, {""}, +#line 1166 "HTMLCharacterReference.gperf" + {"⩽̸", "⩽̸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1558 "HTMLCharacterReference.gperf" + {"®", "®"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1758 "HTMLCharacterReference.gperf" + {"⌣", "⌣"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, 
{""}, {""}, +#line 268 "HTMLCharacterReference.gperf" + {"≗", "≗"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2112 "HTMLCharacterReference.gperf" + {"Ї", "Ї"}, + {""}, +#line 530 "HTMLCharacterReference.gperf" + {"⪕", "⪕"}, + {""}, {""}, {""}, {""}, +#line 1261 "HTMLCharacterReference.gperf" + {"⋭", "⋭"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 793 "HTMLCharacterReference.gperf" + {"ĩ", "ĩ"}, +#line 1253 "HTMLCharacterReference.gperf" + {"⪯̸", "⪯̸"}, + {""}, {""}, {""}, {""}, {""}, +#line 645 "HTMLCharacterReference.gperf" + {"⪊", "⪊"}, + {""}, {""}, {""}, {""}, +#line 970 "HTMLCharacterReference.gperf" + {"⪉", "⪉"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 432 "HTMLCharacterReference.gperf" + {"⇑", "⇑"}, + {""}, {""}, +#line 658 "HTMLCharacterReference.gperf" + {"≷", "≷"}, + {""}, +#line 820 "HTMLCharacterReference.gperf" + {"𝔎", "𝔎"}, +#line 2099 "HTMLCharacterReference.gperf" + {"⋁", "⋁"}, + {""}, {""}, {""}, {""}, {""}, +#line 317 "HTMLCharacterReference.gperf" + {"✗", "✗"}, + {""}, {""}, {""}, {""}, +#line 166 "HTMLCharacterReference.gperf" + {"╕", "╕"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1475 "HTMLCharacterReference.gperf" + {"∷", "∷"}, + {""}, {""}, {""}, {""}, {""}, +#line 1011 "HTMLCharacterReference.gperf" + {"‹", "‹"}, + {""}, {""}, +#line 1154 "HTMLCharacterReference.gperf" + {"њ", "њ"}, + {""}, +#line 1307 "HTMLCharacterReference.gperf" + {"⊬", "⊬"}, + {""}, +#line 453 "HTMLCharacterReference.gperf" + {"⤐", "⤐"}, +#line 419 "HTMLCharacterReference.gperf" + {"⊡", "⊡"}, + {""}, {""}, +#line 537 "HTMLCharacterReference.gperf" + {"⩸", "⩸"}, + {""}, {""}, {""}, +#line 627 "HTMLCharacterReference.gperf" + {"⪀", "⪀"}, + {""}, {""}, +#line 1667 "HTMLCharacterReference.gperf" + {"⋅", "⋅"}, + {""}, +#line 928 "HTMLCharacterReference.gperf" + {"⩿", "⩿"}, + {""}, {""}, {""}, {""}, +#line 1388 "HTMLCharacterReference.gperf" + {"Õ", "Õ"}, +#line 909 "HTMLCharacterReference.gperf" + {"↤", "↤"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 336 "HTMLCharacterReference.gperf" + {"⩊", "⩊"}, + {""}, {""}, {""}, +#line 85 "HTMLCharacterReference.gperf" + {"Å", "Å"}, + {""}, {""}, {""}, +#line 900 "HTMLCharacterReference.gperf" + {"⇇", "⇇"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 477 "HTMLCharacterReference.gperf" + {"≖", "≖"}, +#line 1672 "HTMLCharacterReference.gperf" + {"↘", "↘"}, +#line 495 "HTMLCharacterReference.gperf" + {"⪘", "⪘"}, + {""}, {""}, {""}, {""}, {""}, +#line 253 "HTMLCharacterReference.gperf" + {"¸", "¸"}, + {""}, {""}, +#line 70 "HTMLCharacterReference.gperf" + {"Å", "Å"}, + {""}, {""}, {""}, {""}, +#line 328 "HTMLCharacterReference.gperf" + {"⋟", "⋟"}, + {""}, {""}, {""}, {""}, +#line 112 "HTMLCharacterReference.gperf" + {"≌", "≌"}, + {""}, {""}, {""}, {""}, +#line 1302 "HTMLCharacterReference.gperf" + {" ", " "}, + {""}, {""}, +#line 1457 "HTMLCharacterReference.gperf" + {"≾", "≾"}, + {""}, {""}, +#line 1292 "HTMLCharacterReference.gperf" + {"ñ", "ñ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 260 "HTMLCharacterReference.gperf" + {"Ч", "Ч"}, +#line 151 "HTMLCharacterReference.gperf" + {"░", "░"}, + {""}, {""}, {""}, +#line 1756 "HTMLCharacterReference.gperf" + {"𝓈", "𝓈"}, +#line 1127 "HTMLCharacterReference.gperf" + {"≫", "≫"}, + {""}, +#line 398 "HTMLCharacterReference.gperf" + {"ϝ", "ϝ"}, +#line 930 "HTMLCharacterReference.gperf" + {"⪃", "⪃"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 
582 "HTMLCharacterReference.gperf" + {"ℱ", "ℱ"}, + {""}, {""}, {""}, +#line 897 "HTMLCharacterReference.gperf" + {"⌊", "⌊"}, +#line 2098 "HTMLCharacterReference.gperf" + {"△", "△"}, + {""}, {""}, {""}, {""}, {""}, +#line 1291 "HTMLCharacterReference.gperf" + {"Ñ", "Ñ"}, + {""}, {""}, {""}, +#line 150 "HTMLCharacterReference.gperf" + {"▒", "▒"}, + {""}, {""}, {""}, {""}, +#line 953 "HTMLCharacterReference.gperf" + {"↼", "↼"}, + {""}, {""}, {""}, +#line 1280 "HTMLCharacterReference.gperf" + {"⊈", "⊈"}, +#line 1281 "HTMLCharacterReference.gperf" + {"⫅̸", "⫅̸"}, + {""}, {""}, {""}, {""}, +#line 1730 "HTMLCharacterReference.gperf" + {"⊓", "⊓"}, +#line 687 "HTMLCharacterReference.gperf" + {"Ъ", "Ъ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 152 "HTMLCharacterReference.gperf" + {"▓", "▓"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 435 "HTMLCharacterReference.gperf" + {"↓", "↓"}, + {""}, +#line 954 "HTMLCharacterReference.gperf" + {"⥪", "⥪"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 760 "HTMLCharacterReference.gperf" + {"⧝", "⧝"}, +#line 2073 "HTMLCharacterReference.gperf" + {"⋂", "⋂"}, + {""}, {""}, +#line 438 "HTMLCharacterReference.gperf" + {"⤓", "⤓"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 2084 "HTMLCharacterReference.gperf" + {"⟵", "⟵"}, +#line 1745 "HTMLCharacterReference.gperf" + {"□", "□"}, + {""}, {""}, {""}, {""}, +#line 93 "HTMLCharacterReference.gperf" + {"Ã", "Ã"}, + {""}, {""}, {""}, +#line 613 "HTMLCharacterReference.gperf" + {"ĝ", "ĝ"}, + {""}, {""}, {""}, {""}, +#line 861 "HTMLCharacterReference.gperf" + {"⤎", "⤎"}, +#line 1731 "HTMLCharacterReference.gperf" + {"⊓︀", "⊓︀"}, + {""}, {""}, {""}, +#line 479 "HTMLCharacterReference.gperf" + {"ê", "ê"}, +#line 1125 "HTMLCharacterReference.gperf" + {"⤨", "⤨"}, + {""}, {""}, +#line 484 "HTMLCharacterReference.gperf" + {"Ė", "Ė"}, +#line 1205 "HTMLCharacterReference.gperf" + {"⋪", "⋪"}, +#line 996 "HTMLCharacterReference.gperf" + {"∗", "∗"}, + {""}, +#line 1206 "HTMLCharacterReference.gperf" + {"⧏̸", "⧏̸"}, +#line 404 "HTMLCharacterReference.gperf" + {"Ђ", "Ђ"}, +#line 1207 "HTMLCharacterReference.gperf" + {"⋬", "⋬"}, + {""}, {""}, {""}, {""}, +#line 798 "HTMLCharacterReference.gperf" + {"Ĵ", "Ĵ"}, + {""}, +#line 393 "HTMLCharacterReference.gperf" + {"⋄", "⋄"}, + {""}, +#line 1811 "HTMLCharacterReference.gperf" + {"⊇", "⊇"}, +#line 28 "HTMLCharacterReference.gperf" + {"â", "â"}, +#line 1030 "HTMLCharacterReference.gperf" + {"⋋", "⋋"}, +#line 1215 "HTMLCharacterReference.gperf" + {"⪡̸", "⪡̸"}, + {""}, {""}, {""}, +#line 241 "HTMLCharacterReference.gperf" + {"Č", "Č"}, + {""}, {""}, +#line 303 "HTMLCharacterReference.gperf" + {"≡", "≡"}, +#line 964 "HTMLCharacterReference.gperf" + {"◺", "◺"}, + {""}, +#line 1817 "HTMLCharacterReference.gperf" + {"⥻", "⥻"}, + {""}, +#line 2075 "HTMLCharacterReference.gperf" + {"⋃", "⋃"}, + {""}, {""}, {""}, {""}, +#line 1787 "HTMLCharacterReference.gperf" + {"≻", "≻"}, + {""}, +#line 1684 "HTMLCharacterReference.gperf" + {"Щ", "Щ"}, + {""}, {""}, {""}, {""}, +#line 2052 "HTMLCharacterReference.gperf" + {"⊊︀", "⊊︀"}, + {""}, {""}, {""}, +#line 696 "HTMLCharacterReference.gperf" + {"ĥ", "ĥ"}, + {""}, {""}, {""}, {""}, +#line 2048 "HTMLCharacterReference.gperf" + {"⊳", "⊳"}, + {""}, {""}, {""}, {""}, {""}, +#line 394 "HTMLCharacterReference.gperf" + {"♦", "♦"}, +#line 628 "HTMLCharacterReference.gperf" + {"⪂", "⪂"}, + {""}, {""}, +#line 1669 "HTMLCharacterReference.gperf" + {"⩦", "⩦"}, +#line 1493 "HTMLCharacterReference.gperf" + 
{"ℍ", "ℍ"}, +#line 929 "HTMLCharacterReference.gperf" + {"⪁", "⪁"}, + {""}, {""}, +#line 246 "HTMLCharacterReference.gperf" + {"ĉ", "ĉ"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 392 "HTMLCharacterReference.gperf" + {"⋄", "⋄"}, + {""}, {""}, +#line 743 "HTMLCharacterReference.gperf" + {"℩", "℩"}, + {""}, {""}, {""}, {""}, +#line 1692 "HTMLCharacterReference.gperf" + {"→", "→"}, +#line 792 "HTMLCharacterReference.gperf" + {"Ĩ", "Ĩ"}, + {""}, {""}, +#line 956 "HTMLCharacterReference.gperf" + {"Љ", "Љ"}, + {""}, {""}, {""}, {""}, +#line 541 "HTMLCharacterReference.gperf" + {"ℰ", "ℰ"}, +#line 637 "HTMLCharacterReference.gperf" + {"ℷ", "ℷ"}, +#line 1721 "HTMLCharacterReference.gperf" + {"ь", "ь"}, +#line 2082 "HTMLCharacterReference.gperf" + {"ξ", "ξ"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 347 "HTMLCharacterReference.gperf" + {"↶", "↶"}, +#line 1495 "HTMLCharacterReference.gperf" + {"?", "?"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 512 "HTMLCharacterReference.gperf" + {"Ŋ", "Ŋ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1194 "HTMLCharacterReference.gperf" + {"≹", "≹"}, +#line 1925 "HTMLCharacterReference.gperf" + {"Û", "Û"}, +#line 2059 "HTMLCharacterReference.gperf" + {"⩟", "⩟"}, + {""}, {""}, {""}, +#line 799 "HTMLCharacterReference.gperf" + {"ĵ", "ĵ"}, + {""}, +#line 1960 "HTMLCharacterReference.gperf" + {"⇑", "⇑"}, + {""}, +#line 80 "HTMLCharacterReference.gperf" + {"≋", "≋"}, + {""}, {""}, {""}, +#line 1790 "HTMLCharacterReference.gperf" + {"≻", "≻"}, + {""}, {""}, {""}, {""}, {""}, +#line 281 "HTMLCharacterReference.gperf" + {"≗", "≗"}, + {""}, {""}, {""}, +#line 981 "HTMLCharacterReference.gperf" + {"⟵", "⟵"}, + {""}, {""}, +#line 1305 "HTMLCharacterReference.gperf" + {"⊮", "⊮"}, + {""}, {""}, {""}, +#line 2105 "HTMLCharacterReference.gperf" + {"Ŷ", "Ŷ"}, + {""}, +#line 1085 "HTMLCharacterReference.gperf" + {"μ", "μ"}, +#line 1992 "HTMLCharacterReference.gperf" + {"▵", "▵"}, + {""}, +#line 1108 "HTMLCharacterReference.gperf" + {"≇", "≇"}, + {""}, {""}, {""}, {""}, +#line 1020 "HTMLCharacterReference.gperf" + {"‘", "‘"}, +#line 1021 "HTMLCharacterReference.gperf" + {"‚", "‚"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2106 "HTMLCharacterReference.gperf" + {"ŷ", "ŷ"}, + {""}, +#line 1080 "HTMLCharacterReference.gperf" + {"∓", "∓"}, + {""}, +#line 638 "HTMLCharacterReference.gperf" + {"Ѓ", "Ѓ"}, +#line 919 "HTMLCharacterReference.gperf" + {"↼", "↼"}, + {""}, {""}, {""}, {""}, {""}, +#line 1822 "HTMLCharacterReference.gperf" + {"⋑", "⋑"}, +#line 132 "HTMLCharacterReference.gperf" + {"⨀", "⨀"}, + {""}, {""}, +#line 985 "HTMLCharacterReference.gperf" + {"⟼", "⟼"}, + {""}, {""}, +#line 1996 "HTMLCharacterReference.gperf" + {"ü", "ü"}, +#line 553 "HTMLCharacterReference.gperf" + {"!", "!"}, +#line 727 "HTMLCharacterReference.gperf" + {"î", "î"}, + {""}, {""}, +#line 1974 "HTMLCharacterReference.gperf" + {"υ", "υ"}, +#line 544 "HTMLCharacterReference.gperf" + {"⩳", "⩳"}, + {""}, {""}, +#line 1202 "HTMLCharacterReference.gperf" + {"∉", "∉"}, + {""}, {""}, +#line 1530 "HTMLCharacterReference.gperf" + {"⤐", "⤐"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1254 "HTMLCharacterReference.gperf" + {"⇏", "⇏"}, +#line 439 "HTMLCharacterReference.gperf" + {"⇵", "⇵"}, + {""}, {""}, {""}, +#line 91 "HTMLCharacterReference.gperf" + {"≈", "≈"}, + {""}, {""}, +#line 910 "HTMLCharacterReference.gperf" + {"⥚", "⥚"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 612 "HTMLCharacterReference.gperf" + {"Ĝ", "Ĝ"}, + {""}, {""}, 
{""}, {""}, +#line 188 "HTMLCharacterReference.gperf" + {"┘", "┘"}, + {""}, {""}, {""}, +#line 310 "HTMLCharacterReference.gperf" + {"∐", "∐"}, +#line 1658 "HTMLCharacterReference.gperf" + {"Ŝ", "Ŝ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 192 "HTMLCharacterReference.gperf" + {"└", "└"}, +#line 2063 "HTMLCharacterReference.gperf" + {"℘", "℘"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 2061 "HTMLCharacterReference.gperf" + {"∧", "∧"}, + {""}, {""}, {""}, {""}, +#line 1328 "HTMLCharacterReference.gperf" + {"Ô", "Ô"}, + {""}, +#line 719 "HTMLCharacterReference.gperf" + {"≎", "≎"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 2058 "HTMLCharacterReference.gperf" + {"ŵ", "ŵ"}, + {""}, {""}, +#line 765 "HTMLCharacterReference.gperf" + {"ℤ", "ℤ"}, +#line 1674 "HTMLCharacterReference.gperf" + {"§", "§"}, + {""}, {""}, {""}, +#line 980 "HTMLCharacterReference.gperf" + {"⟸", "⟸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1802 "HTMLCharacterReference.gperf" + {"♪", "♪"}, + {""}, +#line 2062 "HTMLCharacterReference.gperf" + {"≙", "≙"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 698 "HTMLCharacterReference.gperf" + {"♥", "♥"}, +#line 1695 "HTMLCharacterReference.gperf" + {"Σ", "Σ"}, + {""}, +#line 1649 "HTMLCharacterReference.gperf" + {"≻", "≻"}, + {""}, +#line 1153 "HTMLCharacterReference.gperf" + {"Њ", "Њ"}, +#line 2029 "HTMLCharacterReference.gperf" + {"≚", "≚"}, + {""}, {""}, {""}, +#line 1367 "HTMLCharacterReference.gperf" + {"‘", "‘"}, + {""}, +#line 1083 "HTMLCharacterReference.gperf" + {"∾", "∾"}, +#line 1935 "HTMLCharacterReference.gperf" + {"𝔲", "𝔲"}, + {""}, +#line 2113 "HTMLCharacterReference.gperf" + {"ї", "ї"}, + {""}, {""}, {""}, {""}, +#line 1687 "HTMLCharacterReference.gperf" + {"ш", "ш"}, +#line 1320 "HTMLCharacterReference.gperf" + {"⇖", "⇖"}, + {""}, {""}, +#line 1062 "HTMLCharacterReference.gperf" + {"𝔪", "𝔪"}, +#line 830 "HTMLCharacterReference.gperf" + {"𝓀", "𝓀"}, + {""}, +#line 1794 "HTMLCharacterReference.gperf" + {"⪰", "⪰"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 437 "HTMLCharacterReference.gperf" + {"↓", "↓"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1173 "HTMLCharacterReference.gperf" + {"⋪", "⋪"}, +#line 83 "HTMLCharacterReference.gperf" + {"≈", "≈"}, + {""}, {""}, {""}, +#line 1668 "HTMLCharacterReference.gperf" + {"⊡", "⊡"}, +#line 129 "HTMLCharacterReference.gperf" + {"⋂", "⋂"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1229 "HTMLCharacterReference.gperf" + {"⊐̸", "⊐̸"}, + {""}, {""}, +#line 1087 "HTMLCharacterReference.gperf" + {"⊸", "⊸"}, + {""}, +#line 1230 "HTMLCharacterReference.gperf" + {"⋣", "⋣"}, +#line 1239 "HTMLCharacterReference.gperf" + {"≁", "≁"}, + {""}, {""}, {""}, +#line 1322 "HTMLCharacterReference.gperf" + {"↖", "↖"}, + {""}, {""}, +#line 741 "HTMLCharacterReference.gperf" + {"∭", "∭"}, +#line 401 "HTMLCharacterReference.gperf" + {"÷", "÷"}, + {""}, +#line 1109 "HTMLCharacterReference.gperf" + {"⩭̸", "⩭̸"}, + {""}, {""}, {""}, {""}, {""}, +#line 1956 "HTMLCharacterReference.gperf" + {"ų", "ų"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 436 "HTMLCharacterReference.gperf" + {"⇓", "⇓"}, + {""}, +#line 421 "HTMLCharacterReference.gperf" + {"∯", "∯"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1767 "HTMLCharacterReference.gperf" + {"⊂", "⊂"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1664 
"HTMLCharacterReference.gperf" + {"≿", "≿"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 498 "HTMLCharacterReference.gperf" + {"⏧", "⏧"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 27 "HTMLCharacterReference.gperf" + {"Â", "Â"}, + {""}, {""}, +#line 1294 "HTMLCharacterReference.gperf" + {"⋪", "⋪"}, + {""}, +#line 695 "HTMLCharacterReference.gperf" + {"Ĥ", "Ĥ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1220 "HTMLCharacterReference.gperf" + {"⊀", "⊀"}, + {""}, {""}, +#line 829 "HTMLCharacterReference.gperf" + {"𝒦", "𝒦"}, +#line 1146 "HTMLCharacterReference.gperf" + {"⇎", "⇎"}, + {""}, {""}, +#line 1691 "HTMLCharacterReference.gperf" + {"∥", "∥"}, + {""}, {""}, +#line 740 "HTMLCharacterReference.gperf" + {"⨌", "⨌"}, +#line 2138 "HTMLCharacterReference.gperf" + {"⇝", "⇝"}, +#line 1038 "HTMLCharacterReference.gperf" + {"⥊", "⥊"}, +#line 1994 "HTMLCharacterReference.gperf" + {"⇈", "⇈"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1063 "HTMLCharacterReference.gperf" + {"℧", "℧"}, + {""}, {""}, +#line 1977 "HTMLCharacterReference.gperf" + {"υ", "υ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1139 "HTMLCharacterReference.gperf" + {"⩾̸", "⩾̸"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 501 "HTMLCharacterReference.gperf" + {"⪗", "⪗"}, + {""}, +#line 1918 "HTMLCharacterReference.gperf" + {"⇑", "⇑"}, + {""}, {""}, {""}, {""}, {""}, +#line 1810 "HTMLCharacterReference.gperf" + {"⫆", "⫆"}, +#line 1972 "HTMLCharacterReference.gperf" + {"↗", "↗"}, + {""}, {""}, +#line 416 "HTMLCharacterReference.gperf" + {"≐", "≐"}, +#line 1650 "HTMLCharacterReference.gperf" + {"⪸", "⪸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 451 "HTMLCharacterReference.gperf" + {"⊤", "⊤"}, + {""}, {""}, {""}, {""}, +#line 893 "HTMLCharacterReference.gperf" + {"⟦", "⟦"}, +#line 1655 "HTMLCharacterReference.gperf" + {"⪰", "⪰"}, + {""}, +#line 1710 "HTMLCharacterReference.gperf" + {"←", "←"}, +#line 140 "HTMLCharacterReference.gperf" + {"⋁", "⋁"}, + {""}, {""}, +#line 935 "HTMLCharacterReference.gperf" + {"⋚", "⋚"}, + {""}, {""}, {""}, +#line 548 "HTMLCharacterReference.gperf" + {"Ð", "Ð"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1186 "HTMLCharacterReference.gperf" + {"∉", "∉"}, + {""}, {""}, +#line 84 "HTMLCharacterReference.gperf" + {"≊", "≊"}, + {""}, +#line 726 "HTMLCharacterReference.gperf" + {"Î", "Î"}, +#line 492 "HTMLCharacterReference.gperf" + {"È", "È"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1660 "HTMLCharacterReference.gperf" + {"⪺", "⪺"}, +#line 283 "HTMLCharacterReference.gperf" + {"⫯", "⫯"}, + {""}, {""}, +#line 422 "HTMLCharacterReference.gperf" + {"¨", "¨"}, + {""}, +#line 2032 "HTMLCharacterReference.gperf" + {"|", "|"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1456 "HTMLCharacterReference.gperf" + {"≼", "≼"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2090 "HTMLCharacterReference.gperf" + {"⨁", "⨁"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 2076 "HTMLCharacterReference.gperf" + {"▽", "▽"}, +#line 2097 "HTMLCharacterReference.gperf" + {"⨄", "⨄"}, + {""}, +#line 767 "HTMLCharacterReference.gperf" + {"⊺", "⊺"}, +#line 280 "HTMLCharacterReference.gperf" + {"⧃", "⧃"}, + {""}, {""}, {""}, +#line 1879 "HTMLCharacterReference.gperf" + {"⊤", "⊤"}, + {""}, +#line 423 "HTMLCharacterReference.gperf" + {"⇓", "⇓"}, + {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 731 "HTMLCharacterReference.gperf" + {"Е", "Е"}, + {""}, {""}, +#line 558 "HTMLCharacterReference.gperf" + {"ⅇ", "ⅇ"}, + {""}, {""}, +#line 191 "HTMLCharacterReference.gperf" + {"╘", "╘"}, + {""}, {""}, {""}, +#line 1939 "HTMLCharacterReference.gperf" + {"↿", "↿"}, +#line 316 "HTMLCharacterReference.gperf" + {"⨯", "⨯"}, + {""}, +#line 768 "HTMLCharacterReference.gperf" + {"⋂", "⋂"}, + {""}, {""}, {""}, +#line 963 "HTMLCharacterReference.gperf" + {"⥫", "⥫"}, + {""}, +#line 52 "HTMLCharacterReference.gperf" + {"⩘", "⩘"}, + {""}, {""}, {""}, +#line 673 "HTMLCharacterReference.gperf" + {"⩼", "⩼"}, + {""}, +#line 1940 "HTMLCharacterReference.gperf" + {"↾", "↾"}, + {""}, +#line 2031 "HTMLCharacterReference.gperf" + {"‖", "‖"}, +#line 1033 "HTMLCharacterReference.gperf" + {"⩻", "⩻"}, + {""}, {""}, +#line 179 "HTMLCharacterReference.gperf" + {"╧", "╧"}, +#line 1652 "HTMLCharacterReference.gperf" + {"š", "š"}, + {""}, +#line 1796 "HTMLCharacterReference.gperf" + {"⪶", "⪶"}, + {""}, +#line 1774 "HTMLCharacterReference.gperf" + {"⊊", "⊊"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1705 "HTMLCharacterReference.gperf" + {"⪝", "⪝"}, + {""}, +#line 2028 "HTMLCharacterReference.gperf" + {"⊻", "⊻"}, + {""}, {""}, +#line 427 "HTMLCharacterReference.gperf" + {"⟸", "⟸"}, + {""}, {""}, {""}, {""}, +#line 428 "HTMLCharacterReference.gperf" + {"⟺", "⟺"}, +#line 424 "HTMLCharacterReference.gperf" + {"⇐", "⇐"}, + {""}, {""}, {""}, +#line 1296 "HTMLCharacterReference.gperf" + {"⋫", "⋫"}, + {""}, +#line 1297 "HTMLCharacterReference.gperf" + {"⋭", "⋭"}, + {""}, {""}, {""}, {""}, +#line 1250 "HTMLCharacterReference.gperf" + {"⋠", "⋠"}, + {""}, {""}, {""}, {""}, {""}, +#line 452 "HTMLCharacterReference.gperf" + {"↧", "↧"}, +#line 1699 "HTMLCharacterReference.gperf" + {"∼", "∼"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1885 "HTMLCharacterReference.gperf" + {"⤩", "⤩"}, + {""}, +#line 680 "HTMLCharacterReference.gperf" + {"≳", "≳"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2060 "HTMLCharacterReference.gperf" + {"⋀", "⋀"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 69 "HTMLCharacterReference.gperf" + {"∢", "∢"}, + {""}, +#line 1654 "HTMLCharacterReference.gperf" + {"⪴", "⪴"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 2057 "HTMLCharacterReference.gperf" + {"Ŵ", "Ŵ"}, + {""}, {""}, {""}, {""}, +#line 1116 "HTMLCharacterReference.gperf" + {"⇗", "⇗"}, +#line 656 "HTMLCharacterReference.gperf" + {"≧", "≧"}, +#line 1001 "HTMLCharacterReference.gperf" + {"◊", "◊"}, +#line 1851 "HTMLCharacterReference.gperf" + {"𝔱", "𝔱"}, + {""}, {""}, {""}, {""}, {""}, +#line 1841 "HTMLCharacterReference.gperf" + {"⎴", "⎴"}, +#line 933 "HTMLCharacterReference.gperf" + {"⪅", "⪅"}, + {""}, {""}, {""}, +#line 824 "HTMLCharacterReference.gperf" + {"х", "х"}, + {""}, {""}, {""}, {""}, +#line 1989 "HTMLCharacterReference.gperf" + {"⋰", "⋰"}, + {""}, +#line 1446 "HTMLCharacterReference.gperf" + {"≺", "≺"}, + {""}, +#line 289 "HTMLCharacterReference.gperf" + {"♣", "♣"}, + {""}, {""}, +#line 2096 "HTMLCharacterReference.gperf" + {"⨆", "⨆"}, + {""}, {""}, {""}, +#line 178 "HTMLCharacterReference.gperf" + {"╩", "╩"}, +#line 163 "HTMLCharacterReference.gperf" + {"⧉", "⧉"}, + {""}, {""}, {""}, +#line 1195 "HTMLCharacterReference.gperf" + {"⩾̸", "⩾̸"}, +#line 270 "HTMLCharacterReference.gperf" + {"↻", "↻"}, +#line 1775 "HTMLCharacterReference.gperf" + {"⪿", "⪿"}, + {""}, {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, +#line 997 "HTMLCharacterReference.gperf" + {"_", "_"}, + {""}, +#line 1938 "HTMLCharacterReference.gperf" + {"⥣", "⥣"}, + {""}, {""}, {""}, +#line 934 "HTMLCharacterReference.gperf" + {"⋖", "⋖"}, +#line 1187 "HTMLCharacterReference.gperf" + {"≠", "≠"}, + {""}, {""}, +#line 2055 "HTMLCharacterReference.gperf" + {"⊪", "⊪"}, +#line 1118 "HTMLCharacterReference.gperf" + {"↗", "↗"}, + {""}, {""}, {""}, {""}, {""}, +#line 608 "HTMLCharacterReference.gperf" + {"⪆", "⪆"}, +#line 1192 "HTMLCharacterReference.gperf" + {"≧̸", "≧̸"}, + {""}, {""}, {""}, +#line 842 "HTMLCharacterReference.gperf" + {"⪅", "⪅"}, + {""}, +#line 1295 "HTMLCharacterReference.gperf" + {"⋬", "⋬"}, + {""}, +#line 1204 "HTMLCharacterReference.gperf" + {"⋶", "⋶"}, + {""}, {""}, +#line 187 "HTMLCharacterReference.gperf" + {"╛", "╛"}, + {""}, {""}, +#line 1371 "HTMLCharacterReference.gperf" + {"∨", "∨"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1788 "HTMLCharacterReference.gperf" + {"⪸", "⪸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 256 "HTMLCharacterReference.gperf" + {"·", "·"}, + {""}, +#line 1306 "HTMLCharacterReference.gperf" + {"⊭", "⊭"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 789 "HTMLCharacterReference.gperf" + {"⋳", "⋳"}, +#line 1812 "HTMLCharacterReference.gperf" + {"⫄", "⫄"}, + {""}, {""}, +#line 936 "HTMLCharacterReference.gperf" + {"⪋", "⪋"}, +#line 475 "HTMLCharacterReference.gperf" + {"Ě", "Ě"}, + {""}, +#line 856 "HTMLCharacterReference.gperf" + {"⪫", "⪫"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1773 "HTMLCharacterReference.gperf" + {"⫋", "⫋"}, + {""}, {""}, {""}, +#line 1878 "HTMLCharacterReference.gperf" + {"⤨", "⤨"}, +#line 878 "HTMLCharacterReference.gperf" + {"“", "“"}, +#line 879 "HTMLCharacterReference.gperf" + {"„", "„"}, +#line 1496 "HTMLCharacterReference.gperf" + {"≟", "≟"}, + {""}, {""}, {""}, {""}, {""}, +#line 979 "HTMLCharacterReference.gperf" + {"⟵", "⟵"}, +#line 1478 "HTMLCharacterReference.gperf" + {"≾", "≾"}, + {""}, {""}, {""}, {""}, {""}, +#line 238 "HTMLCharacterReference.gperf" + {"ˇ", "ˇ"}, + {""}, {""}, +#line 229 "HTMLCharacterReference.gperf" + {"∩", "∩"}, + {""}, +#line 1762 "HTMLCharacterReference.gperf" + {"★", "★"}, + {""}, {""}, {""}, +#line 895 "HTMLCharacterReference.gperf" + {"⇃", "⇃"}, + {""}, {""}, +#line 896 "HTMLCharacterReference.gperf" + {"⥙", "⥙"}, +#line 276 "HTMLCharacterReference.gperf" + {"Ⓢ", "Ⓢ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1474 "HTMLCharacterReference.gperf" + {"∝", "∝"}, + {""}, +#line 115 "HTMLCharacterReference.gperf" + {"„", "„"}, +#line 1808 "HTMLCharacterReference.gperf" + {"⪾", "⪾"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1394 "HTMLCharacterReference.gperf" + {"ö", "ö"}, + {""}, {""}, {""}, {""}, +#line 2092 "HTMLCharacterReference.gperf" + {"⟹", "⟹"}, + {""}, {""}, {""}, +#line 847 "HTMLCharacterReference.gperf" + {"←", "←"}, + {""}, +#line 1930 "HTMLCharacterReference.gperf" + {"Ű", "Ű"}, + {""}, {""}, {""}, {""}, {""}, +#line 1177 "HTMLCharacterReference.gperf" + {"⁠", "⁠"}, + {""}, +#line 359 "HTMLCharacterReference.gperf" + {"↓", "↓"}, +#line 245 "HTMLCharacterReference.gperf" + {"Ĉ", "Ĉ"}, +#line 1174 "HTMLCharacterReference.gperf" + {"⋬", "⋬"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1387 "HTMLCharacterReference.gperf" + {"⊘", "⊘"}, + {""}, {""}, {""}, {""}, +#line 1707 "HTMLCharacterReference.gperf" + {"≆", "≆"}, + {""}, {""}, +#line 1447 "HTMLCharacterReference.gperf" + {"⪷", "⪷"}, + {""}, {""}, +#line 600 
"HTMLCharacterReference.gperf" + {"⌢", "⌢"}, + {""}, {""}, {""}, +#line 1227 "HTMLCharacterReference.gperf" + {"⊏̸", "⊏̸"}, + {""}, {""}, {""}, {""}, +#line 1228 "HTMLCharacterReference.gperf" + {"⋢", "⋢"}, + {""}, +#line 1450 "HTMLCharacterReference.gperf" + {"⪯", "⪯"}, + {""}, {""}, {""}, +#line 116 "HTMLCharacterReference.gperf" + {"∵", "∵"}, + {""}, {""}, {""}, {""}, {""}, +#line 92 "HTMLCharacterReference.gperf" + {"≍", "≍"}, + {""}, +#line 690 "HTMLCharacterReference.gperf" + {"↔", "↔"}, +#line 1981 "HTMLCharacterReference.gperf" + {"⌝", "⌝"}, +#line 852 "HTMLCharacterReference.gperf" + {"↫", "↫"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1850 "HTMLCharacterReference.gperf" + {"𝔗", "𝔗"}, + {""}, +#line 1867 "HTMLCharacterReference.gperf" + {"þ", "þ"}, + {""}, {""}, {""}, +#line 357 "HTMLCharacterReference.gperf" + {"↡", "↡"}, +#line 1983 "HTMLCharacterReference.gperf" + {"⌎", "⌎"}, +#line 853 "HTMLCharacterReference.gperf" + {"⤹", "⤹"}, +#line 1415 "HTMLCharacterReference.gperf" + {"𝔭", "𝔭"}, + {""}, +#line 1381 "HTMLCharacterReference.gperf" + {"⩛", "⩛"}, + {""}, {""}, {""}, +#line 1988 "HTMLCharacterReference.gperf" + {"𝓊", "𝓊"}, + {""}, +#line 1366 "HTMLCharacterReference.gperf" + {"“", "“"}, + {""}, +#line 1476 "HTMLCharacterReference.gperf" + {"∝", "∝"}, + {""}, +#line 1466 "HTMLCharacterReference.gperf" + {"⪹", "⪹"}, + {""}, {""}, {""}, {""}, +#line 1082 "HTMLCharacterReference.gperf" + {"𝓂", "𝓂"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 474 "HTMLCharacterReference.gperf" + {"⩮", "⩮"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 285 "HTMLCharacterReference.gperf" + {"∲", "∲"}, + {""}, {""}, {""}, {""}, +#line 1151 "HTMLCharacterReference.gperf" + {"⋺", "⋺"}, +#line 1379 "HTMLCharacterReference.gperf" + {"⩖", "⩖"}, + {""}, {""}, {""}, {""}, +#line 2074 "HTMLCharacterReference.gperf" + {"◯", "◯"}, + {""}, {""}, {""}, +#line 1128 "HTMLCharacterReference.gperf" + {"≪", "≪"}, + {""}, {""}, {""}, {""}, +#line 1917 "HTMLCharacterReference.gperf" + {"↟", "↟"}, + {""}, {""}, {""}, {""}, +#line 1342 "HTMLCharacterReference.gperf" + {"𝔬", "𝔬"}, +#line 1477 "HTMLCharacterReference.gperf" + {"∝", "∝"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1793 "HTMLCharacterReference.gperf" + {"≿", "≿"}, + {""}, {""}, +#line 556 "HTMLCharacterReference.gperf" + {"ℰ", "ℰ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1708 "HTMLCharacterReference.gperf" + {"⨤", "⨤"}, + {""}, +#line 845 "HTMLCharacterReference.gperf" + {"↞", "↞"}, + {""}, +#line 1333 "HTMLCharacterReference.gperf" + {"Ő", "Ő"}, + {""}, {""}, +#line 823 "HTMLCharacterReference.gperf" + {"Х", "Х"}, + {""}, {""}, +#line 1209 "HTMLCharacterReference.gperf" + {"≰", "≰"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 855 "HTMLCharacterReference.gperf" + {"↢", "↢"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 350 "HTMLCharacterReference.gperf" + {"⋏", "⋏"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 629 "HTMLCharacterReference.gperf" + {"⪄", "⪄"}, + {""}, {""}, +#line 686 "HTMLCharacterReference.gperf" + {"ℋ", "ℋ"}, + {""}, +#line 1382 "HTMLCharacterReference.gperf" + {"Ⓢ", "Ⓢ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1167 "HTMLCharacterReference.gperf" + {"⩽̸", "⩽̸"}, + {""}, +#line 430 "HTMLCharacterReference.gperf" + {"⇒", "⇒"}, + {""}, {""}, {""}, {""}, {""}, +#line 497 "HTMLCharacterReference.gperf" + {"∈", "∈"}, + {""}, +#line 1511 "HTMLCharacterReference.gperf" + {"↠", "↠"}, +#line 1706 "HTMLCharacterReference.gperf" + {"⪟", "⪟"}, + {""}, {""}, +#line 1677 "HTMLCharacterReference.gperf" 
+ {"∖", "∖"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1092 "HTMLCharacterReference.gperf" + {"≉", "≉"}, +#line 1348 "HTMLCharacterReference.gperf" + {"Ω", "Ω"}, + {""}, +#line 1372 "HTMLCharacterReference.gperf" + {"↻", "↻"}, + {""}, {""}, {""}, {""}, +#line 1727 "HTMLCharacterReference.gperf" + {"♠", "♠"}, +#line 1982 "HTMLCharacterReference.gperf" + {"⌝", "⌝"}, +#line 982 "HTMLCharacterReference.gperf" + {"⟷", "⟷"}, +#line 1929 "HTMLCharacterReference.gperf" + {"⇅", "⇅"}, +#line 1365 "HTMLCharacterReference.gperf" + {"⦷", "⦷"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 2079 "HTMLCharacterReference.gperf" + {"⟺", "⟺"}, + {""}, {""}, {""}, +#line 2017 "HTMLCharacterReference.gperf" + {"⫨", "⫨"}, + {""}, +#line 230 "HTMLCharacterReference.gperf" + {"⩄", "⩄"}, + {""}, {""}, {""}, +#line 2018 "HTMLCharacterReference.gperf" + {"⫩", "⫩"}, + {""}, +#line 363 "HTMLCharacterReference.gperf" + {"⤏", "⤏"}, +#line 1451 "HTMLCharacterReference.gperf" + {"≺", "≺"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1304 "HTMLCharacterReference.gperf" + {"⊯", "⊯"}, + {""}, +#line 1046 "HTMLCharacterReference.gperf" + {"⤅", "⤅"}, + {""}, {""}, {""}, +#line 1449 "HTMLCharacterReference.gperf" + {"⪳", "⪳"}, +#line 1380 "HTMLCharacterReference.gperf" + {"⩗", "⩗"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 470 "HTMLCharacterReference.gperf" + {"џ", "џ"}, + {""}, {""}, {""}, +#line 891 "HTMLCharacterReference.gperf" + {"↢", "↢"}, + {""}, {""}, {""}, {""}, {""}, +#line 1397 "HTMLCharacterReference.gperf" + {"⏞", "⏞"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1368 "HTMLCharacterReference.gperf" + {"⦹", "⦹"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 402 "HTMLCharacterReference.gperf" + {"⋇", "⋇"}, + {""}, {""}, {""}, {""}, {""}, +#line 509 "HTMLCharacterReference.gperf" + {" ", " "}, + {""}, {""}, {""}, {""}, {""}, +#line 120 "HTMLCharacterReference.gperf" + {"϶", "϶"}, + {""}, {""}, {""}, {""}, {""}, +#line 1965 "HTMLCharacterReference.gperf" + {"⇕", "⇕"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 47 "HTMLCharacterReference.gperf" + {"&", "&"}, +#line 1980 "HTMLCharacterReference.gperf" + {"⇈", "⇈"}, + {""}, +#line 511 "HTMLCharacterReference.gperf" + {" ", " "}, + {""}, {""}, +#line 173 "HTMLCharacterReference.gperf" + {"─", "─"}, + {""}, {""}, {""}, +#line 1440 "HTMLCharacterReference.gperf" + {"ℌ", "ℌ"}, +#line 1638 "HTMLCharacterReference.gperf" + {"▹", "▹"}, +#line 2042 "HTMLCharacterReference.gperf" + {"⊲", "⊲"}, + {""}, {""}, {""}, {""}, +#line 1562 "HTMLCharacterReference.gperf" + {"⥯", "⥯"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 371 "HTMLCharacterReference.gperf" + {"‡", "‡"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 219 "HTMLCharacterReference.gperf" + {"•", "•"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1523 "HTMLCharacterReference.gperf" + {"⤖", "⤖"}, + {""}, {""}, {""}, +#line 1095 "HTMLCharacterReference.gperf" + {"ʼn", "ʼn"}, + {""}, {""}, {""}, {""}, {""}, +#line 510 "HTMLCharacterReference.gperf" + {" ", " "}, +#line 1396 "HTMLCharacterReference.gperf" + {"‾", "‾"}, +#line 693 "HTMLCharacterReference.gperf" + {"^", "^"}, + {""}, +#line 291 "HTMLCharacterReference.gperf" + {":", ":"}, +#line 162 "HTMLCharacterReference.gperf" + {"⋈", "⋈"}, +#line 118 "HTMLCharacterReference.gperf" + {"∵", "∵"}, + {""}, {""}, {""}, +#line 1852 "HTMLCharacterReference.gperf" + {"∴", "∴"}, + {""}, +#line 1242 "HTMLCharacterReference.gperf" + {"≉", "≉"}, + {""}, +#line 1168 
"HTMLCharacterReference.gperf" + {"≮", "≮"}, + {""}, {""}, +#line 766 "HTMLCharacterReference.gperf" + {"∫", "∫"}, + {""}, {""}, +#line 1886 "HTMLCharacterReference.gperf" + {"‴", "‴"}, + {""}, {""}, {""}, +#line 1484 "HTMLCharacterReference.gperf" + {" ", " "}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 247 "HTMLCharacterReference.gperf" + {"∰", "∰"}, + {""}, {""}, {""}, +#line 822 "HTMLCharacterReference.gperf" + {"ĸ", "ĸ"}, + {""}, {""}, {""}, {""}, +#line 946 "HTMLCharacterReference.gperf" + {"⌊", "⌊"}, +#line 1818 "HTMLCharacterReference.gperf" + {"⫂", "⫂"}, +#line 1690 "HTMLCharacterReference.gperf" + {"∣", "∣"}, +#line 1770 "HTMLCharacterReference.gperf" + {"⊆", "⊆"}, + {""}, {""}, {""}, {""}, +#line 1848 "HTMLCharacterReference.gperf" + {"⃛", "◌⃛"}, + {""}, +#line 1952 "HTMLCharacterReference.gperf" + {"⏝", "⏝"}, +#line 940 "HTMLCharacterReference.gperf" + {"≶", "≶"}, + {""}, {""}, {""}, {""}, {""}, +#line 426 "HTMLCharacterReference.gperf" + {"⫤", "⫤"}, + {""}, {""}, +#line 1880 "HTMLCharacterReference.gperf" + {"⌶", "⌶"}, + {""}, {""}, +#line 1267 "HTMLCharacterReference.gperf" + {"∤", "∤"}, + {""}, +#line 1921 "HTMLCharacterReference.gperf" + {"Ў", "Ў"}, + {""}, {""}, {""}, {""}, +#line 1263 "HTMLCharacterReference.gperf" + {"⋡", "⋡"}, + {""}, +#line 941 "HTMLCharacterReference.gperf" + {"⪡", "⪡"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 446 "HTMLCharacterReference.gperf" + {"↽", "↽"}, +#line 1479 "HTMLCharacterReference.gperf" + {"⊰", "⊰"}, + {""}, +#line 447 "HTMLCharacterReference.gperf" + {"⥖", "⥖"}, + {""}, +#line 1420 "HTMLCharacterReference.gperf" + {"☎", "☎"}, +#line 1377 "HTMLCharacterReference.gperf" + {"º", "º"}, +#line 1032 "HTMLCharacterReference.gperf" + {"⥶", "⥶"}, + {""}, {""}, {""}, +#line 176 "HTMLCharacterReference.gperf" + {"╥", "╥"}, +#line 1398 "HTMLCharacterReference.gperf" + {"⎴", "⎴"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 624 "HTMLCharacterReference.gperf" + {"⩾", "⩾"}, + {""}, +#line 2044 "HTMLCharacterReference.gperf" + {"⊃⃒", "⊃⃒"}, + {""}, +#line 1203 "HTMLCharacterReference.gperf" + {"⋷", "⋷"}, +#line 925 "HTMLCharacterReference.gperf" + {"⩽", "⩽"}, + {""}, {""}, {""}, +#line 1217 "HTMLCharacterReference.gperf" + {"∌", "∌"}, +#line 1412 "HTMLCharacterReference.gperf" + {"⊥", "⊥"}, + {""}, {""}, +#line 1023 "HTMLCharacterReference.gperf" + {"ł", "ł"}, +#line 1096 "HTMLCharacterReference.gperf" + {"≉", "≉"}, + {""}, {""}, +#line 1155 "HTMLCharacterReference.gperf" + {"⇍", "⇍"}, + {""}, +#line 917 "HTMLCharacterReference.gperf" + {"↿", "↿"}, + {""}, +#line 1373 "HTMLCharacterReference.gperf" + {"⩝", "⩝"}, + {""}, +#line 462 "HTMLCharacterReference.gperf" + {"đ", "đ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1566 "HTMLCharacterReference.gperf" + {"𝔯", "𝔯"}, +#line 1905 "HTMLCharacterReference.gperf" + {"𝓉", "𝓉"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1937 "HTMLCharacterReference.gperf" + {"ù", "ù"}, + {""}, {""}, {""}, {""}, {""}, +#line 563 "HTMLCharacterReference.gperf" + {"ffi", "ffi"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2086 "HTMLCharacterReference.gperf" + {"⋻", "⋻"}, +#line 348 "HTMLCharacterReference.gperf" + {"↷", "↷"}, +#line 1059 "HTMLCharacterReference.gperf" + {" ", " "}, + {""}, {""}, {""}, {""}, +#line 718 "HTMLCharacterReference.gperf" + {"ħ", "ħ"}, + {""}, {""}, {""}, +#line 1617 "HTMLCharacterReference.gperf" + {"⦆", "⦆"}, + {""}, +#line 1776 "HTMLCharacterReference.gperf" + {"⥹", "⥹"}, +#line 615 
"HTMLCharacterReference.gperf" + {"г", "г"}, + {""}, +#line 1458 "HTMLCharacterReference.gperf" + {"⪯", "⪯"}, + {""}, {""}, +#line 876 "HTMLCharacterReference.gperf" + {"л", "л"}, +#line 826 "HTMLCharacterReference.gperf" + {"ќ", "ќ"}, +#line 848 "HTMLCharacterReference.gperf" + {"⇤", "⇤"}, +#line 461 "HTMLCharacterReference.gperf" + {"Đ", "Đ"}, + {""}, +#line 482 "HTMLCharacterReference.gperf" + {"э", "э"}, + {""}, {""}, {""}, {""}, +#line 368 "HTMLCharacterReference.gperf" + {"д", "д"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 800 "HTMLCharacterReference.gperf" + {"Й", "Й"}, +#line 1346 "HTMLCharacterReference.gperf" + {"⧁", "⧁"}, + {""}, +#line 1777 "HTMLCharacterReference.gperf" + {"⋐", "⋐"}, +#line 67 "HTMLCharacterReference.gperf" + {"⊾", "⊾"}, +#line 31 "HTMLCharacterReference.gperf" + {"а", "а"}, +#line 1986 "HTMLCharacterReference.gperf" + {"◹", "◹"}, + {""}, {""}, {""}, +#line 1746 "HTMLCharacterReference.gperf" + {"⊓", "⊓"}, + {""}, {""}, {""}, {""}, +#line 561 "HTMLCharacterReference.gperf" + {"ф", "ф"}, + {""}, +#line 1343 "HTMLCharacterReference.gperf" + {"˛", "˛"}, +#line 1823 "HTMLCharacterReference.gperf" + {"⊃", "⊃"}, + {""}, +#line 114 "HTMLCharacterReference.gperf" + {"б", "б"}, +#line 440 "HTMLCharacterReference.gperf" + {"̑", "◌̑"}, +#line 1639 "HTMLCharacterReference.gperf" + {"⊵", "⊵"}, +#line 329 "HTMLCharacterReference.gperf" + {"↶", "↶"}, + {""}, {""}, {""}, +#line 1626 "HTMLCharacterReference.gperf" + {"⇉", "⇉"}, + {""}, +#line 654 "HTMLCharacterReference.gperf" + {"≥", "≥"}, +#line 2127 "HTMLCharacterReference.gperf" + {"з", "з"}, + {""}, {""}, +#line 273 "HTMLCharacterReference.gperf" + {"⊝", "⊝"}, + {""}, {""}, +#line 1623 "HTMLCharacterReference.gperf" + {")", ")"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 998 "HTMLCharacterReference.gperf" + {"↙", "↙"}, +#line 605 "HTMLCharacterReference.gperf" + {"γ", "γ"}, + {""}, {""}, +#line 367 "HTMLCharacterReference.gperf" + {"Д", "Д"}, + {""}, +#line 1410 "HTMLCharacterReference.gperf" + {".", "."}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 58 "HTMLCharacterReference.gperf" + {"⦨", "⦨"}, + {""}, +#line 1615 "HTMLCharacterReference.gperf" + {"⇾", "⇾"}, +#line 1022 "HTMLCharacterReference.gperf" + {"Ł", "Ł"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 269 "HTMLCharacterReference.gperf" + {"↺", "↺"}, +#line 721 "HTMLCharacterReference.gperf" + {"⁃", "⁃"}, + {""}, +#line 1572 "HTMLCharacterReference.gperf" + {"ρ", "ρ"}, + {""}, +#line 1659 "HTMLCharacterReference.gperf" + {"ŝ", "ŝ"}, + {""}, {""}, {""}, {""}, +#line 1832 "HTMLCharacterReference.gperf" + {"⇙", "⇙"}, +#line 722 "HTMLCharacterReference.gperf" + {"‐", "‐"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1927 "HTMLCharacterReference.gperf" + {"У", "У"}, +#line 1512 "HTMLCharacterReference.gperf" + {"⇒", "⇒"}, + {""}, {""}, {""}, +#line 801 "HTMLCharacterReference.gperf" + {"й", "й"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 503 "HTMLCharacterReference.gperf" + {"ē", "ē"}, + {""}, {""}, {""}, +#line 1444 "HTMLCharacterReference.gperf" + {"£", "£"}, +#line 364 "HTMLCharacterReference.gperf" + {"˝", "˝"}, + {""}, +#line 1241 "HTMLCharacterReference.gperf" + {"≇", "≇"}, +#line 875 "HTMLCharacterReference.gperf" + {"Л", "Л"}, + {""}, {""}, +#line 131 "HTMLCharacterReference.gperf" + {"⋃", "⋃"}, + {""}, +#line 2107 "HTMLCharacterReference.gperf" + {"Ы", "Ы"}, +#line 1701 
"HTMLCharacterReference.gperf" + {"≃", "≃"}, +#line 1702 "HTMLCharacterReference.gperf" + {"≃", "≃"}, + {""}, +#line 1834 "HTMLCharacterReference.gperf" + {"↙", "↙"}, + {""}, {""}, +#line 44 "HTMLCharacterReference.gperf" + {"ā", "ā"}, + {""}, {""}, {""}, {""}, +#line 1532 "HTMLCharacterReference.gperf" + {"⤍", "⤍"}, + {""}, {""}, {""}, {""}, +#line 1696 "HTMLCharacterReference.gperf" + {"σ", "σ"}, + {""}, {""}, +#line 2108 "HTMLCharacterReference.gperf" + {"ы", "ы"}, +#line 1907 "HTMLCharacterReference.gperf" + {"ц", "ц"}, +#line 1336 "HTMLCharacterReference.gperf" + {"⊙", "⊙"}, +#line 865 "HTMLCharacterReference.gperf" + {"[", "["}, + {""}, {""}, +#line 1854 "HTMLCharacterReference.gperf" + {"∴", "∴"}, + {""}, {""}, {""}, +#line 373 "HTMLCharacterReference.gperf" + {"⤑", "⤑"}, +#line 431 "HTMLCharacterReference.gperf" + {"⊨", "⊨"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1795 "HTMLCharacterReference.gperf" + {"⪺", "⪺"}, + {""}, +#line 729 "HTMLCharacterReference.gperf" + {"и", "и"}, + {""}, {""}, {""}, {""}, {""}, +#line 1904 "HTMLCharacterReference.gperf" + {"𝒯", "𝒯"}, + {""}, {""}, {""}, +#line 1545 "HTMLCharacterReference.gperf" + {"Р", "Р"}, + {""}, +#line 1856 "HTMLCharacterReference.gperf" + {"θ", "θ"}, + {""}, {""}, +#line 1481 "HTMLCharacterReference.gperf" + {"𝓅", "𝓅"}, +#line 1560 "HTMLCharacterReference.gperf" + {"∋", "∋"}, + {""}, {""}, {""}, +#line 1824 "HTMLCharacterReference.gperf" + {"⊇", "⊇"}, +#line 1825 "HTMLCharacterReference.gperf" + {"⫆", "⫆"}, + {""}, {""}, {""}, {""}, {""}, +#line 1536 "HTMLCharacterReference.gperf" + {"⦌", "⦌"}, + {""}, {""}, {""}, +#line 1314 "HTMLCharacterReference.gperf" + {"<⃒", "<⃒"}, + {""}, +#line 607 "HTMLCharacterReference.gperf" + {"ϝ", "ϝ"}, + {""}, {""}, {""}, {""}, {""}, +#line 746 "HTMLCharacterReference.gperf" + {"ℑ", "ℑ"}, +#line 614 "HTMLCharacterReference.gperf" + {"Г", "Г"}, + {""}, +#line 1094 "HTMLCharacterReference.gperf" + {"≋̸", "≋̸"}, + {""}, +#line 330 "HTMLCharacterReference.gperf" + {"⤽", "⤽"}, + {""}, {""}, {""}, +#line 1997 "HTMLCharacterReference.gperf" + {"⦧", "⦧"}, + {""}, +#line 1665 "HTMLCharacterReference.gperf" + {"С", "С"}, +#line 1941 "HTMLCharacterReference.gperf" + {"▀", "▀"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1739 "HTMLCharacterReference.gperf" + {"⊐", "⊐"}, + {""}, {""}, +#line 1188 "HTMLCharacterReference.gperf" + {"≂̸", "≂̸"}, + {""}, +#line 1946 "HTMLCharacterReference.gperf" + {"Ū", "Ū"}, +#line 220 "HTMLCharacterReference.gperf" + {"•", "•"}, + {""}, {""}, {""}, +#line 1384 "HTMLCharacterReference.gperf" + {"ℴ", "ℴ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1330 "HTMLCharacterReference.gperf" + {"О", "О"}, + {""}, +#line 1499 "HTMLCharacterReference.gperf" + {"⇛", "⇛"}, + {""}, +#line 130 "HTMLCharacterReference.gperf" + {"◯", "◯"}, + {""}, {""}, {""}, {""}, {""}, +#line 228 "HTMLCharacterReference.gperf" + {"⋒", "⋒"}, +#line 105 "HTMLCharacterReference.gperf" + {"⫧", "⫧"}, + {""}, {""}, +#line 1709 "HTMLCharacterReference.gperf" + {"⥲", "⥲"}, +#line 621 "HTMLCharacterReference.gperf" + {"⋛", "⋛"}, + {""}, {""}, +#line 1077 "HTMLCharacterReference.gperf" + {"⊧", "⊧"}, + {""}, +#line 1792 "HTMLCharacterReference.gperf" + {"≽", "≽"}, + {""}, +#line 478 "HTMLCharacterReference.gperf" + {"Ê", "Ê"}, +#line 1789 "HTMLCharacterReference.gperf" + {"≽", "≽"}, +#line 1337 "HTMLCharacterReference.gperf" + {"⦼", "⦼"}, +#line 560 "HTMLCharacterReference.gperf" + {"Ф", "Ф"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 604 "HTMLCharacterReference.gperf" + {"Γ", 
"Γ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 859 "HTMLCharacterReference.gperf" + {"⪭", "⪭"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 872 "HTMLCharacterReference.gperf" + {"ļ", "ļ"}, + {""}, {""}, +#line 1661 "HTMLCharacterReference.gperf" + {"⪶", "⪶"}, +#line 748 "HTMLCharacterReference.gperf" + {"ī", "ī"}, + {""}, +#line 1460 "HTMLCharacterReference.gperf" + {"⪵", "⪵"}, +#line 1112 "HTMLCharacterReference.gperf" + {"н", "н"}, +#line 1993 "HTMLCharacterReference.gperf" + {"▴", "▴"}, +#line 1924 "HTMLCharacterReference.gperf" + {"ŭ", "ŭ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1340 "HTMLCharacterReference.gperf" + {"⦿", "⦿"}, +#line 1483 "HTMLCharacterReference.gperf" + {"ψ", "ψ"}, + {""}, +#line 1769 "HTMLCharacterReference.gperf" + {"⫅", "⫅"}, +#line 1409 "HTMLCharacterReference.gperf" + {"%", "%"}, + {""}, +#line 343 "HTMLCharacterReference.gperf" + {"⋟", "⋟"}, + {""}, +#line 125 "HTMLCharacterReference.gperf" + {"ℶ", "ℶ"}, +#line 1647 "HTMLCharacterReference.gperf" + {"‚", "‚"}, + {""}, {""}, {""}, +#line 1633 "HTMLCharacterReference.gperf" + {"]", "]"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 376 "HTMLCharacterReference.gperf" + {"∇", "∇"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 177 "HTMLCharacterReference.gperf" + {"┬", "┬"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1111 "HTMLCharacterReference.gperf" + {"Н", "Н"}, + {""}, {""}, {""}, {""}, +#line 1054 "HTMLCharacterReference.gperf" + {"М", "М"}, + {""}, {""}, +#line 717 "HTMLCharacterReference.gperf" + {"Ħ", "Ħ"}, +#line 342 "HTMLCharacterReference.gperf" + {"⋞", "⋞"}, + {""}, +#line 1573 "HTMLCharacterReference.gperf" + {"ϱ", "ϱ"}, + {""}, +#line 232 "HTMLCharacterReference.gperf" + {"⩋", "⩋"}, + {""}, {""}, {""}, +#line 1613 "HTMLCharacterReference.gperf" + {"⫮", "⫮"}, + {""}, {""}, {""}, {""}, {""}, +#line 244 "HTMLCharacterReference.gperf" + {"ç", "ç"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1703 "HTMLCharacterReference.gperf" + {"⪞", "⪞"}, +#line 1355 "HTMLCharacterReference.gperf" + {"Ō", "Ō"}, + {""}, {""}, {""}, {""}, {""}, +#line 293 "HTMLCharacterReference.gperf" + {"≔", "≔"}, + {""}, {""}, +#line 1553 "HTMLCharacterReference.gperf" + {"ℜ", "ℜ"}, + {""}, {""}, {""}, {""}, +#line 1853 "HTMLCharacterReference.gperf" + {"∴", "∴"}, + {""}, {""}, +#line 117 "HTMLCharacterReference.gperf" + {"∵", "∵"}, + {""}, {""}, {""}, +#line 1697 "HTMLCharacterReference.gperf" + {"ς", "ς"}, + {""}, {""}, {""}, {""}, {""}, +#line 1900 "HTMLCharacterReference.gperf" + {"⨹", "⨹"}, + {""}, +#line 1567 "HTMLCharacterReference.gperf" + {"⥤", "⥤"}, + {""}, +#line 403 "HTMLCharacterReference.gperf" + {"⋇", "⋇"}, +#line 849 "HTMLCharacterReference.gperf" + {"⤟", "⤟"}, + {""}, {""}, +#line 2043 "HTMLCharacterReference.gperf" + {"⊂⃒", "⊂⃒"}, + {""}, {""}, +#line 30 "HTMLCharacterReference.gperf" + {"А", "А"}, + {""}, +#line 1855 "HTMLCharacterReference.gperf" + {"Θ", "Θ"}, + {""}, {""}, {""}, +#line 882 "HTMLCharacterReference.gperf" + {"↲", "↲"}, + {""}, +#line 345 "HTMLCharacterReference.gperf" + {"⋏", "⋏"}, + {""}, {""}, {""}, {""}, +#line 1221 "HTMLCharacterReference.gperf" + {"⪯̸", "⪯̸"}, + {""}, {""}, {""}, +#line 1196 "HTMLCharacterReference.gperf" + {"≵", "≵"}, +#line 606 "HTMLCharacterReference.gperf" + {"Ϝ", "Ϝ"}, + {""}, {""}, {""}, {""}, +#line 2100 "HTMLCharacterReference.gperf" + {"⋀", "⋀"}, + {""}, {""}, {""}, {""}, +#line 871 "HTMLCharacterReference.gperf" + {"Ļ", "Ļ"}, + {""}, +#line 1741 "HTMLCharacterReference.gperf" + {"⊐", "⊐"}, + 
{""}, +#line 1742 "HTMLCharacterReference.gperf" + {"⊒", "⊒"}, +#line 1682 "HTMLCharacterReference.gperf" + {"⌢", "⌢"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 353 "HTMLCharacterReference.gperf" + {"⌭", "⌭"}, + {""}, +#line 1694 "HTMLCharacterReference.gperf" + {"­", " "}, +#line 1452 "HTMLCharacterReference.gperf" + {"⪷", "⪷"}, +#line 237 "HTMLCharacterReference.gperf" + {"⁁", "⁁"}, +#line 835 "HTMLCharacterReference.gperf" + {"ℒ", "ℒ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 860 "HTMLCharacterReference.gperf" + {"⪭︀", "⪭︀"}, +#line 1862 "HTMLCharacterReference.gperf" + {" ", " "}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1407 "HTMLCharacterReference.gperf" + {"П", "П"}, + {""}, {""}, +#line 408 "HTMLCharacterReference.gperf" + {"$", "$"}, + {""}, {""}, +#line 1075 "HTMLCharacterReference.gperf" + {"…", "…"}, +#line 1671 "HTMLCharacterReference.gperf" + {"⇘", "⇘"}, +#line 1541 "HTMLCharacterReference.gperf" + {"Ŗ", "Ŗ"}, + {""}, {""}, {""}, +#line 1419 "HTMLCharacterReference.gperf" + {"ℳ", "ℳ"}, +#line 106 "HTMLCharacterReference.gperf" + {"⊽", "⊽"}, + {""}, {""}, +#line 1464 "HTMLCharacterReference.gperf" + {"′", "′"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 728 "HTMLCharacterReference.gperf" + {"И", "И"}, + {""}, {""}, +#line 892 "HTMLCharacterReference.gperf" + {"⌈", "⌈"}, + {""}, {""}, +#line 839 "HTMLCharacterReference.gperf" + {"⟨", "⟨"}, + {""}, {""}, {""}, {""}, +#line 2104 "HTMLCharacterReference.gperf" + {"я", "я"}, + {""}, +#line 611 "HTMLCharacterReference.gperf" + {"Ģ", "Ģ"}, +#line 1417 "HTMLCharacterReference.gperf" + {"φ", "φ"}, + {""}, {""}, {""}, {""}, +#line 1219 "HTMLCharacterReference.gperf" + {"⋽", "⋽"}, + {""}, {""}, +#line 429 "HTMLCharacterReference.gperf" + {"⟹", "⟹"}, +#line 1656 "HTMLCharacterReference.gperf" + {"Ş", "Ş"}, +#line 1538 "HTMLCharacterReference.gperf" + {"⦐", "⦐"}, + {""}, +#line 720 "HTMLCharacterReference.gperf" + {"≏", "≏"}, + {""}, {""}, +#line 1129 "HTMLCharacterReference.gperf" + {" ", "␊"}, +#line 1559 "HTMLCharacterReference.gperf" + {"®", "®"}, + {""}, {""}, +#line 1881 "HTMLCharacterReference.gperf" + {"⫱", "⫱"}, + {""}, {""}, {""}, +#line 43 "HTMLCharacterReference.gperf" + {"Ā", "Ā"}, + {""}, +#line 1673 "HTMLCharacterReference.gperf" + {"↘", "↘"}, +#line 2126 "HTMLCharacterReference.gperf" + {"З", "З"}, + {""}, {""}, {""}, +#line 434 "HTMLCharacterReference.gperf" + {"∥", "∥"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1465 "HTMLCharacterReference.gperf" + {"ℙ", "ℙ"}, + {""}, {""}, {""}, +#line 825 "HTMLCharacterReference.gperf" + {"Ќ", "Ќ"}, + {""}, {""}, +#line 294 "HTMLCharacterReference.gperf" + {"≔", "≔"}, + {""}, +#line 1985 "HTMLCharacterReference.gperf" + {"ů", "ů"}, + {""}, +#line 1781 "HTMLCharacterReference.gperf" + {"⊆", "⊆"}, + {""}, +#line 68 "HTMLCharacterReference.gperf" + {"⦝", "⦝"}, + {""}, {""}, +#line 655 "HTMLCharacterReference.gperf" + {"⋛", "⋛"}, + {""}, {""}, {""}, +#line 1556 "HTMLCharacterReference.gperf" + {"ℝ", "ℝ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 113 "HTMLCharacterReference.gperf" + {"Б", "Б"}, + {""}, {""}, {""}, +#line 1391 "HTMLCharacterReference.gperf" + {"⊗", "⊗"}, + {""}, +#line 1826 "HTMLCharacterReference.gperf" + {"⊋", "⊋"}, +#line 1827 "HTMLCharacterReference.gperf" + {"⫌", "⫌"}, +#line 272 "HTMLCharacterReference.gperf" + {"⊚", "⊚"}, + {""}, {""}, 
+#line 1630 "HTMLCharacterReference.gperf" + {"𝓇", "𝓇"}, + {""}, +#line 684 "HTMLCharacterReference.gperf" + {" ", " "}, + {""}, {""}, +#line 1074 "HTMLCharacterReference.gperf" + {"⫛", "⫛"}, +#line 1057 "HTMLCharacterReference.gperf" + {"∺", "∺"}, +#line 1107 "HTMLCharacterReference.gperf" + {"ņ", "ņ"}, + {""}, {""}, +#line 943 "HTMLCharacterReference.gperf" + {"⩽", "⩽"}, + {""}, {""}, {""}, {""}, {""}, +#line 2083 "HTMLCharacterReference.gperf" + {"⟸", "⟸"}, +#line 1312 "HTMLCharacterReference.gperf" + {"⤂", "⤂"}, + {""}, {""}, +#line 1354 "HTMLCharacterReference.gperf" + {"⧀", "⧀"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1906 "HTMLCharacterReference.gperf" + {"Ц", "Ц"}, +#line 747 "HTMLCharacterReference.gperf" + {"Ī", "Ī"}, + {""}, +#line 1275 "HTMLCharacterReference.gperf" + {"⋣", "⋣"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 2006 "HTMLCharacterReference.gperf" + {"↕", "↕"}, + {""}, {""}, +#line 1621 "HTMLCharacterReference.gperf" + {"⨵", "⨵"}, + {""}, +#line 838 "HTMLCharacterReference.gperf" + {"⟪", "⟪"}, + {""}, +#line 1106 "HTMLCharacterReference.gperf" + {"Ņ", "Ņ"}, + {""}, {""}, +#line 444 "HTMLCharacterReference.gperf" + {"⥐", "⥐"}, +#line 290 "HTMLCharacterReference.gperf" + {"∷", "∷"}, + {""}, +#line 1771 "HTMLCharacterReference.gperf" + {"⫃", "⫃"}, + {""}, {""}, {""}, {""}, +#line 1345 "HTMLCharacterReference.gperf" + {"ò", "ò"}, + {""}, +#line 2004 "HTMLCharacterReference.gperf" + {"∝", "∝"}, +#line 1864 "HTMLCharacterReference.gperf" + {"≈", "≈"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 135 "HTMLCharacterReference.gperf" + {"⨆", "⨆"}, + {""}, +#line 1942 "HTMLCharacterReference.gperf" + {"⌜", "⌜"}, + {""}, {""}, +#line 881 "HTMLCharacterReference.gperf" + {"⥋", "⥋"}, + {""}, {""}, +#line 1843 "HTMLCharacterReference.gperf" + {"ť", "ť"}, + {""}, +#line 148 "HTMLCharacterReference.gperf" + {"▸", "▸"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1944 "HTMLCharacterReference.gperf" + {"⌏", "⌏"}, + {""}, {""}, {""}, {""}, +#line 278 "HTMLCharacterReference.gperf" + {"⊕", "⊕"}, + {""}, {""}, {""}, +#line 1877 "HTMLCharacterReference.gperf" + {"∭", "∭"}, + {""}, {""}, {""}, {""}, +#line 1505 "HTMLCharacterReference.gperf" + {"⟫", "⟫"}, + {""}, +#line 858 "HTMLCharacterReference.gperf" + {"⤙", "⤙"}, + {""}, {""}, {""}, {""}, +#line 1076 "HTMLCharacterReference.gperf" + {"∓", "∓"}, + {""}, {""}, {""}, +#line 1991 "HTMLCharacterReference.gperf" + {"ũ", "ũ"}, +#line 1768 "HTMLCharacterReference.gperf" + {"⪽", "⪽"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 441 "HTMLCharacterReference.gperf" + {"⇊", "⇊"}, +#line 1735 "HTMLCharacterReference.gperf" + {"⊏", "⊏"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 325 "HTMLCharacterReference.gperf" + {"⤸", "⤸"}, + {""}, {""}, +#line 1866 "HTMLCharacterReference.gperf" + {"Þ", "Þ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1534 "HTMLCharacterReference.gperf" + {"}", "}"}, + {""}, +#line 1467 "HTMLCharacterReference.gperf" + {"⪵", "⪵"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 82 "HTMLCharacterReference.gperf" + {"⁡", "⁡"}, + {""}, {""}, +#line 1959 "HTMLCharacterReference.gperf" + {"↑", "↑"}, + {""}, +#line 1392 "HTMLCharacterReference.gperf" + {"⨶", "⨶"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1568 "HTMLCharacterReference.gperf" + {"⇁", "⇁"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 45 
"HTMLCharacterReference.gperf" + {"⨿", "⨿"}, +#line 1740 "HTMLCharacterReference.gperf" + {"⊒", "⊒"}, + {""}, +#line 231 "HTMLCharacterReference.gperf" + {"⩉", "⩉"}, + {""}, {""}, {""}, {""}, +#line 1418 "HTMLCharacterReference.gperf" + {"ϕ", "ϕ"}, + {""}, +#line 533 "HTMLCharacterReference.gperf" + {"≂", "≂"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1828 "HTMLCharacterReference.gperf" + {"⫈", "⫈"}, + {""}, {""}, {""}, +#line 1704 "HTMLCharacterReference.gperf" + {"⪠", "⪠"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1065 "HTMLCharacterReference.gperf" + {"∣", "∣"}, + {""}, {""}, {""}, {""}, {""}, +#line 1091 "HTMLCharacterReference.gperf" + {"∠⃒", "∠⃒"}, + {""}, {""}, {""}, +#line 139 "HTMLCharacterReference.gperf" + {"⨄", "⨄"}, + {""}, {""}, +#line 1350 "HTMLCharacterReference.gperf" + {"↺", "↺"}, + {""}, {""}, {""}, {""}, {""}, +#line 1943 "HTMLCharacterReference.gperf" + {"⌜", "⌜"}, +#line 145 "HTMLCharacterReference.gperf" + {"▴", "▴"}, + {""}, +#line 564 "HTMLCharacterReference.gperf" + {"ff", "ff"}, + {""}, {""}, {""}, {""}, {""}, +#line 672 "HTMLCharacterReference.gperf" + {"⦕", "⦕"}, + {""}, {""}, {""}, {""}, +#line 811 "HTMLCharacterReference.gperf" + {"Є", "Є"}, + {""}, +#line 275 "HTMLCharacterReference.gperf" + {"®", "®"}, + {""}, +#line 1614 "HTMLCharacterReference.gperf" + {"⟭", "⟭"}, +#line 1237 "HTMLCharacterReference.gperf" + {"⊃⃒", "⊃⃒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1537 "HTMLCharacterReference.gperf" + {"⦎", "⦎"}, + {""}, {""}, +#line 843 "HTMLCharacterReference.gperf" + {"ℒ", "ℒ"}, + {""}, {""}, {""}, +#line 2085 "HTMLCharacterReference.gperf" + {"⟼", "⟼"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1374 "HTMLCharacterReference.gperf" + {"ℴ", "ℴ"}, + {""}, {""}, {""}, {""}, +#line 1422 "HTMLCharacterReference.gperf" + {"π", "π"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1169 "HTMLCharacterReference.gperf" + {"⋘̸", "⋘̸"}, + {""}, {""}, {""}, {""}, +#line 99 "HTMLCharacterReference.gperf" + {"≌", "≌"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 143 "HTMLCharacterReference.gperf" + {"⧫", "⧫"}, + {""}, {""}, {""}, +#line 1842 "HTMLCharacterReference.gperf" + {"Ť", "Ť"}, + {""}, +#line 499 "HTMLCharacterReference.gperf" + {"ℓ", "ℓ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1470 "HTMLCharacterReference.gperf" + {"∏", "∏"}, +#line 1737 "HTMLCharacterReference.gperf" + {"⊏", "⊏"}, + {""}, +#line 1738 "HTMLCharacterReference.gperf" + {"⊑", "⊑"}, + {""}, {""}, {""}, +#line 1897 "HTMLCharacterReference.gperf" + {"≜", "≜"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 71 "HTMLCharacterReference.gperf" + {"⍼", "⍼"}, +#line 1455 "HTMLCharacterReference.gperf" + {"⪯", "⪯"}, + {""}, +#line 1732 "HTMLCharacterReference.gperf" + {"⊔", "⊔"}, +#line 812 "HTMLCharacterReference.gperf" + {"є", "є"}, + {""}, +#line 1561 "HTMLCharacterReference.gperf" + {"⇋", "⇋"}, + {""}, +#line 999 "HTMLCharacterReference.gperf" + {"↘", "↘"}, + {""}, {""}, +#line 1610 "HTMLCharacterReference.gperf" + {"‏", "‏"}, + {""}, +#line 378 "HTMLCharacterReference.gperf" + {"δ", "δ"}, +#line 1637 "HTMLCharacterReference.gperf" + {"⋊", "⋊"}, + {""}, {""}, {""}, {""}, +#line 355 "HTMLCharacterReference.gperf" + {"†", "†"}, + {""}, +#line 1814 "HTMLCharacterReference.gperf" + {"⊇", "⊇"}, + {""}, {""}, {""}, +#line 1968 "HTMLCharacterReference.gperf" + {"↿", "↿"}, +#line 1469 
"HTMLCharacterReference.gperf" + {"∏", "∏"}, +#line 1557 "HTMLCharacterReference.gperf" + {"▭", "▭"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1243 "HTMLCharacterReference.gperf" + {"∤", "∤"}, + {""}, {""}, {""}, {""}, {""}, +#line 1349 "HTMLCharacterReference.gperf" + {"∮", "∮"}, +#line 1700 "HTMLCharacterReference.gperf" + {"⩪", "⩪"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 126 "HTMLCharacterReference.gperf" + {"≬", "≬"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 554 "HTMLCharacterReference.gperf" + {"∃", "∃"}, +#line 1733 "HTMLCharacterReference.gperf" + {"⊔︀", "⊔︀"}, + {""}, {""}, +#line 1093 "HTMLCharacterReference.gperf" + {"⩰̸", "⩰̸"}, +#line 425 "HTMLCharacterReference.gperf" + {"⇔", "⇔"}, +#line 795 "HTMLCharacterReference.gperf" + {"і", "і"}, +#line 1809 "HTMLCharacterReference.gperf" + {"⫘", "⫘"}, + {""}, {""}, +#line 377 "HTMLCharacterReference.gperf" + {"Δ", "Δ"}, + {""}, {""}, {""}, {""}, {""}, +#line 354 "HTMLCharacterReference.gperf" + {"‡", "‡"}, + {""}, {""}, +#line 274 "HTMLCharacterReference.gperf" + {"⊙", "⊙"}, +#line 1888 "HTMLCharacterReference.gperf" + {"™", "™"}, + {""}, {""}, {""}, {""}, {""}, +#line 1395 "HTMLCharacterReference.gperf" + {"⌽", "⌽"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 716 "HTMLCharacterReference.gperf" + {"ℏ", "ℏ"}, +#line 1424 "HTMLCharacterReference.gperf" + {"ϖ", "ϖ"}, + {""}, {""}, +#line 1399 "HTMLCharacterReference.gperf" + {"⏜", "⏜"}, + {""}, {""}, +#line 1920 "HTMLCharacterReference.gperf" + {"⥉", "⥉"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 840 "HTMLCharacterReference.gperf" + {"⦑", "⦑"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 104 "HTMLCharacterReference.gperf" + {"∖", "∖"}, +#line 1411 "HTMLCharacterReference.gperf" + {"‰", "‰"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1218 "HTMLCharacterReference.gperf" + {"⋾", "⋾"}, + {""}, {""}, {""}, +#line 1347 "HTMLCharacterReference.gperf" + {"⦵", "⦵"}, + {""}, {""}, {""}, {""}, +#line 2035 "HTMLCharacterReference.gperf" + {"∣", "∣"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1064 "HTMLCharacterReference.gperf" + {"µ", "µ"}, +#line 850 "HTMLCharacterReference.gperf" + {"⤝", "⤝"}, + {""}, {""}, {""}, {""}, +#line 841 "HTMLCharacterReference.gperf" + {"⟨", "⟨"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1963 "HTMLCharacterReference.gperf" + {"⇅", "⇅"}, + {""}, {""}, {""}, {""}, {""}, +#line 1772 "HTMLCharacterReference.gperf" + {"⫁", "⫁"}, + {""}, +#line 1313 "HTMLCharacterReference.gperf" + {"≤⃒", "≤⃒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1926 "HTMLCharacterReference.gperf" + {"û", "û"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1961 "HTMLCharacterReference.gperf" + {"↑", "↑"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1624 "HTMLCharacterReference.gperf" + {"⦔", "⦔"}, + {""}, {""}, +#line 1932 "HTMLCharacterReference.gperf" + {"⥮", "⥮"}, + {""}, +#line 234 "HTMLCharacterReference.gperf" + {"⩀", "⩀"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 2020 "HTMLCharacterReference.gperf" + {"в", "в"}, +#line 1945 "HTMLCharacterReference.gperf" + {"◸", "◸"}, + {""}, {""}, {""}, {""}, +#line 1293 "HTMLCharacterReference.gperf" + {"≸", "≸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, 
{""}, {""}, {""}, +#line 1608 "HTMLCharacterReference.gperf" + {"⇄", "⇄"}, + {""}, {""}, {""}, {""}, +#line 1616 "HTMLCharacterReference.gperf" + {"⟧", "⟧"}, +#line 243 "HTMLCharacterReference.gperf" + {"Ç", "Ç"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 33 "HTMLCharacterReference.gperf" + {"æ", "æ"}, + {""}, {""}, +#line 1969 "HTMLCharacterReference.gperf" + {"↾", "↾"}, + {""}, {""}, +#line 292 "HTMLCharacterReference.gperf" + {"⩴", "⩴"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1543 "HTMLCharacterReference.gperf" + {"⌉", "⌉"}, + {""}, +#line 894 "HTMLCharacterReference.gperf" + {"⥡", "⥡"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 678 "HTMLCharacterReference.gperf" + {"⪌", "⪌"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1385 "HTMLCharacterReference.gperf" + {"Ø", "Ø"}, + {""}, +#line 620 "HTMLCharacterReference.gperf" + {"⪌", "⪌"}, + {""}, +#line 1459 "HTMLCharacterReference.gperf" + {"⪹", "⪹"}, + {""}, {""}, +#line 2019 "HTMLCharacterReference.gperf" + {"В", "В"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1533 "HTMLCharacterReference.gperf" + {"❳", "❳"}, +#line 1736 "HTMLCharacterReference.gperf" + {"⊑", "⊑"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1722 "HTMLCharacterReference.gperf" + {"/", "/"}, + {""}, +#line 1950 "HTMLCharacterReference.gperf" + {"⏟", "⏟"}, +#line 1778 "HTMLCharacterReference.gperf" + {"⊂", "⊂"}, +#line 1951 "HTMLCharacterReference.gperf" + {"⎵", "⎵"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1066 "HTMLCharacterReference.gperf" + {"*", "*"}, + {""}, +#line 1717 "HTMLCharacterReference.gperf" + {"⪪", "⪪"}, + {""}, {""}, {""}, {""}, {""}, +#line 1547 "HTMLCharacterReference.gperf" + {"⤷", "⤷"}, + {""}, {""}, {""}, {""}, +#line 236 "HTMLCharacterReference.gperf" + {"∩︀", "∩︀"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1335 "HTMLCharacterReference.gperf" + {"⨸", "⨸"}, + {""}, {""}, {""}, {""}, {""}, +#line 1351 "HTMLCharacterReference.gperf" + {"⦾", "⦾"}, + {""}, {""}, {""}, {""}, +#line 1540 "HTMLCharacterReference.gperf" + {"ř", "ř"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 794 "HTMLCharacterReference.gperf" + {"І", "І"}, +#line 1274 "HTMLCharacterReference.gperf" + {"⋢", "⋢"}, + {""}, {""}, {""}, +#line 1526 "HTMLCharacterReference.gperf" + {"⤜", "⤜"}, +#line 1193 "HTMLCharacterReference.gperf" + {"≫̸", "≫̸"}, + {""}, {""}, +#line 1191 "HTMLCharacterReference.gperf" + {"≱", "≱"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1453 "HTMLCharacterReference.gperf" + {"≼", "≼"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1640 "HTMLCharacterReference.gperf" + {"▸", "▸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 755 "HTMLCharacterReference.gperf" + {"Ƶ", "Ƶ"}, + {""}, {""}, +#line 1632 "HTMLCharacterReference.gperf" + {"↱", "↱"}, + {""}, {""}, +#line 1053 "HTMLCharacterReference.gperf" + {"⨩", "⨩"}, + {""}, {""}, {""}, {""}, {""}, +#line 146 "HTMLCharacterReference.gperf" + {"▾", "▾"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 683 "HTMLCharacterReference.gperf" + {"ˇ", "ˇ"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1359 "HTMLCharacterReference.gperf" + {"Ο", "Ο"}, + {""}, {""}, +#line 749 "HTMLCharacterReference.gperf" + {"ℑ", "ℑ"}, + {""}, 
+#line 1389 "HTMLCharacterReference.gperf" + {"õ", "õ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 688 "HTMLCharacterReference.gperf" + {"ъ", "ъ"}, + {""}, {""}, {""}, +#line 652 "HTMLCharacterReference.gperf" + {"𝕘", "𝕘"}, + {""}, {""}, {""}, +#line 1728 "HTMLCharacterReference.gperf" + {"♠", "♠"}, +#line 993 "HTMLCharacterReference.gperf" + {"𝕝", "𝕝"}, + {""}, +#line 2036 "HTMLCharacterReference.gperf" + {"|", "|"}, + {""}, {""}, +#line 518 "HTMLCharacterReference.gperf" + {"𝕖", "𝕖"}, + {""}, {""}, +#line 1779 "HTMLCharacterReference.gperf" + {"⊆", "⊆"}, +#line 1780 "HTMLCharacterReference.gperf" + {"⫅", "⫅"}, +#line 410 "HTMLCharacterReference.gperf" + {"𝕕", "𝕕"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 805 "HTMLCharacterReference.gperf" + {"𝕁", "𝕁"}, +#line 699 "HTMLCharacterReference.gperf" + {"…", "…"}, + {""}, {""}, {""}, +#line 75 "HTMLCharacterReference.gperf" + {"𝕒", "𝕒"}, +#line 915 "HTMLCharacterReference.gperf" + {"⥑", "⥑"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 577 "HTMLCharacterReference.gperf" + {"𝕗", "𝕗"}, + {""}, {""}, +#line 1898 "HTMLCharacterReference.gperf" + {"⨺", "⨺"}, + {""}, +#line 159 "HTMLCharacterReference.gperf" + {"𝕓", "𝕓"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 136 "HTMLCharacterReference.gperf" + {"★", "★"}, + {""}, {""}, +#line 2140 "HTMLCharacterReference.gperf" + {"𝕫", "𝕫"}, + {""}, {""}, +#line 1666 "HTMLCharacterReference.gperf" + {"с", "с"}, + {""}, +#line 711 "HTMLCharacterReference.gperf" + {"𝕙", "𝕙"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 409 "HTMLCharacterReference.gperf" + {"𝔻", "𝔻"}, + {""}, {""}, {""}, {""}, +#line 308 "HTMLCharacterReference.gperf" + {"𝕔", "𝕔"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 575 "HTMLCharacterReference.gperf" + {"ƒ", "ƒ"}, +#line 1413 "HTMLCharacterReference.gperf" + {"‱", "‱"}, + {""}, {""}, {""}, {""}, +#line 1830 "HTMLCharacterReference.gperf" + {"⫖", "⫖"}, + {""}, {""}, {""}, {""}, {""}, +#line 1902 "HTMLCharacterReference.gperf" + {"⨻", "⨻"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1488 "HTMLCharacterReference.gperf" + {"ℚ", "ℚ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1957 "HTMLCharacterReference.gperf" + {"𝕌", "𝕌"}, +#line 233 "HTMLCharacterReference.gperf" + {"⩇", "⩇"}, + {""}, {""}, {""}, +#line 806 "HTMLCharacterReference.gperf" + {"𝕛", "𝕛"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 992 "HTMLCharacterReference.gperf" + {"𝕃", "𝕃"}, + {""}, {""}, {""}, {""}, +#line 2114 "HTMLCharacterReference.gperf" + {"𝕐", "𝕐"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1352 "HTMLCharacterReference.gperf" + {"⦻", "⦻"}, +#line 2143 "HTMLCharacterReference.gperf" + {"‍", "‍"}, + {""}, {""}, {""}, {""}, +#line 1245 "HTMLCharacterReference.gperf" + {"∦", "∦"}, +#line 2115 "HTMLCharacterReference.gperf" + {"𝕪", "𝕪"}, + {""}, {""}, {""}, {""}, +#line 571 "HTMLCharacterReference.gperf" + {"fj", "fj"}, + {""}, {""}, +#line 1375 "HTMLCharacterReference.gperf" + {"ℴ", "ℴ"}, + {""}, +#line 42 "HTMLCharacterReference.gperf" + {"α", "α"}, + {""}, {""}, +#line 2039 "HTMLCharacterReference.gperf" + {" ", " "}, + {""}, +#line 1088 
"HTMLCharacterReference.gperf" + {"∇", "∇"}, + {""}, {""}, {""}, {""}, +#line 778 "HTMLCharacterReference.gperf" + {"𝕚", "𝕚"}, +#line 277 "HTMLCharacterReference.gperf" + {"⊖", "⊖"}, + {""}, {""}, {""}, {""}, +#line 279 "HTMLCharacterReference.gperf" + {"⊗", "⊗"}, + {""}, +#line 1641 "HTMLCharacterReference.gperf" + {"⧎", "⧎"}, + {""}, +#line 1618 "HTMLCharacterReference.gperf" + {"ℝ", "ℝ"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1644 "HTMLCharacterReference.gperf" + {"℞", "℞"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 916 "HTMLCharacterReference.gperf" + {"⥠", "⥠"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 651 "HTMLCharacterReference.gperf" + {"𝔾", "𝔾"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1725 "HTMLCharacterReference.gperf" + {"𝕊", "𝕊"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 481 "HTMLCharacterReference.gperf" + {"Э", "Э"}, + {""}, {""}, {""}, +#line 445 "HTMLCharacterReference.gperf" + {"⥞", "⥞"}, + {""}, {""}, +#line 1327 "HTMLCharacterReference.gperf" + {"⊚", "⊚"}, + {""}, +#line 142 "HTMLCharacterReference.gperf" + {"⤍", "⤍"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1363 "HTMLCharacterReference.gperf" + {"𝕆", "𝕆"}, + {""}, +#line 707 "HTMLCharacterReference.gperf" + {"∻", "∻"}, + {""}, {""}, +#line 181 "HTMLCharacterReference.gperf" + {"┴", "┴"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1378 "HTMLCharacterReference.gperf" + {"⊶", "⊶"}, + {""}, {""}, +#line 2067 "HTMLCharacterReference.gperf" + {"𝕨", "𝕨"}, +#line 837 "HTMLCharacterReference.gperf" + {"λ", "λ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 576 "HTMLCharacterReference.gperf" + {"𝔽", "𝔽"}, + {""}, {""}, {""}, {""}, {""}, +#line 1353 "HTMLCharacterReference.gperf" + {"‾", "‾"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1555 "HTMLCharacterReference.gperf" + {"ℜ", "ℜ"}, +#line 1544 "HTMLCharacterReference.gperf" + {"}", "}"}, +#line 2003 "HTMLCharacterReference.gperf" + {"ϖ", "ϖ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1002 "HTMLCharacterReference.gperf" + {"⧫", "⧫"}, + {""}, {""}, {""}, {""}, +#line 1185 "HTMLCharacterReference.gperf" + {"∦", "∦"}, + {""}, +#line 1835 "HTMLCharacterReference.gperf" + {"⤪", "⤪"}, + {""}, +#line 1231 "HTMLCharacterReference.gperf" + {"⊂⃒", "⊂⃒"}, +#line 1180 "HTMLCharacterReference.gperf" + {"𝕟", "𝕟"}, + {""}, {""}, {""}, {""}, +#line 745 "HTMLCharacterReference.gperf" + {"ij", "ij"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1489 "HTMLCharacterReference.gperf" + {"𝕢", "𝕢"}, +#line 1620 "HTMLCharacterReference.gperf" + {"⨮", "⨮"}, + {""}, +#line 1799 "HTMLCharacterReference.gperf" + {"∋", "∋"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1896 "HTMLCharacterReference.gperf" + {"◬", "◬"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1873 "HTMLCharacterReference.gperf" + {"×", "×"}, +#line 198 "HTMLCharacterReference.gperf" + {"┼", "┼"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1723 "HTMLCharacterReference.gperf" + {"⧄", "⧄"}, +#line 1179 "HTMLCharacterReference.gperf" + {"ℕ", "ℕ"}, +#line 1657 "HTMLCharacterReference.gperf" + {"ş", "ş"}, + {""}, {""}, {""}, +#line 1078 "HTMLCharacterReference.gperf" + {"𝕄", "𝕄"}, + {""}, {""}, +#line 2038 "HTMLCharacterReference.gperf" + {"≀", "≀"}, + {""}, +#line 502 "HTMLCharacterReference.gperf" + {"Ē", "Ē"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1712 "HTMLCharacterReference.gperf" + {"∖", "∖"}, + {""}, 
+#line 180 "HTMLCharacterReference.gperf" + {"╨", "╨"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 196 "HTMLCharacterReference.gperf" + {"╫", "╫"}, +#line 147 "HTMLCharacterReference.gperf" + {"◂", "◂"}, + {""}, {""}, {""}, +#line 1329 "HTMLCharacterReference.gperf" + {"ô", "ô"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 573 "HTMLCharacterReference.gperf" + {"fl", "fl"}, + {""}, {""}, {""}, {""}, {""}, +#line 836 "HTMLCharacterReference.gperf" + {"Λ", "Λ"}, + {""}, {""}, +#line 1782 "HTMLCharacterReference.gperf" + {"⊊", "⊊"}, +#line 1783 "HTMLCharacterReference.gperf" + {"⫋", "⫋"}, + {""}, {""}, {""}, {""}, +#line 74 "HTMLCharacterReference.gperf" + {"𝔸", "𝔸"}, + {""}, +#line 557 "HTMLCharacterReference.gperf" + {"ⅇ", "ⅇ"}, + {""}, {""}, +#line 710 "HTMLCharacterReference.gperf" + {"ℍ", "ℍ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1575 "HTMLCharacterReference.gperf" + {"→", "→"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 844 "HTMLCharacterReference.gperf" + {"«", "«"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1060 "HTMLCharacterReference.gperf" + {"ℳ", "ℳ"}, + {""}, {""}, {""}, +#line 819 "HTMLCharacterReference.gperf" + {"к", "к"}, + {""}, +#line 1931 "HTMLCharacterReference.gperf" + {"ű", "ű"}, + {""}, {""}, {""}, +#line 583 "HTMLCharacterReference.gperf" + {"⨍", "⨍"}, + {""}, {""}, {""}, {""}, +#line 218 "HTMLCharacterReference.gperf" + {"⟈", "⟈"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1047 "HTMLCharacterReference.gperf" + {"↦", "↦"}, + {""}, {""}, +#line 1998 "HTMLCharacterReference.gperf" + {"⦜", "⦜"}, + {""}, {""}, {""}, +#line 1442 "HTMLCharacterReference.gperf" + {"ℙ", "ℙ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1876 "HTMLCharacterReference.gperf" + {"⨰", "⨰"}, +#line 1554 "HTMLCharacterReference.gperf" + {"ℛ", "ℛ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 984 "HTMLCharacterReference.gperf" + {"⟷", "⟷"}, + {""}, +#line 777 "HTMLCharacterReference.gperf" + {"𝕀", "𝕀"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2088 "HTMLCharacterReference.gperf" + {"𝕏", "𝕏"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1357 "HTMLCharacterReference.gperf" + {"Ω", "Ω"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2139 "HTMLCharacterReference.gperf" + {"ℤ", "ℤ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1628 "HTMLCharacterReference.gperf" + {"›", "›"}, + {""}, {""}, {""}, +#line 814 "HTMLCharacterReference.gperf" + {"κ", "κ"}, + {""}, +#line 912 "HTMLCharacterReference.gperf" + {"⊲", "⊲"}, +#line 713 "HTMLCharacterReference.gperf" + {"─", "─"}, + {""}, +#line 41 "HTMLCharacterReference.gperf" + {"Α", "Α"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 818 "HTMLCharacterReference.gperf" + {"К", "К"}, + {""}, {""}, {""}, {""}, +#line 1919 "HTMLCharacterReference.gperf" + {"↑", "↑"}, + {""}, {""}, {""}, +#line 914 "HTMLCharacterReference.gperf" + {"⊴", "⊴"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 158 "HTMLCharacterReference.gperf" + {"𝔹", "𝔹"}, +#line 1859 "HTMLCharacterReference.gperf" + {"≈", "≈"}, + {""}, {""}, {""}, +#line 306 "HTMLCharacterReference.gperf" + {"∮", 
"∮"}, +#line 1662 "HTMLCharacterReference.gperf" + {"⋩", "⋩"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1784 "HTMLCharacterReference.gperf" + {"⫇", "⫇"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1698 "HTMLCharacterReference.gperf" + {"ς", "ς"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1791 "HTMLCharacterReference.gperf" + {"⪰", "⪰"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 692 "HTMLCharacterReference.gperf" + {"↭", "↭"}, + {""}, {""}, +#line 983 "HTMLCharacterReference.gperf" + {"⟺", "⟺"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1858 "HTMLCharacterReference.gperf" + {"ϑ", "ϑ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1676 "HTMLCharacterReference.gperf" + {"⤩", "⤩"}, + {""}, {""}, {""}, {""}, {""}, +#line 920 "HTMLCharacterReference.gperf" + {"⥒", "⥒"}, + {""}, +#line 813 "HTMLCharacterReference.gperf" + {"Κ", "Κ"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 756 "HTMLCharacterReference.gperf" + {"⇒", "⇒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1569 "HTMLCharacterReference.gperf" + {"⇀", "⇀"}, +#line 100 "HTMLCharacterReference.gperf" + {"϶", "϶"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 344 "HTMLCharacterReference.gperf" + {"⋎", "⋎"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 471 "HTMLCharacterReference.gperf" + {"⟿", "⟿"}, + {""}, +#line 2066 "HTMLCharacterReference.gperf" + {"𝕎", "𝕎"}, + {""}, {""}, {""}, {""}, {""}, +#line 1570 "HTMLCharacterReference.gperf" + {"⥬", "⥬"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1067 "HTMLCharacterReference.gperf" + {"⫰", "⫰"}, + {""}, {""}, +#line 1718 "HTMLCharacterReference.gperf" + {"⪬", "⪬"}, +#line 287 "HTMLCharacterReference.gperf" + {"’", "’"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1531 "HTMLCharacterReference.gperf" + {"⤏", "⤏"}, + {""}, {""}, {""}, {""}, +#line 122 "HTMLCharacterReference.gperf" + {"ℬ", "ℬ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1966 "HTMLCharacterReference.gperf" + {"↕", "↕"}, +#line 817 "HTMLCharacterReference.gperf" + {"ķ", "ķ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1636 "HTMLCharacterReference.gperf" + {"⋌", "⋌"}, + {""}, {""}, {""}, {""}, +#line 1052 "HTMLCharacterReference.gperf" + {"▮", "▮"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1829 "HTMLCharacterReference.gperf" + {"⫔", "⫔"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 568 "HTMLCharacterReference.gperf" + {"fi", "fi"}, +#line 1238 "HTMLCharacterReference.gperf" + {"⊉", "⊉"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1764 "HTMLCharacterReference.gperf" + {"ϕ", "ϕ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 286 "HTMLCharacterReference.gperf" + {"”", "”"}, + {""}, {""}, {""}, +#line 1097 "HTMLCharacterReference.gperf" + {"♮", "♮"}, + {""}, {""}, +#line 1317 "HTMLCharacterReference.gperf" + {"⊵⃒", "⊵⃒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 97 "HTMLCharacterReference.gperf" + {"∳", "∳"}, +#line 709 
"HTMLCharacterReference.gperf" + {"↪", "↪"}, + {""}, {""}, +#line 1240 "HTMLCharacterReference.gperf" + {"≄", "≄"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1922 "HTMLCharacterReference.gperf" + {"ў", "ў"}, +#line 816 "HTMLCharacterReference.gperf" + {"Ķ", "Ķ"}, +#line 1198 "HTMLCharacterReference.gperf" + {"≏̸", "≏̸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1222 "HTMLCharacterReference.gperf" + {"⋠", "⋠"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 307 "HTMLCharacterReference.gperf" + {"ℂ", "ℂ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1634 "HTMLCharacterReference.gperf" + {"’", "’"}, +#line 1635 "HTMLCharacterReference.gperf" + {"’", "’"}, + {""}, {""}, {""}, +#line 271 "HTMLCharacterReference.gperf" + {"⊛", "⊛"}, + {""}, {""}, {""}, +#line 351 "HTMLCharacterReference.gperf" + {"∲", "∲"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1719 "HTMLCharacterReference.gperf" + {"⪬︀", "⪬︀"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 141 "HTMLCharacterReference.gperf" + {"⋀", "⋀"}, +#line 1716 "HTMLCharacterReference.gperf" + {"⌣", "⌣"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 966 "HTMLCharacterReference.gperf" + {"ŀ", "ŀ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1184 "HTMLCharacterReference.gperf" + {"≭", "≭"}, +#line 1653 "HTMLCharacterReference.gperf" + {"≽", "≽"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1579 "HTMLCharacterReference.gperf" + {"⇄", "⇄"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1863 "HTMLCharacterReference.gperf" + {" ", " "}, +#line 1338 "HTMLCharacterReference.gperf" + {"Œ", "Œ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2089 "HTMLCharacterReference.gperf" + {"𝕩", "𝕩"}, + {""}, {""}, {""}, +#line 911 "HTMLCharacterReference.gperf" + {"⋋", "⋋"}, + {""}, {""}, {""}, +#line 834 "HTMLCharacterReference.gperf" + {"⦴", "⦴"}, + {""}, {""}, +#line 1233 "HTMLCharacterReference.gperf" + {"⊁", "⊁"}, + {""}, {""}, +#line 1899 "HTMLCharacterReference.gperf" + {"⃛", "◌⃛"}, +#line 1901 "HTMLCharacterReference.gperf" + {"⧍", "⧍"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1468 "HTMLCharacterReference.gperf" + {"⋨", "⋨"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1840 "HTMLCharacterReference.gperf" + {"τ", "τ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1606 "HTMLCharacterReference.gperf" + {"˚", "˚"}, + {""}, +#line 1874 "HTMLCharacterReference.gperf" + {"⊠", "⊠"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1176 "HTMLCharacterReference.gperf" + {"∤", "∤"}, + {""}, +#line 1724 "HTMLCharacterReference.gperf" + {"⌿", "⌿"}, + {""}, +#line 752 "HTMLCharacterReference.gperf" + {"ℑ", "ℑ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1954 "HTMLCharacterReference.gperf" + {"⊎", "⊎"}, +#line 521 "HTMLCharacterReference.gperf" + {"⩱", "⩱"}, +#line 965 "HTMLCharacterReference.gperf" + {"Ŀ", "Ŀ"}, + {""}, {""}, +#line 111 "HTMLCharacterReference.gperf" + {"⎶", "⎶"}, +#line 744 "HTMLCharacterReference.gperf" + {"IJ", "IJ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1068 "HTMLCharacterReference.gperf" + {"·", 
"·"}, +#line 1928 "HTMLCharacterReference.gperf" + {"у", "у"}, +#line 1405 "HTMLCharacterReference.gperf" + {"∂", "∂"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1055 "HTMLCharacterReference.gperf" + {"м", "м"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1326 "HTMLCharacterReference.gperf" + {"⊛", "⊛"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1428 "HTMLCharacterReference.gperf" + {"+", "+"}, + {""}, +#line 1427 "HTMLCharacterReference.gperf" + {"ℏ", "ℏ"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1400 "HTMLCharacterReference.gperf" + {"∥", "∥"}, + {""}, +#line 643 "HTMLCharacterReference.gperf" + {"⪤", "⪤"}, + {""}, +#line 2030 "HTMLCharacterReference.gperf" + {"⋮", "⋮"}, + {""}, {""}, {""}, +#line 32 "HTMLCharacterReference.gperf" + {"Æ", "Æ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1602 "HTMLCharacterReference.gperf" + {"↾", "↾"}, + {""}, {""}, +#line 1124 "HTMLCharacterReference.gperf" + {"≢", "≢"}, +#line 1334 "HTMLCharacterReference.gperf" + {"ő", "ő"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 2046 "HTMLCharacterReference.gperf" + {"𝕧", "𝕧"}, + {""}, {""}, {""}, {""}, {""}, +#line 1685 "HTMLCharacterReference.gperf" + {"щ", "щ"}, + {""}, {""}, {""}, {""}, +#line 1601 "HTMLCharacterReference.gperf" + {"⥜", "⥜"}, + {""}, {""}, {""}, {""}, +#line 565 "HTMLCharacterReference.gperf" + {"ffl", "ffl"}, + {""}, +#line 1875 "HTMLCharacterReference.gperf" + {"⨱", "⨱"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1609 "HTMLCharacterReference.gperf" + {"⇌", "⇌"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1947 "HTMLCharacterReference.gperf" + {"ū", "ū"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1401 "HTMLCharacterReference.gperf" + {"¶", "¶"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1042 "HTMLCharacterReference.gperf" + {"¯", "¯"}, +#line 2045 "HTMLCharacterReference.gperf" + {"𝕍", "𝕍"}, + {""}, {""}, {""}, {""}, {""}, +#line 967 "HTMLCharacterReference.gperf" + {"⎰", "⎰"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1839 "HTMLCharacterReference.gperf" + {"Τ", "Τ"}, + {""}, +#line 1436 "HTMLCharacterReference.gperf" + {"±", "±"}, + {""}, {""}, {""}, {""}, {""}, +#line 1711 "HTMLCharacterReference.gperf" + {"∘", "∘"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 555 "HTMLCharacterReference.gperf" + {"∃", "∃"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1962 "HTMLCharacterReference.gperf" + {"⤒", "⤒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 40 "HTMLCharacterReference.gperf" + {"ℵ", "ℵ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1786 "HTMLCharacterReference.gperf" + {"⫓", "⫓"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1964 "HTMLCharacterReference.gperf" + {"↕", "↕"}, + {""}, {""}, +#line 1434 "HTMLCharacterReference.gperf" + {"⩲", "⩲"}, + {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1448 "HTMLCharacterReference.gperf" + {"≼", "≼"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 750 "HTMLCharacterReference.gperf" + {"ⅈ", "ⅈ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1911 "HTMLCharacterReference.gperf" + {"ŧ", "ŧ"}, + {""}, {""}, {""}, {""}, +#line 1439 "HTMLCharacterReference.gperf" + {"±", "±"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 360 "HTMLCharacterReference.gperf" + {"‐", "‐"}, + {""}, {""}, {""}, {""}, {""}, +#line 362 "HTMLCharacterReference.gperf" + {"⊣", "⊣"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1726 "HTMLCharacterReference.gperf" + {"𝕤", "𝕤"}, + {""}, {""}, {""}, {""}, {""}, +#line 1438 "HTMLCharacterReference.gperf" + {"⨧", "⨧"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1847 "HTMLCharacterReference.gperf" + {"т", "т"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 361 "HTMLCharacterReference.gperf" + {"⫤", "⫤"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1232 "HTMLCharacterReference.gperf" + {"⊈", "⊈"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 504 "HTMLCharacterReference.gperf" + {"∅", "∅"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1528 "HTMLCharacterReference.gperf" + {"∶", "∶"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1816 "HTMLCharacterReference.gperf" + {"⫗", "⫗"}, +#line 1593 "HTMLCharacterReference.gperf" + {"⊢", "⊢"}, + {""}, +#line 1549 "HTMLCharacterReference.gperf" + {"”", "”"}, +#line 1550 "HTMLCharacterReference.gperf" + {"”", "”"}, +#line 108 "HTMLCharacterReference.gperf" + {"⌅", "⌅"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1595 "HTMLCharacterReference.gperf" + {"⥛", "⥛"}, + {""}, {""}, {""}, {""}, {""}, +#line 1069 "HTMLCharacterReference.gperf" + {"−", "−"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 905 "HTMLCharacterReference.gperf" + {"⇋", "⇋"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 937 "HTMLCharacterReference.gperf" + {"⋚", "⋚"}, + {""}, {""}, +#line 1513 "HTMLCharacterReference.gperf" + {"→", "→"}, + {""}, {""}, {""}, {""}, {""}, +#line 1753 "HTMLCharacterReference.gperf" + {"▪", "▪"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1430 "HTMLCharacterReference.gperf" + {"⊞", "⊞"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1910 "HTMLCharacterReference.gperf" + {"Ŧ", "Ŧ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 517 "HTMLCharacterReference.gperf" + {"𝔼", "𝔼"}, + {""}, {""}, {""}, {""}, +#line 918 "HTMLCharacterReference.gperf" + {"⥘", "⥘"}, +#line 1520 
"HTMLCharacterReference.gperf" + {"↬", "↬"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1432 "HTMLCharacterReference.gperf" + {"∔", "∔"}, + {""}, {""}, {""}, {""}, {""}, +#line 1521 "HTMLCharacterReference.gperf" + {"⥅", "⥅"}, + {""}, {""}, {""}, +#line 903 "HTMLCharacterReference.gperf" + {"↔", "↔"}, +#line 904 "HTMLCharacterReference.gperf" + {"⇆", "⇆"}, + {""}, {""}, {""}, +#line 1576 "HTMLCharacterReference.gperf" + {"⇒", "⇒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1846 "HTMLCharacterReference.gperf" + {"Т", "Т"}, + {""}, {""}, {""}, {""}, {""}, +#line 505 "HTMLCharacterReference.gperf" + {"∅", "∅"}, + {""}, {""}, +#line 1408 "HTMLCharacterReference.gperf" + {"п", "п"}, + {""}, {""}, {""}, +#line 1071 "HTMLCharacterReference.gperf" + {"∸", "∸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1797 "HTMLCharacterReference.gperf" + {"⋩", "⋩"}, +#line 420 "HTMLCharacterReference.gperf" + {"⌆", "⌆"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1517 "HTMLCharacterReference.gperf" + {"⤳", "⤳"}, + {""}, {""}, {""}, {""}, {""}, +#line 1524 "HTMLCharacterReference.gperf" + {"↣", "↣"}, + {""}, +#line 1594 "HTMLCharacterReference.gperf" + {"↦", "↦"}, + {""}, {""}, {""}, {""}, {""}, +#line 1331 "HTMLCharacterReference.gperf" + {"о", "о"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1433 "HTMLCharacterReference.gperf" + {"⨥", "⨥"}, + {""}, {""}, {""}, {""}, {""}, +#line 1845 "HTMLCharacterReference.gperf" + {"ţ", "ţ"}, + {""}, {""}, {""}, {""}, +#line 603 "HTMLCharacterReference.gperf" + {"ǵ", "ǵ"}, + {""}, {""}, {""}, {""}, +#line 833 "HTMLCharacterReference.gperf" + {"ĺ", "ĺ"}, + {""}, {""}, {""}, {""}, +#line 473 "HTMLCharacterReference.gperf" + {"é", "é"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 704 "HTMLCharacterReference.gperf" + {"⤥", "⤥"}, + {""}, {""}, {""}, +#line 1714 "HTMLCharacterReference.gperf" + {"⧤", "⧤"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 902 "HTMLCharacterReference.gperf" + {"⇔", "⇔"}, +#line 21 "HTMLCharacterReference.gperf" + {"á", "á"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 988 "HTMLCharacterReference.gperf" + {"⟶", "⟶"}, +#line 2123 "HTMLCharacterReference.gperf" + {"ź", "ź"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 227 "HTMLCharacterReference.gperf" + {"ć", "ć"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 828 "HTMLCharacterReference.gperf" + {"𝕜", "𝕜"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 703 "HTMLCharacterReference.gperf" + {"ℋ", "ℋ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1356 "HTMLCharacterReference.gperf" + {"ō", "ō"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1915 "HTMLCharacterReference.gperf" + {"Ú", "Ú"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 832 "HTMLCharacterReference.gperf" + {"Ĺ", "Ĺ"}, + {""}, {""}, {""}, {""}, +#line 2101 
"HTMLCharacterReference.gperf" + {"Ý", "Ý"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 39 "HTMLCharacterReference.gperf" + {"ℵ", "ℵ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 2102 "HTMLCharacterReference.gperf" + {"ý", "ý"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 724 "HTMLCharacterReference.gperf" + {"í", "í"}, + {""}, {""}, +#line 1912 "HTMLCharacterReference.gperf" + {"≬", "≬"}, +#line 987 "HTMLCharacterReference.gperf" + {"⟹", "⟹"}, + {""}, {""}, +#line 1583 "HTMLCharacterReference.gperf" + {"⥝", "⥝"}, + {""}, {""}, +#line 1501 "HTMLCharacterReference.gperf" + {"Ŕ", "Ŕ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 827 "HTMLCharacterReference.gperf" + {"𝕂", "𝕂"}, + {""}, {""}, {""}, {""}, {""}, +#line 1564 "HTMLCharacterReference.gperf" + {"⌋", "⌋"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1844 "HTMLCharacterReference.gperf" + {"Ţ", "Ţ"}, + {""}, {""}, {""}, {""}, +#line 1785 "HTMLCharacterReference.gperf" + {"⫕", "⫕"}, + {""}, {""}, {""}, {""}, +#line 1645 "HTMLCharacterReference.gperf" + {"Ś", "Ś"}, + {""}, {""}, {""}, {""}, {""}, +#line 1607 "HTMLCharacterReference.gperf" + {"≓", "≓"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 239 "HTMLCharacterReference.gperf" + {"ℭ", "ℭ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1324 "HTMLCharacterReference.gperf" + {"Ó", "Ó"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1189 "HTMLCharacterReference.gperf" + {"∄", "∄"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1122 "HTMLCharacterReference.gperf" + {"​", "​"}, + {""}, {""}, {""}, +#line 1072 "HTMLCharacterReference.gperf" + {"⨪", "⨪"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1090 "HTMLCharacterReference.gperf" + {"ń", "ń"}, + {""}, {""}, {""}, {""}, +#line 939 "HTMLCharacterReference.gperf" + {"≶", "≶"}, + {""}, {""}, {""}, {""}, {""}, +#line 107 "HTMLCharacterReference.gperf" + {"⌆", "⌆"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1865 "HTMLCharacterReference.gperf" + {"∼", "∼"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1546 "HTMLCharacterReference.gperf" + {"р", "р"}, + {""}, +#line 1515 "HTMLCharacterReference.gperf" + {"⇥", "⇥"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1503 "HTMLCharacterReference.gperf" + {"√", "√"}, +#line 1089 "HTMLCharacterReference.gperf" + {"Ń", "Ń"}, + {""}, {""}, {""}, +#line 1999 "HTMLCharacterReference.gperf" + {"ϵ", "ϵ"}, + {""}, {""}, {""}, {""}, +#line 906 "HTMLCharacterReference.gperf" + {"↭", "↭"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 901 "HTMLCharacterReference.gperf" + {"↔", "↔"}, + {""}, +#line 1223 "HTMLCharacterReference.gperf" + {"∌", "∌"}, + {""}, {""}, {""}, +#line 1236 "HTMLCharacterReference.gperf" + {"≿̸", "≿̸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 138 "HTMLCharacterReference.gperf" + {"△", "△"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 20 
"HTMLCharacterReference.gperf" + {"Á", "Á"}, + {""}, {""}, {""}, {""}, {""}, +#line 1123 "HTMLCharacterReference.gperf" + {"​", "​"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1715 "HTMLCharacterReference.gperf" + {"∣", "∣"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1584 "HTMLCharacterReference.gperf" + {"⇂", "⇂"}, + {""}, +#line 1574 "HTMLCharacterReference.gperf" + {"⟩", "⟩"}, +#line 1585 "HTMLCharacterReference.gperf" + {"⥕", "⥕"}, + {""}, {""}, {""}, {""}, +#line 1086 "HTMLCharacterReference.gperf" + {"⊸", "⊸"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1763 "HTMLCharacterReference.gperf" + {"ϵ", "ϵ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1070 "HTMLCharacterReference.gperf" + {"⊟", "⊟"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1535 "HTMLCharacterReference.gperf" + {"]", "]"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 723 "HTMLCharacterReference.gperf" + {"Í", "Í"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 109 "HTMLCharacterReference.gperf" + {"⌅", "⌅"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1514 "HTMLCharacterReference.gperf" + {"⥵", "⥵"}, + {""}, +#line 1837 "HTMLCharacterReference.gperf" + {" ", "␉"}, + {""}, {""}, {""}, {""}, {""}, +#line 1268 "HTMLCharacterReference.gperf" + {"∦", "∦"}, + {""}, +#line 2122 "HTMLCharacterReference.gperf" + {"Ź", "Ź"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 986 "HTMLCharacterReference.gperf" + {"⟶", "⟶"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1234 "HTMLCharacterReference.gperf" + {"⪰̸", "⪰̸"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1838 "HTMLCharacterReference.gperf" + {"⌖", "⌖"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2001 "HTMLCharacterReference.gperf" + {"∅", "∅"}, + {""}, {""}, {""}, {""}, {""}, +#line 1542 "HTMLCharacterReference.gperf" + {"ŗ", "ŗ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 134 "HTMLCharacterReference.gperf" + {"⨂", "⨂"}, +#line 815 "HTMLCharacterReference.gperf" + {"ϰ", "ϰ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1048 "HTMLCharacterReference.gperf" + {"↦", "↦"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1197 "HTMLCharacterReference.gperf" + {"≎̸", "≎̸"}, + {""}, +#line 1600 "HTMLCharacterReference.gperf" + {"⥏", "⥏"}, + {""}, +#line 769 "HTMLCharacterReference.gperf" + {"⨗", "⨗"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1958 "HTMLCharacterReference.gperf" + {"𝕦", "𝕦"}, + {""}, {""}, {""}, +#line 155 
"HTMLCharacterReference.gperf" + {"≡⃥", "≡⃥"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1079 "HTMLCharacterReference.gperf" + {"𝕞", "𝕞"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1516 "HTMLCharacterReference.gperf" + {"⤠", "⤠"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1551 "HTMLCharacterReference.gperf" + {"↳", "↳"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1975 "HTMLCharacterReference.gperf" + {"ϒ", "ϒ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1402 "HTMLCharacterReference.gperf" + {"∥", "∥"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1849 "HTMLCharacterReference.gperf" + {"⌕", "⌕"}, + {""}, {""}, +#line 899 "HTMLCharacterReference.gperf" + {"↼", "↼"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1908 "HTMLCharacterReference.gperf" + {"Ћ", "Ћ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 968 "HTMLCharacterReference.gperf" + {"⎰", "⎰"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1506 "HTMLCharacterReference.gperf" + {"⟩", "⟩"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1160 "HTMLCharacterReference.gperf" + {"⇍", "⇍"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1627 "HTMLCharacterReference.gperf" + {"⇛", "⇛"}, + {""}, {""}, {""}, +#line 226 "HTMLCharacterReference.gperf" + {"Ć", "Ć"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1909 "HTMLCharacterReference.gperf" + {"ћ", "ћ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1051 "HTMLCharacterReference.gperf" + {"↥", "↥"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 942 "HTMLCharacterReference.gperf" + {"≲", "≲"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1386 "HTMLCharacterReference.gperf" + {"ø", "ø"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1663 "HTMLCharacterReference.gperf" + {"⨓", "⨓"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1259 "HTMLCharacterReference.gperf" + {"↛", "↛"}, +#line 1798 "HTMLCharacterReference.gperf" + {"≿", "≿"}, + {""}, {""}, +#line 962 "HTMLCharacterReference.gperf" + {"⇚", "⇚"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1461 "HTMLCharacterReference.gperf" + {"⋨", "⋨"}, + {""}, {""}, {""}, +#line 137 "HTMLCharacterReference.gperf" + {"▽", "▽"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1527 "HTMLCharacterReference.gperf" + {"⤚", "⤚"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1162 "HTMLCharacterReference.gperf" + {"⇎", "⇎"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1508 "HTMLCharacterReference.gperf" + {"⦥", "⦥"}, + {""}, {""}, {""}, {""}, +#line 1425 "HTMLCharacterReference.gperf" + {"ℏ", "ℏ"}, + {""}, {""}, {""}, +#line 1529 "HTMLCharacterReference.gperf" + {"ℚ", "ℚ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1861 "HTMLCharacterReference.gperf" + {"  ", "  "}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1360 "HTMLCharacterReference.gperf" + {"ο", "ο"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1883 "HTMLCharacterReference.gperf" + {"𝕥", "𝕥"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 742 "HTMLCharacterReference.gperf" + {"⧜", "⧜"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1339 "HTMLCharacterReference.gperf" + {"œ", "œ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1113 "HTMLCharacterReference.gperf" + {"–", "–"}, + {""}, {""}, {""}, {""}, +#line 1161 "HTMLCharacterReference.gperf" + {"↚", "↚"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1404 "HTMLCharacterReference.gperf" + {"⫽", "⫽"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1049 "HTMLCharacterReference.gperf" + {"↧", "↧"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2000 "HTMLCharacterReference.gperf" + {"ϰ", "ϰ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1500 "HTMLCharacterReference.gperf" + {"∽̱", "∽̱"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 945 "HTMLCharacterReference.gperf" + {"⥼", "⥼"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 380 "HTMLCharacterReference.gperf" + {"⥿", "⥿"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1603 "HTMLCharacterReference.gperf" + {"⥔", "⥔"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1887 "HTMLCharacterReference.gperf" + {"™", "™"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1311 "HTMLCharacterReference.gperf" + {"⧞", "⧞"}, + {""}, +#line 1882 "HTMLCharacterReference.gperf" + {"𝕋", "𝕋"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1443 "HTMLCharacterReference.gperf" + {"𝕡", "𝕡"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1224 "HTMLCharacterReference.gperf" + {"⋫", "⋫"}, + {""}, {""}, +#line 1225 "HTMLCharacterReference.gperf" + {"⧐̸", "⧐̸"}, + {""}, +#line 1226 "HTMLCharacterReference.gperf" + {"⋭", "⋭"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1163 
"HTMLCharacterReference.gperf" + {"↮", "↮"}, + {""}, {""}, {""}, +#line 1507 "HTMLCharacterReference.gperf" + {"⦒", "⦒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 101 "HTMLCharacterReference.gperf" + {"‵", "‵"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1364 "HTMLCharacterReference.gperf" + {"𝕠", "𝕠"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 898 "HTMLCharacterReference.gperf" + {"↽", "↽"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1518 "HTMLCharacterReference.gperf" + {"⤞", "⤞"}, + {""}, {""}, {""}, {""}, +#line 1509 "HTMLCharacterReference.gperf" + {"⟩", "⟩"}, + {""}, {""}, {""}, +#line 1836 "HTMLCharacterReference.gperf" + {"ß", "ß"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1429 "HTMLCharacterReference.gperf" + {"⨣", "⨣"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1646 "HTMLCharacterReference.gperf" + {"ś", "ś"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 535 "HTMLCharacterReference.gperf" + {"⇌", "⇌"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 907 "HTMLCharacterReference.gperf" + {"⥎", "⥎"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 314 "HTMLCharacterReference.gperf" + {"∳", "∳"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 356 "HTMLCharacterReference.gperf" + {"ℸ", "ℸ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2023 "HTMLCharacterReference.gperf" + {"⊨", "⊨"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 507 "HTMLCharacterReference.gperf" + {"∅", "∅"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2011 "HTMLCharacterReference.gperf" + {"⊋︀", "⊋︀"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1914 "HTMLCharacterReference.gperf" + {"↠", "↠"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 472 "HTMLCharacterReference.gperf" + {"É", "É"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1358 "HTMLCharacterReference.gperf" + {"ω", "ω"}, + {""}, {""}, {""}, {""}, +#line 2021 "HTMLCharacterReference.gperf" + {"⊫", "⊫"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1121 "HTMLCharacterReference.gperf" + {"​", "​"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1619 "HTMLCharacterReference.gperf" + {"𝕣", "𝕣"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 2012 "HTMLCharacterReference.gperf" + {"⫌︀", "⫌︀"}, + {""}, {""}, {""}, {""}, +#line 1860 "HTMLCharacterReference.gperf" + {"∼", "∼"}, +#line 1970 "HTMLCharacterReference.gperf" + {"⊎", "⊎"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 854 "HTMLCharacterReference.gperf" + {"⥳", "⥳"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 691 "HTMLCharacterReference.gperf" + {"⥈", "⥈"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1473 "HTMLCharacterReference.gperf" + {"⌓", "⌓"}, + {""}, {""}, +#line 1376 "HTMLCharacterReference.gperf" + {"ª", "ª"}, + {""}, {""}, +#line 1625 "HTMLCharacterReference.gperf" + {"⨒", "⨒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 880 "HTMLCharacterReference.gperf" + {"⥧", "⥧"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 913 "HTMLCharacterReference.gperf" + {"⧏", "⧏"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 804 "HTMLCharacterReference.gperf" + {"ȷ", "ȷ"}, + {""}, {""}, {""}, {""}, {""}, +#line 1815 "HTMLCharacterReference.gperf" + {"⟉", "⟉"}, + {""}, {""}, +#line 754 "HTMLCharacterReference.gperf" + {"⊷", "⊷"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1578 "HTMLCharacterReference.gperf" + {"⇥", "⇥"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 753 "HTMLCharacterReference.gperf" + {"ı", "ı"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1597 "HTMLCharacterReference.gperf" + {"⊳", "⊳"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 2024 "HTMLCharacterReference.gperf" + {"⊢", "⊢"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 397 "HTMLCharacterReference.gperf" + {"ⅆ", "ⅆ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 284 "HTMLCharacterReference.gperf" + {"⧂", "⧂"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 443 "HTMLCharacterReference.gperf" + {"⇂", "⇂"}, + {""}, {""}, {""}, {""}, {""}, +#line 1058 "HTMLCharacterReference.gperf" + {"∡", "∡"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 386 "HTMLCharacterReference.gperf" + {"´", "´"}, + {""}, {""}, {""}, {""}, +#line 1462 "HTMLCharacterReference.gperf" + {"≾", "≾"}, + {""}, {""}, +#line 387 "HTMLCharacterReference.gperf" + {"˙", "˙"}, + {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 2022 "HTMLCharacterReference.gperf" + {"⊩", "⊩"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 448 "HTMLCharacterReference.gperf" + {"⥟", "⥟"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 388 "HTMLCharacterReference.gperf" + {"˝", "˝"}, + {""}, +#line 1592 "HTMLCharacterReference.gperf" + {"↝", "↝"}, + {""}, +#line 2025 "HTMLCharacterReference.gperf" + {"⫦", "⫦"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1361 "HTMLCharacterReference.gperf" + {"⦶", "⦶"}, + {""}, {""}, {""}, {""}, {""}, +#line 1604 "HTMLCharacterReference.gperf" + {"⇀", "⇀"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1044 "HTMLCharacterReference.gperf" + {"✠", "✠"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1510 "HTMLCharacterReference.gperf" + {"»", "»"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1039 "HTMLCharacterReference.gperf" + {"⥦", "⥦"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1471 "HTMLCharacterReference.gperf" + {"⌮", "⌮"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 705 "HTMLCharacterReference.gperf" + {"⤦", "⤦"}, + {""}, +#line 2009 "HTMLCharacterReference.gperf" + {"⊊︀", "⊊︀"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1581 "HTMLCharacterReference.gperf" + {"⌉", "⌉"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1590 "HTMLCharacterReference.gperf" + {"⇌", "⇌"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1525 "HTMLCharacterReference.gperf" + {"↝", "↝"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1319 "HTMLCharacterReference.gperf" + {"⤣", "⤣"}, + {""}, {""}, +#line 1403 "HTMLCharacterReference.gperf" + {"⫳", "⫳"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 449 "HTMLCharacterReference.gperf" + {"⇁", "⇁"}, + {""}, {""}, +#line 450 "HTMLCharacterReference.gperf" + {"⥗", "⥗"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1589 "HTMLCharacterReference.gperf" + {"⇄", "⇄"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, + {""}, {""}, {""}, {""}, +#line 1884 "HTMLCharacterReference.gperf" + {"⫚", "⫚"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1441 "HTMLCharacterReference.gperf" + {"⨕", "⨕"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1235 "HTMLCharacterReference.gperf" + {"⋡", "⋡"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1369 "HTMLCharacterReference.gperf" + {"⊕", "⊕"}, + {""}, +#line 2010 "HTMLCharacterReference.gperf" + {"⫋︀", "⫋︀"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 102 "HTMLCharacterReference.gperf" + {"∽", "∽"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1916 "HTMLCharacterReference.gperf" + {"ú", "ú"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 559 "HTMLCharacterReference.gperf" + {"≒", "≒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1315 "HTMLCharacterReference.gperf" + {"⊴⃒", "⊴⃒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1889 "HTMLCharacterReference.gperf" + {"▵", "▵"}, +#line 1893 "HTMLCharacterReference.gperf" + {"≜", "≜"}, + {""}, {""}, +#line 1891 "HTMLCharacterReference.gperf" + {"◃", "◃"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1892 "HTMLCharacterReference.gperf" + {"⊴", "⊴"}, + {""}, {""}, +#line 1948 "HTMLCharacterReference.gperf" + {"¨", "¨"}, + {""}, {""}, {""}, {""}, +#line 1362 "HTMLCharacterReference.gperf" + {"⊖", "⊖"}, + {""}, {""}, {""}, {""}, +#line 1890 "HTMLCharacterReference.gperf" + {"▿", "▿"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1586 "HTMLCharacterReference.gperf" + {"⌋", "⌋"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2007 "HTMLCharacterReference.gperf" + {"ϱ", "ϱ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1258 "HTMLCharacterReference.gperf" + {"⇏", "⇏"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1869 "HTMLCharacterReference.gperf" + {"˜", "˜"}, + {""}, {""}, +#line 1504 "HTMLCharacterReference.gperf" + {"⦳", "⦳"}, + {""}, {""}, {""}, +#line 1115 "HTMLCharacterReference.gperf" + {"⤤", "⤤"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1045 "HTMLCharacterReference.gperf" + {"✠", "✠"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, +#line 442 "HTMLCharacterReference.gperf" + {"⇃", "⇃"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1426 "HTMLCharacterReference.gperf" + {"ℎ", "ℎ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 569 "HTMLCharacterReference.gperf" + {"◼", "◼"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1435 "HTMLCharacterReference.gperf" + {"±", "±"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1406 "HTMLCharacterReference.gperf" + {"∂", "∂"}, + {""}, +#line 103 "HTMLCharacterReference.gperf" + {"⋍", "⋍"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1178 "HTMLCharacterReference.gperf" + {" ", " "}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1073 "HTMLCharacterReference.gperf" + {"∓", "∓"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1894 "HTMLCharacterReference.gperf" + {"▹", "▹"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1868 "HTMLCharacterReference.gperf" + {"∼", "∼"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1611 "HTMLCharacterReference.gperf" + {"⎱", "⎱"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1043 "HTMLCharacterReference.gperf" + {"♂", "♂"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1587 "HTMLCharacterReference.gperf" + {"⇁", "⇁"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1857 "HTMLCharacterReference.gperf" + {"ϑ", "ϑ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1588 "HTMLCharacterReference.gperf" + {"⇀", "⇀"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 390 "HTMLCharacterReference.gperf" + {"˜", "˜"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1870 "HTMLCharacterReference.gperf" + {"≃", "≃"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, +#line 1325 "HTMLCharacterReference.gperf" + {"ó", "ó"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 2015 "HTMLCharacterReference.gperf" + {"⊳", "⊳"}, + {""}, {""}, +#line 1577 "HTMLCharacterReference.gperf" + {"→", "→"}, + {""}, {""}, {""}, {""}, +#line 235 "HTMLCharacterReference.gperf" + {"ⅅ", "ⅅ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1895 "HTMLCharacterReference.gperf" + {"⊵", "⊵"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1599 "HTMLCharacterReference.gperf" + {"⊵", "⊵"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1056 "HTMLCharacterReference.gperf" + {"—", "—"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1423 "HTMLCharacterReference.gperf" + {"⋔", "⋔"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1591 "HTMLCharacterReference.gperf" + {"⇉", "⇉"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2056 "HTMLCharacterReference.gperf" + {"⦚", "⦚"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1831 "HTMLCharacterReference.gperf" + {"⤦", "⤦"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1502 "HTMLCharacterReference.gperf" + {"ŕ", "ŕ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 751 "HTMLCharacterReference.gperf" + {"ℐ", "ℐ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 2131 "HTMLCharacterReference.gperf" + {"​", "​"}, + {""}, {""}, {""}, {""}, {""}, +#line 1933 "HTMLCharacterReference.gperf" + {"⥾", "⥾"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 1099 "HTMLCharacterReference.gperf" + {"ℕ", "ℕ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1670 "HTMLCharacterReference.gperf" + {"⤥", "⤥"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 570 "HTMLCharacterReference.gperf" + {"▪", "▪"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1598 "HTMLCharacterReference.gperf" + {"⧐", "⧐"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 2037 "HTMLCharacterReference.gperf" + {"❘", "❘"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2014 "HTMLCharacterReference.gperf" + {"⊲", "⊲"}, + {""}, {""}, {""}, {""}, {""}, +#line 1098 "HTMLCharacterReference.gperf" + {"♮", "♮"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1332 "HTMLCharacterReference.gperf" + {"⊝", "⊝"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1605 "HTMLCharacterReference.gperf" + {"⥓", "⥓"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 885 "HTMLCharacterReference.gperf" + {"⟨", "⟨"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1582 "HTMLCharacterReference.gperf" + {"⟧", "⟧"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1612 "HTMLCharacterReference.gperf" + {"⎱", "⎱"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 938 "HTMLCharacterReference.gperf" + {"≦", "≦"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1050 "HTMLCharacterReference.gperf" + {"↤", "↤"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1563 "HTMLCharacterReference.gperf" + {"⥽", "⥽"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1431 "HTMLCharacterReference.gperf" + {"⨢", "⨢"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 685 "HTMLCharacterReference.gperf" + {"½", "½"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1437 "HTMLCharacterReference.gperf" + {"⨦", "⨦"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 944 "HTMLCharacterReference.gperf" + {"≲", "≲"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 772 "HTMLCharacterReference.gperf" + {"⁢", "⁢"}, + {""}, +#line 851 "HTMLCharacterReference.gperf" + {"↩", "↩"}, + {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 2008 "HTMLCharacterReference.gperf" + {"ς", "ς"}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 282 "HTMLCharacterReference.gperf" + {"⨐", "⨐"}, + {""}, +#line 389 "HTMLCharacterReference.gperf" + {"`", "`"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1642 "HTMLCharacterReference.gperf" + {"⧴", "⧴"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1713 "HTMLCharacterReference.gperf" + {"⨳", "⨳"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 1120 "HTMLCharacterReference.gperf" + {"​", "​"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1522 "HTMLCharacterReference.gperf" + {"⥴", "⥴"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1872 "HTMLCharacterReference.gperf" + {"≈", "≈"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1596 "HTMLCharacterReference.gperf" + {"⋌", "⋌"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1913 "HTMLCharacterReference.gperf" + {"↞", "↞"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1871 "HTMLCharacterReference.gperf" + {"≅", "≅"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 1967 "HTMLCharacterReference.gperf" + {"⥮", "⥮"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 2013 "HTMLCharacterReference.gperf" + {"ϑ", "ϑ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1472 "HTMLCharacterReference.gperf" + {"⌒", "⌒"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1580 "HTMLCharacterReference.gperf" + {"↣", "↣"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 2002 "HTMLCharacterReference.gperf" + {"ϕ", "ϕ"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 508 "HTMLCharacterReference.gperf" + {"▫", "▫"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 771 "HTMLCharacterReference.gperf" + {"⁣", "⁣"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 506 "HTMLCharacterReference.gperf" + {"◻", "◻"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1519 "HTMLCharacterReference.gperf" + {"↪", "↪"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 1903 "HTMLCharacterReference.gperf" + {"⏢", "⏢"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 1548 "HTMLCharacterReference.gperf" + {"⥩", "⥩"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 1643 "HTMLCharacterReference.gperf" + {"⥨", "⥨"} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + unsigned int key = hash (str, len); + + if (key <= MAX_HASH_VALUE) + { + const char *s = wordlist[key].name; + + if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0') + return &wordlist[key]; + } + } + return 0; +} +#line 2145 "HTMLCharacterReference.gperf" + From 110c03c17730cf22a6d6335dc991293e52370bdc Mon Sep 17 00:00:00 2001 From: VanDarkholme7 <980181529@qq.com> Date: Thu, 24 Aug 2023 02:26:26 +0000 Subject: [PATCH 0287/1687] empty commit to restart CI checks From 4cb4ff95bffb8448b18e6ff6a8d59d9c7f8689d6 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 24 Aug 2023 13:16:23 +0200 Subject: [PATCH 0288/1687] Restart CI From 798df8eae74f887f59c3ebdf91fb941851c704a3 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 24 Aug 2023 14:20:11 +0000 Subject: [PATCH 0289/1687] Fix nodes removal in hideRecursive() --- src/Common/Config/ConfigProcessor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 996d1051bad..c23cf977840 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -256,8 +256,9 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) void ConfigProcessor::hideRecursive(Poco::XML::Node * config_root) { - for (Node * node = config_root->firstChild(); node; node = node->nextSibling()) + for (Node * node = config_root->firstChild(); node;) { + Node * next_node = node->nextSibling(); if (node->nodeType() == 
Node::ELEMENT_NODE)
         {
             Element & element = dynamic_cast<Element &>(*node);
@@ -267,6 +268,7 @@ void ConfigProcessor::hideRecursive(Poco::XML::Node * config_root)
             }
             else
                 hideRecursive(node);
         }
+        node = next_node;
     }
 }
 

From b4e0ca3b0a72a7ae94eec2ac416fb6722b49074c Mon Sep 17 00:00:00 2001
From: Roman Vasin
Date: Thu, 24 Aug 2023 14:23:46 +0000
Subject: [PATCH 0290/1687] Add test for nested nodes, add checks for presence
 of nodes in pre-processed config

---
 .../configs/config.xml                        |  8 ++++++++
 .../configs/users.xml                         |  7 +++++++
 .../test_config_hidden_attributes/test.py     | 18 +++++++++++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 tests/integration/test_config_hidden_attributes/configs/users.xml

diff --git a/tests/integration/test_config_hidden_attributes/configs/config.xml b/tests/integration/test_config_hidden_attributes/configs/config.xml
index 7d622ef7bbc..8f5c6f2ed39 100644
--- a/tests/integration/test_config_hidden_attributes/configs/config.xml
+++ b/tests/integration/test_config_hidden_attributes/configs/config.xml
@@ -1,3 +1,11 @@
+
+
diff --git a/tests/integration/test_config_hidden_attributes/configs/users.xml b/tests/integration/test_config_hidden_attributes/configs/users.xml
new file mode 100644
index 00000000000..7f196179f80
--- /dev/null
+++ b/tests/integration/test_config_hidden_attributes/configs/users.xml
@@ -0,0 +1,7 @@
+
+
+
+            1
+
+
+
diff --git a/tests/integration/test_config_hidden_attributes/test.py b/tests/integration/test_config_hidden_attributes/test.py
index f7109022abd..7ab2cb0e225 100644
--- a/tests/integration/test_config_hidden_attributes/test.py
+++ b/tests/integration/test_config_hidden_attributes/test.py
@@ -2,8 +2,11 @@ import pytest
 import os
 from helpers.cluster import ClickHouseCluster
 
+
 cluster = ClickHouseCluster(__file__)
-node = cluster.add_instance("node", main_configs=["configs/config.xml"])
+node = cluster.add_instance(
+    "node", main_configs=["configs/config.xml"], user_configs=["configs/users.xml"]
+)
 
 
 @pytest.fixture(scope="module")
@@ -23,3 +26,16 @@ def test_hidden(started_cluster):
         )
         == "60000000000\n"
     )
+    assert (
+        node.query(
+            "select value from system.server_settings where name ='max_partition_size_to_drop'"
+        )
+        == "40000000000\n"
+    )
+    assert "key_1" in node.query("select collection from system.named_collections")
+    out = node.exec_in_container(
+        ["cat", "/var/lib/clickhouse/preprocessed_configs/config.xml"]
+    )
+    assert "max_table_size_to_drop" not in out
+    assert "max_partition_size_to_drop" in out
+    assert "named_collections" not in out

From 11262d0156d1cb61ea5953002121eb28ab3faba4 Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Thu, 24 Aug 2023 17:11:58 +0200
Subject: [PATCH 0291/1687] Revert "Revert "dateDiff: add support for plural
 units.""

---
 .../functions/date-time-functions.md          | 20 +++++++++----------
 src/Functions/dateDiff.cpp                    | 20 +++++++++----------
 .../0_stateless/00538_datediff.reference      | 10 ++++++++++
 tests/queries/0_stateless/00538_datediff.sql  | 11 ++++++++++
 4 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 3901ca9667a..56a17519b61 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -815,16 +815,16 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
 
 - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
    Possible values:
 
-    - `microsecond` (possible abbreviations: `us`, `u`)
-    - `millisecond` (possible abbreviations: `ms`)
-    - `second` (possible abbreviations: `ss`, `s`)
-    - `minute` (possible abbreviations: `mi`, `n`)
-    - `hour` (possible abbreviations: `hh`, `h`)
-    - `day` (possible abbreviations: `dd`, `d`)
-    - `week` (possible abbreviations: `wk`, `ww`)
-    - `month` (possible abbreviations: `mm`, `m`)
-    - `quarter` (possible abbreviations: `qq`, `q`)
-    - `year` (possible abbreviations: `yyyy`, `yy`)
+    - `microsecond` (possible abbreviations: `microseconds`, `us`, `u`)
+    - `millisecond` (possible abbreviations: `milliseconds`, `ms`)
+    - `second` (possible abbreviations: `seconds`, `ss`, `s`)
+    - `minute` (possible abbreviations: `minutes`, `mi`, `n`)
+    - `hour` (possible abbreviations: `hours`, `hh`, `h`)
+    - `day` (possible abbreviations: `days`, `dd`, `d`)
+    - `week` (possible abbreviations: `weeks`, `wk`, `ww`)
+    - `month` (possible abbreviations: `months`, `mm`, `m`)
+    - `quarter` (possible abbreviations: `quarters`, `qq`, `q`)
+    - `year` (possible abbreviations: `years`, `yyyy`, `yy`)
 
 - `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
 
diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp
index 6bfbbb7c735..c9c9020f068 100644
--- a/src/Functions/dateDiff.cpp
+++ b/src/Functions/dateDiff.cpp
@@ -381,25 +381,25 @@ public:
         const auto & timezone_x = extractTimeZoneFromFunctionArguments(arguments, 3, 1);
         const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2);
 
-        if (unit == "year" || unit == "yy" || unit == "yyyy")
+        if (unit == "year" || unit == "years" || unit == "yy" || unit == "yyyy")
             impl.template dispatchForColumns<ToRelativeYearNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
-        else if (unit == "quarter" || unit == "qq" || unit == "q")
+        else if (unit == "quarter" || unit == "quarters" || unit == "qq" || unit == "q")
             impl.template dispatchForColumns<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
-        else if (unit == "month" || unit == "mm" || unit == "m")
+        else if (unit == "month" || unit == "months" || unit == "mm" || unit == "m")
             impl.template dispatchForColumns<ToRelativeMonthNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
-        else if (unit == "week" || unit == "wk" || unit == "ww")
+        else if (unit == "week" || unit == "weeks" || unit == "wk" || unit == "ww")
             impl.template dispatchForColumns<ToRelativeWeekNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
-        else if (unit == "day" || unit == "dd" || unit == "d")
+        else if (unit == "day" || unit == "days" || unit == "dd" || unit == "d")
             impl.template dispatchForColumns<ToRelativeDayNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
-        else if (unit == "hour" || unit == "hh" || unit == "h")
+        else if (unit == "hour" || unit == "hours" || unit == "hh" || unit == "h")
             impl.template dispatchForColumns<ToRelativeHourNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
-        else if (unit == "minute" || unit == "mi" || unit == "n")
+        else if (unit == "minute" || unit == "minutes" || unit == "mi" || unit == "n")
             impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
-        else if (unit == "second" || unit == "ss" || unit == "s")
+        else if (unit == "second" || unit == "seconds" || unit == "ss" || unit == "s")
             impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
-        else if (unit == "millisecond" || unit == "ms")
+        else if (unit == "millisecond" || unit == "milliseconds" || unit == "ms")
             impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<1000>>(x, y, timezone_x, timezone_y, res->getData());
-        else if (unit == "microsecond" || unit == "us" || unit == "u")
+        else if (unit == "microsecond" || unit == "microseconds" || unit == "us" || unit == "u")
             impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<1000000>>(x, y, timezone_x, timezone_y, res->getData());
         else
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
diff --git a/tests/queries/0_stateless/00538_datediff.reference b/tests/queries/0_stateless/00538_datediff.reference
index aac0767f49f..55853e53ec0 100644
--- a/tests/queries/0_stateless/00538_datediff.reference
+++ b/tests/queries/0_stateless/00538_datediff.reference
@@ -69,3 +69,13 @@ Additional test
 1
 1
 1
+-1
+-7
+-23
+-104
+-730
+-17520
+-1051200
+-63072000
+-63072000000
+-63072000000000
diff --git a/tests/queries/0_stateless/00538_datediff.sql b/tests/queries/0_stateless/00538_datediff.sql
index 5dc416ad0bd..f1469b75ffa 100644
--- a/tests/queries/0_stateless/00538_datediff.sql
+++ b/tests/queries/0_stateless/00538_datediff.sql
@@ -74,3 +74,14 @@ SELECT dateDiff('second', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('
 
 SELECT 'Additional test';
 SELECT number = dateDiff('month', now() - INTERVAL number MONTH, now()) FROM system.numbers LIMIT 10;
+
+SELECT dateDiff('years', toDate('2017-12-31'), toDate('2016-01-01'));
+SELECT dateDiff('quarters', toDate('2017-12-31'), toDate('2016-01-01'));
+SELECT dateDiff('months', toDateTime('2017-12-31'), toDateTime('2016-01-01'));
+SELECT dateDiff('weeks', toDateTime('2017-12-31'), toDateTime('2016-01-01'));
+SELECT dateDiff('days', toDateTime('2017-12-31'), toDateTime('2016-01-01'));
+SELECT dateDiff('hours', toDateTime('2017-12-31'), toDateTime('2016-01-01'), 'UTC');
+SELECT dateDiff('minutes', toDateTime('2017-12-31'), toDateTime('2016-01-01'), 'UTC');
+SELECT dateDiff('seconds', toDateTime('2017-12-31'), toDateTime('2016-01-01'), 'UTC');
+SELECT dateDiff('milliseconds', toDateTime('2017-12-31'), toDateTime('2016-01-01'), 'UTC');
+SELECT dateDiff('microseconds', toDateTime('2017-12-31'), toDateTime('2016-01-01'), 'UTC');

From 72947ad1434d35f8911e099bcde097685788d492 Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Thu, 24 Aug 2023 17:16:35 +0200
Subject: [PATCH 0292/1687] fix ci

---
 tests/queries/0_stateless/00538_datediff.sql | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/queries/0_stateless/00538_datediff.sql b/tests/queries/0_stateless/00538_datediff.sql
index f1469b75ffa..4c7c3aa14ff 100644
--- a/tests/queries/0_stateless/00538_datediff.sql
+++ b/tests/queries/0_stateless/00538_datediff.sql
@@ -80,8 +80,8 @@ SELECT dateDiff('quarters', toDate('2017-12-31'), toDate('2016-01-01'));
 SELECT dateDiff('months', toDateTime('2017-12-31'), toDateTime('2016-01-01'));
 SELECT dateDiff('weeks', toDateTime('2017-12-31'), toDateTime('2016-01-01'));
 SELECT dateDiff('days', toDateTime('2017-12-31'), toDateTime('2016-01-01'));
-SELECT dateDiff('hours', toDateTime('2017-12-31'), toDateTime('2016-01-01'), 'UTC');
-SELECT dateDiff('minutes', toDateTime('2017-12-31'), toDateTime('2016-01-01'), 'UTC');
-SELECT dateDiff('seconds', toDateTime('2017-12-31'), toDateTime('2016-01-01'), 'UTC');
-SELECT dateDiff('milliseconds', toDateTime('2017-12-31'), toDateTime('2016-01-01'), 'UTC');
-SELECT dateDiff('microseconds', toDateTime('2017-12-31'), toDateTime('2016-01-01'), 'UTC');
+SELECT dateDiff('hours', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC'));
+SELECT dateDiff('minutes', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC'));
+SELECT dateDiff('seconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC'));
+SELECT dateDiff('milliseconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC'));
+SELECT dateDiff('microseconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC'));

From bb1e3aaa293b7bdaedecca1de4d98e418af3edfb Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Thu, 24 Aug 2023 23:21:37 +0000
Subject: [PATCH 0293/1687] fix

---
 programs/main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/main.cpp b/programs/main.cpp
index 4af9e3a3067..07cea38a920 100644
--- a/programs/main.cpp
+++ b/programs/main.cpp
@@ -351,7 +351,7 @@ struct Checker
 ;
 
-#if !defined(USE_MUSL)
+#if !defined(FUZZING_MODE) && !defined(USE_MUSL)
 /// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete.
 void checkHarmfulEnvironmentVariables(char ** argv)
 {

From 415a993c914cf54445051ecd228268dfda6dfc0c Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Thu, 24 Aug 2023 23:22:39 +0000
Subject: [PATCH 0294/1687] fix fuzzers build

---
 CMakeLists.txt                                    | 34 ++++++++++++++++++-
 programs/server/fuzzers/CMakeLists.txt            |  3 --
 src/AggregateFunctions/fuzzers/CMakeLists.txt     |  2 +-
 src/Compression/fuzzers/CMakeLists.txt            | 10 +++---
 src/Core/fuzzers/CMakeLists.txt                   |  2 +-
 src/DataTypes/fuzzers/CMakeLists.txt              |  2 +-
 src/Formats/fuzzers/CMakeLists.txt                |  2 +-
 src/Interpreters/fuzzers/CMakeLists.txt           |  3 +-
 src/Parsers/fuzzers/CMakeLists.txt                |  6 ++--
 .../fuzzers/codegen_fuzzer/CMakeLists.txt         |  2 +-
 src/Storages/fuzzers/CMakeLists.txt               |  4 +--
 11 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 55bcf5fbf3c..7895421954a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -118,7 +118,11 @@ endif()
 # - sanitize.cmake
 add_library(global-libs INTERFACE)
 
-include (cmake/fuzzer.cmake)
+# We don't want to instrument everything with fuzzer, but only specific targets (see below).
+# Also, since we build our own llvm, we specifically don't want to instrument
+# the libFuzzer library itself - it would result in infinite recursion.
+#include (cmake/fuzzer.cmake)
+
 include (cmake/sanitize.cmake)
 
 option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON)
@@ -558,6 +562,34 @@ add_subdirectory (programs)
 add_subdirectory (tests)
 add_subdirectory (utils)
 
+# Function get_all_targets collects all targets recursively
+function(get_all_targets var)
+    macro(get_all_targets_recursive targets dir)
+        get_property(subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES)
+        foreach(subdir ${subdirectories})
+            get_all_targets_recursive(${targets} ${subdir})
+        endforeach()
+        get_property(current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS)
+        list(APPEND ${targets} ${current_targets})
+    endmacro()
+
+    set(targets)
+    get_all_targets_recursive(targets ${CMAKE_CURRENT_SOURCE_DIR})
+    set(${var} ${targets} PARENT_SCOPE)
+endfunction()
+
+if (FUZZER)
+    # Instrument all targets having pattern *_fuzzer with fuzzer and link with libfuzzer
+    get_all_targets(all_targets)
+    foreach(target ${all_targets})
+        if (target MATCHES ".+_fuzzer")
+            target_link_libraries(${target} PRIVATE ch_contrib::fuzzer)
+            target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link")
+            message(STATUS "${target} instrumented with fuzzer")
+        endif()
+    endforeach()
+endif()
+
 include (cmake/sanitize_targets.cmake)
 
 # Build native targets if necessary
diff --git
a/programs/server/fuzzers/CMakeLists.txt b/programs/server/fuzzers/CMakeLists.txt index b8f57c37e31..7be44ecd884 100644 --- a/programs/server/fuzzers/CMakeLists.txt +++ b/programs/server/fuzzers/CMakeLists.txt @@ -7,7 +7,6 @@ set (TCP_PROTOCOL_FUZZER_LINK clickhouse_functions clickhouse_table_functions ch_contrib::fuzzer - "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}" ) if (TARGET ch_contrib::jemalloc) @@ -15,5 +14,3 @@ if (TARGET ch_contrib::jemalloc) endif() target_link_libraries(tcp_protocol_fuzzer ${TCP_PROTOCOL_FUZZER_LINK}) - -add_dependencies(tcp_protocol_fuzzer clickhouse_server_configs) diff --git a/src/AggregateFunctions/fuzzers/CMakeLists.txt b/src/AggregateFunctions/fuzzers/CMakeLists.txt index 3876ffac7ab..907a275b4b3 100644 --- a/src/AggregateFunctions/fuzzers/CMakeLists.txt +++ b/src/AggregateFunctions/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(aggregate_function_state_deserialization_fuzzer aggregate_function_state_deserialization_fuzzer.cpp ${SRCS}) -target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions ${LIB_FUZZING_ENGINE}) +target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions) diff --git a/src/Compression/fuzzers/CMakeLists.txt b/src/Compression/fuzzers/CMakeLists.txt index 6c0e36afdf7..33a0c2d78ce 100644 --- a/src/Compression/fuzzers/CMakeLists.txt +++ b/src/Compression/fuzzers/CMakeLists.txt @@ -5,16 +5,16 @@ # If you want really small size of the resulted binary, just link with fuzz_compression and clickhouse_common_io clickhouse_add_executable (compressed_buffer_fuzzer compressed_buffer_fuzzer.cpp) -target_link_libraries (compressed_buffer_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries (compressed_buffer_fuzzer PRIVATE dbms) clickhouse_add_executable (lz4_decompress_fuzzer lz4_decompress_fuzzer.cpp) -target_link_libraries (lz4_decompress_fuzzer PUBLIC dbms ch_contrib::lz4 ${LIB_FUZZING_ENGINE}) +target_link_libraries (lz4_decompress_fuzzer PUBLIC dbms ch_contrib::lz4) clickhouse_add_executable (delta_decompress_fuzzer delta_decompress_fuzzer.cpp) -target_link_libraries (delta_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries (delta_decompress_fuzzer PRIVATE dbms) clickhouse_add_executable (double_delta_decompress_fuzzer double_delta_decompress_fuzzer.cpp) -target_link_libraries (double_delta_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries (double_delta_decompress_fuzzer PRIVATE dbms) clickhouse_add_executable (encrypted_decompress_fuzzer encrypted_decompress_fuzzer.cpp) -target_link_libraries (encrypted_decompress_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries (encrypted_decompress_fuzzer PRIVATE dbms) diff --git a/src/Core/fuzzers/CMakeLists.txt b/src/Core/fuzzers/CMakeLists.txt index 269217392e7..51db6fa0b53 100644 --- a/src/Core/fuzzers/CMakeLists.txt +++ b/src/Core/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp) -target_link_libraries (names_and_types_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries (names_and_types_fuzzer PRIVATE dbms) diff --git a/src/DataTypes/fuzzers/CMakeLists.txt b/src/DataTypes/fuzzers/CMakeLists.txt index d9c19cb7d01..939bf5f5e3f 100644 --- a/src/DataTypes/fuzzers/CMakeLists.txt +++ b/src/DataTypes/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(data_type_deserialization_fuzzer 
data_type_deserialization_fuzzer.cpp ${SRCS}) -target_link_libraries(data_type_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions ${LIB_FUZZING_ENGINE}) +target_link_libraries(data_type_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions) diff --git a/src/Formats/fuzzers/CMakeLists.txt b/src/Formats/fuzzers/CMakeLists.txt index 984823f3360..38009aeec1d 100644 --- a/src/Formats/fuzzers/CMakeLists.txt +++ b/src/Formats/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(format_fuzzer format_fuzzer.cpp ${SRCS}) -target_link_libraries(format_fuzzer PRIVATE dbms clickhouse_aggregate_functions ${LIB_FUZZING_ENGINE}) +target_link_libraries(format_fuzzer PRIVATE dbms clickhouse_aggregate_functions) diff --git a/src/Interpreters/fuzzers/CMakeLists.txt b/src/Interpreters/fuzzers/CMakeLists.txt index 8e301470de2..4ac002d3d4a 100644 --- a/src/Interpreters/fuzzers/CMakeLists.txt +++ b/src/Interpreters/fuzzers/CMakeLists.txt @@ -5,5 +5,4 @@ target_link_libraries(execute_query_fuzzer PRIVATE clickhouse_table_functions clickhouse_aggregate_functions clickhouse_dictionaries - clickhouse_dictionaries_embedded - ${LIB_FUZZING_ENGINE}) + clickhouse_dictionaries_embedded) diff --git a/src/Parsers/fuzzers/CMakeLists.txt b/src/Parsers/fuzzers/CMakeLists.txt index c3aa21e2a04..310ed724f07 100644 --- a/src/Parsers/fuzzers/CMakeLists.txt +++ b/src/Parsers/fuzzers/CMakeLists.txt @@ -1,11 +1,11 @@ clickhouse_add_executable(lexer_fuzzer lexer_fuzzer.cpp ${SRCS}) -target_link_libraries(lexer_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE}) +target_link_libraries(lexer_fuzzer PRIVATE clickhouse_parsers) clickhouse_add_executable(select_parser_fuzzer select_parser_fuzzer.cpp ${SRCS}) -target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE}) +target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers) clickhouse_add_executable(create_parser_fuzzer create_parser_fuzzer.cpp ${SRCS}) -target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE}) +target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers) add_subdirectory(codegen_fuzzer) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index 727c49cfc4d..7be3a2ef909 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -42,4 +42,4 @@ clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS}) set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier") target_include_directories(codegen_select_fuzzer SYSTEM BEFORE PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") -target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms) diff --git a/src/Storages/fuzzers/CMakeLists.txt b/src/Storages/fuzzers/CMakeLists.txt index 98f490c5984..719b9b77cd9 100644 --- a/src/Storages/fuzzers/CMakeLists.txt +++ b/src/Storages/fuzzers/CMakeLists.txt @@ -1,7 +1,7 @@ clickhouse_add_executable (mergetree_checksum_fuzzer mergetree_checksum_fuzzer.cpp) # Look at comment around fuzz_compression target declaration -target_link_libraries (mergetree_checksum_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries (mergetree_checksum_fuzzer PRIVATE dbms) clickhouse_add_executable (columns_description_fuzzer 
columns_description_fuzzer.cpp) -target_link_libraries (columns_description_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries (columns_description_fuzzer PRIVATE dbms) From 5888bef8c4e7bb1fb15401319541c778f9b1be34 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 25 Aug 2023 02:20:24 +0000 Subject: [PATCH 0295/1687] fix indentation --- .github/workflows/pull_request.yml | 102 ++++++++++++++--------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 985bb7eb2a2..32a1cf8d4a0 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -243,9 +243,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -######################################################################################### -#################################### ORDINARY BUILDS #################################### -######################################################################################### + ######################################################################################### + #################################### ORDINARY BUILDS #################################### + ######################################################################################### BuilderDebRelease: needs: [DockerHubPush, FastTest, StyleCheck] runs-on: [self-hosted, builder] @@ -583,9 +583,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" -########################################################################################## -##################################### SPECIAL BUILDS ##################################### -########################################################################################## + ########################################################################################## + ##################################### SPECIAL BUILDS ##################################### + ########################################################################################## BuilderBinClangTidy: needs: [DockerHubPush, FastTest, StyleCheck] runs-on: [self-hosted, builder] @@ -996,9 +996,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" -############################################################################################ -##################################### Docker images ####################################### -############################################################################################ + ############################################################################################ + ##################################### Docker images ####################################### + ############################################################################################ DockerServerImages: needs: - BuilderDebRelease @@ -1023,9 +1023,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################ -##################################### BUILD REPORTER ####################################### 
-############################################################################################ + ############################################################################################ + ##################################### BUILD REPORTER ####################################### + ############################################################################################ BuilderReport: needs: - BuilderBinRelease @@ -1116,9 +1116,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################ -#################################### INSTALL PACKAGES ###################################### -############################################################################################ + ############################################################################################ + #################################### INSTALL PACKAGES ###################################### + ############################################################################################ InstallPackagesTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] @@ -1185,9 +1185,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################## -########################### FUNCTIONAl STATELESS TESTS ####################################### -############################################################################################## + ############################################################################################## + ########################### FUNCTIONAl STATELESS TESTS ####################################### + ############################################################################################## FunctionalStatelessTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -2807,9 +2807,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################## -############################ FUNCTIONAl STATEFUL TESTS ####################################### -############################################################################################## + ############################################################################################## + ############################ FUNCTIONAl STATEFUL TESTS ####################################### + ############################################################################################## FunctionalStatefulTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -3259,9 +3259,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################## -######################################### STRESS TESTS ####################################### -############################################################################################## + ############################################################################################## + 
######################################### STRESS TESTS ####################################### + ############################################################################################## StressTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -3567,9 +3567,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################## -##################################### AST FUZZERS ############################################ -############################################################################################## + ############################################################################################## + ##################################### AST FUZZERS ############################################ + ############################################################################################## ASTFuzzerTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -3735,9 +3735,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################# -############################# INTEGRATION TESTS ############################################# -############################################################################################# + ############################################################################################# + ############################# INTEGRATION TESTS ############################################# + ############################################################################################# IntegrationTestsAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -4541,9 +4541,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################# -#################################### UNIT TESTS ############################################# -############################################################################################# + ############################################################################################# + #################################### UNIT TESTS ############################################# + ############################################################################################# UnitTestsAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -4709,9 +4709,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################# -#################################### PERFORMANCE TESTS ###################################### -############################################################################################# + ############################################################################################# + #################################### PERFORMANCE TESTS ###################################### + ############################################################################################# 
PerformanceComparisonX86-0: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] @@ -4992,9 +4992,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################## -###################################### SQLANCER FUZZERS ###################################### -############################################################################################## + ############################################################################################## + ###################################### SQLANCER FUZZERS ###################################### + ############################################################################################## SQLancerTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, fuzzer-unit-tester] @@ -5061,9 +5061,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" -############################################################################################# -#################################### OSS FUZZ ############################################### -############################################################################################# + ############################################################################################# + #################################### OSS FUZZ ############################################### + ############################################################################################# Fuzzing: if: contains(github.event.pull_request.labels.*.name, 'build-fuzzers') runs-on: [self-hosted, fuzzer-unit-tester] @@ -5092,9 +5092,9 @@ jobs: with: name: ${{ matrix.sanitizer }}-artifacts path: ./out/artifacts -############################################################################################# -###################################### JEPSEN TESTS ######################################### -############################################################################################# + ############################################################################################# + ###################################### JEPSEN TESTS ######################################### + ############################################################################################# Jepsen: # This is special test NOT INCLUDED in FinishCheck # When it's skipped, all dependent tasks will be skipped too. 
@@ -5220,9 +5220,9 @@ jobs:
       cd "$GITHUB_WORKSPACE/tests/ci"
       python3 finish_check.py
       python3 merge_pr.py --check-approved
-##############################################################################################
-########################### SQLLOGIC TEST ###################################################
-##############################################################################################
+  ##############################################################################################
+  ########################### SQLLOGIC TEST ###################################################
+  ##############################################################################################
   SQLLogicTestRelease:
     needs: [BuilderDebRelease]
     runs-on: [self-hosted, func-tester]
@@ -5258,9 +5258,9 @@ jobs:
       docker ps --quiet | xargs --no-run-if-empty docker kill ||:
       docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
       sudo rm -fr "$TEMP_PATH"
-##############################################################################################
-##################################### SQL TEST ###############################################
-##############################################################################################
+  ##############################################################################################
+  ##################################### SQL TEST ###############################################
+  ##############################################################################################
   SQLTest:
     needs: [BuilderDebRelease]
     runs-on: [self-hosted, fuzzer-unit-tester]

From 680cbb6b7951e1396c157b2e20678ff41b529617 Mon Sep 17 00:00:00 2001
From: seshWCS
Date: Fri, 25 Aug 2023 05:05:49 +0000
Subject: [PATCH 0296/1687] Fixed bugs with memory

---
 src/Compression/CompressionCodecGCD.cpp | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp
index e13826dc8b4..bf0988b6a33 100644
--- a/src/Compression/CompressionCodecGCD.cpp
+++ b/src/Compression/CompressionCodecGCD.cpp
@@ -52,7 +52,10 @@ namespace ErrorCodes
 
 UInt32 CompressionCodecGCD::getMaxCompressedDataSize(UInt32 uncompressed_size) const
 {
-    return uncompressed_size + 2;
+    return uncompressed_size
+        + gcd_bytes_size // To store gcd
+        + gcd_bytes_size // Max bytes_to_skip
+        + 2; // Local header
 }
 
 CompressionCodecGCD::CompressionCodecGCD(UInt8 gcd_bytes_size_)
@@ -75,7 +78,7 @@ namespace
 {
 
 template <typename T>
-void compressDataForType(const char * source, UInt32 source_size, char * dest)
+UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
 {
     if (source_size % sizeof(T) != 0)
         throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot GCD compress, data size {} is not aligned to {}", source_size, sizeof(T));
@@ -97,6 +100,7 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest)
     }
 
     unalignedStore<T>(dest, gcd_divider);
+    auto * dest_start = dest;
     dest += sizeof(T);
 
     if (typeid(T) == typeid(UInt32) || typeid(T) == typeid(UInt64))
@@ -122,6 +126,7 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest)
             dest += sizeof(T);
         }
     }
+    return static_cast<UInt32>(dest - dest_start);
 }
 
 template <typename T>
@@ -158,28 +163,29 @@ UInt32 CompressionCodecGCD::doCompressData(const char * source, UInt32 source_si
     dest[1] = bytes_to_skip; /// unused (backward compatibility)
     memcpy(&dest[2], source, bytes_to_skip);
     size_t start_pos = 2 + bytes_to_skip;
+    UInt32 result_size = 0;
     switch (gcd_bytes_size)
(gcd_bytes_size) { case 1: - compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + result_size = compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); break; case 2: - compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + result_size = compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); break; case 4: - compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + result_size = compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); break; case 8: - compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + result_size = compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); break; case 16: - compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + result_size = compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); break; case 32: - compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); + result_size = compressDataForType(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]); break; } - return 2 + source_size; + return 2 + bytes_to_skip + result_size; } void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const From ed3ad4c943a773653f2e43d4b9be030557932c75 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 25 Aug 2023 05:37:10 +0000 Subject: [PATCH 0297/1687] Add documentation --- docs/en/operations/configuration-files.md | 17 +++++++++++++++++ docs/ru/operations/configuration-files.md | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index a19c55673ed..b091910ecaa 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -102,6 +102,23 @@ Example: 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 ``` +## Hiding Configuration {#hidden} + +You can use the `hidden` attribute to hide certain elements from the preprocessed file. + +Example: + +```xml + + + + + +``` + ## User Settings {#user-settings} The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`. diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md index 085761d80c7..18c9b7b1bac 100644 --- a/docs/ru/operations/configuration-files.md +++ b/docs/ru/operations/configuration-files.md @@ -122,6 +122,23 @@ $ cat /etc/clickhouse-server/users.d/alice.xml 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 ``` +## Hiding Configuration {#hidden} + +You can use the `hidden` attribute to hide certain elements from the preprocessed file. + +Example: + +```xml + + + + + +``` + ## Examples of configuration written in YAML {#example} Here you can see an example of a real configuration written in YAML: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example).
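The size accounting fixed in PATCH 0296 above is easier to see with the codec's layout written out: the compressed stream is the GCD divider followed by one quotient per value, so the true compressed size is what was actually written, not `2 + source_size`. Below is a minimal, self-contained C++ sketch of that idea; it is an illustration under simplified assumptions (fixed UInt64 width, no header or skipped prefix), and `gcd_compress` is a hypothetical name, not ClickHouse's API.

```cpp
#include <cstddef>
#include <cstdint>
#include <numeric>

// Toy GCD encoding: store the common divider once, then each value divided by it.
// Returns the number of bytes actually written, mirroring the fix where
// compressDataForType now reports its real output size.
size_t gcd_compress(const uint64_t * src, size_t count, uint64_t * dst)
{
    uint64_t divider = 0;
    for (size_t i = 0; i < count; ++i)
        divider = std::gcd(divider, src[i]); // std::gcd(0, x) == x

    dst[0] = divider; // the divider slot written up front in the real codec
    for (size_t i = 0; i < count; ++i)
        dst[1 + i] = divider <= 1 ? src[i] : src[i] / divider;

    return (1 + count) * sizeof(uint64_t); // bytes written, not bytes received
}
```

The patched `doCompressData` returns the analogous quantity: the 2-byte local header, plus the skipped prefix, plus whatever `compressDataForType` reported.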
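The XML samples inside the documentation patch above were lost to extraction, so the fenced blocks are empty. As a purely illustrative sketch — the element names below are invented, only the `hidden` attribute itself comes from the patch — a configuration entry could be hidden like this:

```xml
<clickhouse>
    <!-- With hidden="true" this element is removed from the preprocessed config -->
    <backup_credentials hidden="true">
        <user>admin</user>
        <password>secret</password>
    </backup_credentials>
</clickhouse>
```

The enforcement lives in `ConfigProcessor::hideRecursive` (visible in PATCH 0299 below), which drops any element whose `hidden` attribute parses as true; once the check uses `Poco::NumberParser::parseBool`, spellings such as `1` should be accepted alongside `true`.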
From e4ce24ea0f1d2b5321bb16bbe47913f5deab0219 Mon Sep 17 00:00:00 2001 From: seshWCS Date: Fri, 25 Aug 2023 05:47:35 +0000 Subject: [PATCH 0298/1687] Edited ENGINE --- tests/performance/codec_gcd.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/performance/codec_gcd.xml b/tests/performance/codec_gcd.xml index 0cca46d802e..92363c480b0 100644 --- a/tests/performance/codec_gcd.xml +++ b/tests/performance/codec_gcd.xml @@ -1,6 +1,6 @@ - CREATE TABLE gcd_codec (n DateTime64 CODEC(GCD, LZ4)) ENGINE = MergeTree ORDER BY tuple() - CREATE TABLE delta_codec (n DateTime64 CODEC(Delta, LZ4)) ENGINE = MergeTree ORDER BY tuple() + CREATE TABLE gcd_codec (n DateTime64 CODEC(GCD, LZ4)) ENGINE = Memory + CREATE TABLE delta_codec (n DateTime64 CODEC(Delta, LZ4)) ENGINE = Memory INSERT INTO gcd_codec SELECT * FROM generateRandom() LIMIT 1337228 SETTINGS max_threads=1 From 7a85c9af5f3d85d53267fb8c66b9b9eb751b77c7 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 25 Aug 2023 05:56:37 +0000 Subject: [PATCH 0299/1687] Use parseBool() for hidden attribute value parsing --- src/Common/Config/ConfigProcessor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index c23cf977840..b4c48a08cad 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -262,7 +263,7 @@ void ConfigProcessor::hideRecursive(Poco::XML::Node * config_root) if (node->nodeType() == Node::ELEMENT_NODE) { Element & element = dynamic_cast(*node); - if (element.hasAttribute("hidden") && element.getAttribute("hidden") == "true") + if (element.hasAttribute("hidden") && Poco::NumberParser::parseBool(element.getAttribute("hidden"))) { config_root->removeChild(node); } else From a9ba0f31399d53d42057d06bbd4fd36050202941 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 25 Aug 2023 14:12:17 +0000 Subject: [PATCH 0300/1687] style --- .github/workflows/pull_request.yml | 102 ++++++++++++++--------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 32a1cf8d4a0..985bb7eb2a2 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -243,9 +243,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ######################################################################################### - #################################### ORDINARY BUILDS #################################### - ######################################################################################### +######################################################################################### +#################################### ORDINARY BUILDS #################################### +######################################################################################### BuilderDebRelease: needs: [DockerHubPush, FastTest, StyleCheck] runs-on: [self-hosted, builder] @@ -583,9 +583,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - ########################################################################################## - ##################################### SPECIAL BUILDS 
##################################### - ########################################################################################## +########################################################################################## +##################################### SPECIAL BUILDS ##################################### +########################################################################################## BuilderBinClangTidy: needs: [DockerHubPush, FastTest, StyleCheck] runs-on: [self-hosted, builder] @@ -996,9 +996,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - ############################################################################################ - ##################################### Docker images ####################################### - ############################################################################################ +############################################################################################ +##################################### Docker images ####################################### +############################################################################################ DockerServerImages: needs: - BuilderDebRelease @@ -1023,9 +1023,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################ - ##################################### BUILD REPORTER ####################################### - ############################################################################################ +############################################################################################ +##################################### BUILD REPORTER ####################################### +############################################################################################ BuilderReport: needs: - BuilderBinRelease @@ -1116,9 +1116,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################ - #################################### INSTALL PACKAGES ###################################### - ############################################################################################ +############################################################################################ +#################################### INSTALL PACKAGES ###################################### +############################################################################################ InstallPackagesTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] @@ -1185,9 +1185,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################## - ########################### FUNCTIONAl STATELESS TESTS ####################################### - ############################################################################################## +############################################################################################## +########################### FUNCTIONAl 
STATELESS TESTS ####################################### +############################################################################################## FunctionalStatelessTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -2807,9 +2807,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################## - ############################ FUNCTIONAl STATEFUL TESTS ####################################### - ############################################################################################## +############################################################################################## +############################ FUNCTIONAl STATEFUL TESTS ####################################### +############################################################################################## FunctionalStatefulTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -3259,9 +3259,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################## - ######################################### STRESS TESTS ####################################### - ############################################################################################## +############################################################################################## +######################################### STRESS TESTS ####################################### +############################################################################################## StressTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -3567,9 +3567,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################## - ##################################### AST FUZZERS ############################################ - ############################################################################################## +############################################################################################## +##################################### AST FUZZERS ############################################ +############################################################################################## ASTFuzzerTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -3735,9 +3735,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################# - ############################# INTEGRATION TESTS ############################################# - ############################################################################################# +############################################################################################# +############################# INTEGRATION TESTS ############################################# +############################################################################################# 
IntegrationTestsAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] @@ -4541,9 +4541,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################# - #################################### UNIT TESTS ############################################# - ############################################################################################# +############################################################################################# +#################################### UNIT TESTS ############################################# +############################################################################################# UnitTestsAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -4709,9 +4709,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################# - #################################### PERFORMANCE TESTS ###################################### - ############################################################################################# +############################################################################################# +#################################### PERFORMANCE TESTS ###################################### +############################################################################################# PerformanceComparisonX86-0: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] @@ -4992,9 +4992,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################## - ###################################### SQLANCER FUZZERS ###################################### - ############################################################################################## +############################################################################################## +###################################### SQLANCER FUZZERS ###################################### +############################################################################################## SQLancerTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, fuzzer-unit-tester] @@ -5061,9 +5061,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################# - #################################### OSS FUZZ ############################################### - ############################################################################################# +############################################################################################# +#################################### OSS FUZZ ############################################### +############################################################################################# Fuzzing: if: contains(github.event.pull_request.labels.*.name, 'build-fuzzers') runs-on: [self-hosted, fuzzer-unit-tester] @@ -5092,9 +5092,9 @@ jobs: with: name: ${{ matrix.sanitizer 
}}-artifacts path: ./out/artifacts - ############################################################################################# - ###################################### JEPSEN TESTS ######################################### - ############################################################################################# +############################################################################################# +###################################### JEPSEN TESTS ######################################### +############################################################################################# Jepsen: # This is special test NOT INCLUDED in FinishCheck # When it's skipped, all dependent tasks will be skipped too. @@ -5220,9 +5220,9 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py python3 merge_pr.py --check-approved - ############################################################################################## - ########################### SQLLOGIC TEST ################################################### - ############################################################################################## +############################################################################################## +########################### SQLLOGIC TEST ################################################### +############################################################################################## SQLLogicTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -5258,9 +5258,9 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - ############################################################################################## - ##################################### SQL TEST ############################################### - ############################################################################################## +############################################################################################## +##################################### SQL TEST ############################################### +############################################################################################## SQLTest: needs: [BuilderDebRelease] runs-on: [self-hosted, fuzzer-unit-tester] From 49d33774f9ee1e066d323c799045b9c266c6aee9 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 25 Aug 2023 17:13:03 +0200 Subject: [PATCH 0301/1687] Fix --- contrib/libpqxx | 2 +- src/Core/PostgreSQL/Connection.h | 2 ++ src/Core/PostgreSQL/ConnectionHolder.h | 18 ++++++++++++++++++ src/Processors/Sources/PostgreSQLSource.cpp | 15 +++++++++++++-- src/Processors/Sources/PostgreSQLSource.h | 3 +++ 5 files changed, 37 insertions(+), 3 deletions(-) diff --git a/contrib/libpqxx b/contrib/libpqxx index bdd6540fb95..791d68fd899 160000 --- a/contrib/libpqxx +++ b/contrib/libpqxx @@ -1 +1 @@ -Subproject commit bdd6540fb95ff56c813691ceb5da5a3266cf235d +Subproject commit 791d68fd89902835133c50435e380ec7a73271b7 diff --git a/src/Core/PostgreSQL/Connection.h b/src/Core/PostgreSQL/Connection.h index 96cc19babea..efc10b6ed20 100644 --- a/src/Core/PostgreSQL/Connection.h +++ b/src/Core/PostgreSQL/Connection.h @@ -47,6 +47,8 @@ public: void tryUpdateConnection(); + bool isConnected() const { return connection != nullptr && connection->is_open(); } + const ConnectionInfo & getConnectionInfo() { return connection_info; } String getInfoForLog() const { return connection_info.host_port; } diff --git 
a/src/Core/PostgreSQL/ConnectionHolder.h b/src/Core/PostgreSQL/ConnectionHolder.h index 16803c823ba..43998c494c9 100644 --- a/src/Core/PostgreSQL/ConnectionHolder.h +++ b/src/Core/PostgreSQL/ConnectionHolder.h @@ -28,10 +28,27 @@ public: ConnectionHolder(const ConnectionHolder & other) = delete; + void setBroken() { is_broken = true; } + ~ConnectionHolder() { if (auto_close) + { connection.reset(); + } + else if (is_broken) + { + try + { + if (connection->isConnected()) + connection->getRef().reset(); + } + catch (...) + { + DB::tryLogCurrentException(__PRETTY_FUNCTION__); + connection.reset(); + } + } pool->returnObject(std::move(connection)); } @@ -49,6 +66,7 @@ private: PoolPtr pool; ConnectionPtr connection; bool auto_close; + bool is_broken = false; }; using ConnectionHolderPtr = std::unique_ptr; diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index 115e24d5740..f57d0fe9cc1 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -59,7 +59,6 @@ PostgreSQLSource::PostgreSQLSource( init(sample_block); } - template void PostgreSQLSource::init(const Block & sample_block) { @@ -82,7 +81,8 @@ void PostgreSQLSource::onStart() { try { - tx = std::make_shared(connection_holder->get()); + auto & conn = connection_holder->get(); + tx = std::make_shared(conn); } catch (const pqxx::broken_connection &) { @@ -180,6 +180,17 @@ void PostgreSQLSource::onFinish() if (tx && auto_commit) tx->commit(); + + is_completed = true; +} + +template +PostgreSQLSource::~PostgreSQLSource() +{ + if (!is_completed) + { + connection_holder->setBroken(); + } } template diff --git a/src/Processors/Sources/PostgreSQLSource.h b/src/Processors/Sources/PostgreSQLSource.h index 312e9f5fb18..8a648ae8bb5 100644 --- a/src/Processors/Sources/PostgreSQLSource.h +++ b/src/Processors/Sources/PostgreSQLSource.h @@ -28,6 +28,8 @@ public: String getName() const override { return "PostgreSQL"; } + ~PostgreSQLSource() override; + protected: PostgreSQLSource( std::shared_ptr tx_, @@ -54,6 +56,7 @@ private: ExternalResultDescription description; bool started = false; + bool is_completed = false; postgres::ConnectionHolderPtr connection_holder; From 9ab545e28c55ecb0dc10d0f31bec51ffe7d2a732 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 25 Aug 2023 19:06:26 +0200 Subject: [PATCH 0302/1687] do not wait for flush on shutdown --- .../DistributedAsyncInsertDirectoryQueue.cpp | 11 +++++++++- .../DistributedAsyncInsertDirectoryQueue.h | 2 ++ src/Storages/StorageDistributed.cpp | 20 ++++++++++++------- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 51839ad973a..a0e7ab3759c 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -186,6 +186,15 @@ void DistributedAsyncInsertDirectoryQueue::shutdownAndDropAllData() fs::remove_all(path); } +void DistributedAsyncInsertDirectoryQueue::shutdown() +{ + /// It's incompatible with should_batch_inserts + /// because processFilesWithBatching may push to the queue after shutdown + chassert(!should_batch_inserts); + pending_files.finish(); + task_handle->deactivate(); +} + void DistributedAsyncInsertDirectoryQueue::run() { @@ -401,7 +410,7 @@ try if (!current_file.empty()) processFile(current_file); - while 
(pending_files.tryPop(current_file)) + while (!pending_files.isFinished() && pending_files.tryPop(current_file)) processFile(current_file); } diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h index 45c355bb64e..db9abfb5fe0 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h @@ -66,6 +66,8 @@ public: void shutdownAndDropAllData(); + void shutdown(); + static std::shared_ptr createSourceFromFile(const String & file_name); /// For scheduling via DistributedSink. diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 6f0072c4560..6df4aedc146 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -331,6 +331,9 @@ StorageDistributed::StorageDistributed( , distributed_settings(distributed_settings_) , rng(randomSeed()) { + if (!distributed_settings.flush_on_detach && distributed_settings.monitor_batch_inserts) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Settings flush_on_detach=0 and monitor_batch_inserts=1 are incompatible"); + StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { @@ -1438,12 +1441,6 @@ ActionLock StorageDistributed::getActionLock(StorageActionBlockType type) void StorageDistributed::flushAndPrepareForShutdown() { - if (!getDistributedSettingsRef().flush_on_detach) - { - LOG_INFO(log, "Skip flushing data (due to flush_on_detach=0)"); - return; - } - try { flushClusterNodesAllData(getContext()); @@ -1469,9 +1466,18 @@ void StorageDistributed::flushClusterNodesAllData(ContextPtr local_context) directory_monitors.push_back(node.second.directory_monitor); } + bool need_flush = getDistributedSettingsRef().flush_on_detach; + if (!need_flush) + LOG_INFO(log, "Skip flushing data (due to flush_on_detach=0)"); + /// TODO: Maybe it should be executed in parallel for (auto & node : directory_monitors) - node->flushAllData(); + { + if (need_flush) + node->flushAllData(); + else + node->shutdown(); + } } void StorageDistributed::rename(const String & new_path_to_table_data, const StorageID & new_table_id) From 4af1bf60edc3e9dcc9ad3c551e6a72665a436f6a Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 25 Aug 2023 19:24:31 +0200 Subject: [PATCH 0303/1687] Fix --- src/Core/PostgreSQL/ConnectionHolder.h | 4 +--- src/Processors/Sources/PostgreSQLSource.cpp | 10 ++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/Core/PostgreSQL/ConnectionHolder.h b/src/Core/PostgreSQL/ConnectionHolder.h index 43998c494c9..ad311f9cc2f 100644 --- a/src/Core/PostgreSQL/ConnectionHolder.h +++ b/src/Core/PostgreSQL/ConnectionHolder.h @@ -40,12 +40,10 @@ public: { try { - if (connection->isConnected()) - connection->getRef().reset(); + connection->getRef().reset(); } catch (...) { - DB::tryLogCurrentException(__PRETTY_FUNCTION__); connection.reset(); } } diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index f57d0fe9cc1..a4e81a081a4 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -189,6 +189,16 @@ PostgreSQLSource::~PostgreSQLSource() { if (!is_completed) { + try + { + stream.reset(); + tx.reset(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + connection_holder->setBroken(); } } From 734ffd916c6786ffd27479cace1298cab8e4cda7 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Fri, 25 Aug 2023 20:31:21 +0200 Subject: [PATCH 0304/1687] WIP prepared statements --- src/Core/MySQL/PacketsGeneric.cpp | 475 +++++++++--------- src/Core/MySQL/PacketsProtocolBinary.cpp | 230 +++++++++ src/Core/MySQL/PacketsProtocolBinary.h | 45 ++ src/Core/MySQL/PacketsProtocolText.cpp | 388 +++++++------- src/DataTypes/DataTypesDecimal.cpp | 26 +- src/Formats/FormatSettings.h | 16 +- .../InterpreterShowColumnsQuery.cpp | 2 +- .../Formats/Impl/MySQLOutputFormat.cpp | 95 ++-- .../Formats/Impl/MySQLOutputFormat.h | 3 +- src/Server/MySQLHandler.cpp | 142 ++++-- src/Server/MySQLHandler.h | 32 +- .../InformationSchema/key_column_usage.sql | 38 ++ .../referential_constraints.sql | 25 + .../System/InformationSchema/schemata.sql | 55 +- .../System/InformationSchema/tables.sql | 50 +- .../System/attachInformationSchemaTables.cpp | 5 +- 16 files changed, 1051 insertions(+), 576 deletions(-) create mode 100644 src/Core/MySQL/PacketsProtocolBinary.cpp create mode 100644 src/Core/MySQL/PacketsProtocolBinary.h create mode 100644 src/Storages/System/InformationSchema/key_column_usage.sql create mode 100644 src/Storages/System/InformationSchema/referential_constraints.sql diff --git a/src/Core/MySQL/PacketsGeneric.cpp b/src/Core/MySQL/PacketsGeneric.cpp index af80797d5c1..88183890399 100644 --- a/src/Core/MySQL/PacketsGeneric.cpp +++ b/src/Core/MySQL/PacketsGeneric.cpp @@ -8,254 +8,263 @@ namespace DB namespace MySQLProtocol { -namespace Generic -{ - -static const size_t MYSQL_ERRMSG_SIZE = 512; - -void SSLRequest::readPayloadImpl(ReadBuffer & buf) -{ - buf.readStrict(reinterpret_cast(&capability_flags), 4); - buf.readStrict(reinterpret_cast(&max_packet_size), 4); - buf.readStrict(reinterpret_cast(&character_set), 1); -} - -OKPacket::OKPacket(uint32_t capabilities_) - : header(0x00), capabilities(capabilities_), affected_rows(0x00), last_insert_id(0x00), status_flags(0x00) -{ -} - -OKPacket::OKPacket( - uint8_t header_, uint32_t capabilities_, uint64_t affected_rows_, uint32_t status_flags_, int16_t warnings_, - String session_state_changes_, String info_) - : header(header_), capabilities(capabilities_), affected_rows(affected_rows_), last_insert_id(0), warnings(warnings_), - status_flags(status_flags_), session_state_changes(std::move(session_state_changes_)), info(std::move(info_)) -{ -} - -size_t OKPacket::getPayloadSize() const -{ - size_t result = 2 + getLengthEncodedNumberSize(affected_rows); - - if (capabilities & CLIENT_PROTOCOL_41) + namespace Generic { - result += 4; - } - else if (capabilities & CLIENT_TRANSACTIONS) - { - result += 2; - } - if (capabilities & CLIENT_SESSION_TRACK) - { - result += getLengthEncodedStringSize(info); - if (status_flags & SERVER_SESSION_STATE_CHANGED) - result += getLengthEncodedStringSize(session_state_changes); - } - else - { - result += info.size(); - } + static const size_t MYSQL_ERRMSG_SIZE = 512; - return result; -} - -void OKPacket::readPayloadImpl(ReadBuffer & payload) - -{ - payload.readStrict(reinterpret_cast(&header), 1); - affected_rows = readLengthEncodedNumber(payload); - last_insert_id = readLengthEncodedNumber(payload); - - if (capabilities & CLIENT_PROTOCOL_41) - { - payload.readStrict(reinterpret_cast(&status_flags), 2); - payload.readStrict(reinterpret_cast(&warnings), 2); - } - else if (capabilities & CLIENT_TRANSACTIONS) - { - 
payload.readStrict(reinterpret_cast(&status_flags), 2); - } - - if (capabilities & CLIENT_SESSION_TRACK) - { - readLengthEncodedString(info, payload); - if (status_flags & SERVER_SESSION_STATE_CHANGED) + void SSLRequest::readPayloadImpl(ReadBuffer & buf) { - readLengthEncodedString(session_state_changes, payload); + buf.readStrict(reinterpret_cast(&capability_flags), 4); + buf.readStrict(reinterpret_cast(&max_packet_size), 4); + buf.readStrict(reinterpret_cast(&character_set), 1); } - } - else - { - readString(info, payload); - } -} -void OKPacket::writePayloadImpl(WriteBuffer & buffer) const + OKPacket::OKPacket(uint32_t capabilities_) + : header(0x00), capabilities(capabilities_), affected_rows(0x00), last_insert_id(0x00), status_flags(0x00) + { + } -{ - buffer.write(header); - writeLengthEncodedNumber(affected_rows, buffer); - writeLengthEncodedNumber(last_insert_id, buffer); /// last insert-id + OKPacket::OKPacket( + uint8_t header_, + uint32_t capabilities_, + uint64_t affected_rows_, + uint32_t status_flags_, + int16_t warnings_, + String session_state_changes_, + String info_) + : header(header_) + , capabilities(capabilities_) + , affected_rows(affected_rows_) + , last_insert_id(0) + , warnings(warnings_) + , status_flags(status_flags_) + , session_state_changes(std::move(session_state_changes_)) + , info(std::move(info_)) + { + } - if (capabilities & CLIENT_PROTOCOL_41) - { - buffer.write(reinterpret_cast(&status_flags), 2); - buffer.write(reinterpret_cast(&warnings), 2); - } - else if (capabilities & CLIENT_TRANSACTIONS) - { - buffer.write(reinterpret_cast(&status_flags), 2); - } + size_t OKPacket::getPayloadSize() const + { + size_t result = 2 + getLengthEncodedNumberSize(affected_rows); - if (capabilities & CLIENT_SESSION_TRACK) - { - writeLengthEncodedString(info, buffer); - if (status_flags & SERVER_SESSION_STATE_CHANGED) - writeLengthEncodedString(session_state_changes, buffer); - } - else - { - writeString(info, buffer); - } -} - -EOFPacket::EOFPacket() : warnings(0x00), status_flags(0x00) -{ -} - -EOFPacket::EOFPacket(int warnings_, int status_flags_) - : warnings(warnings_), status_flags(status_flags_) -{ -} - -size_t EOFPacket::getPayloadSize() const -{ - return 5; -} - -void EOFPacket::readPayloadImpl(ReadBuffer & payload) -{ - payload.readStrict(reinterpret_cast(&header), 1); - assert(header == 0xfe); - payload.readStrict(reinterpret_cast(&warnings), 2); - payload.readStrict(reinterpret_cast(&status_flags), 2); -} - -void EOFPacket::writePayloadImpl(WriteBuffer & buffer) const -{ - buffer.write(header); // EOF header - buffer.write(reinterpret_cast(&warnings), 2); - buffer.write(reinterpret_cast(&status_flags), 2); -} - -void AuthSwitchPacket::readPayloadImpl(ReadBuffer & payload) -{ - payload.readStrict(reinterpret_cast(&header), 1); - assert(header == 0xfe); - readStringUntilEOF(plugin_name, payload); -} - -ERRPacket::ERRPacket() : error_code(0x00) -{ -} - -ERRPacket::ERRPacket(int error_code_, String sql_state_, String error_message_) - : error_code(error_code_), sql_state(std::move(sql_state_)), error_message(std::move(error_message_)) -{ -} - -size_t ERRPacket::getPayloadSize() const -{ - return 4 + sql_state.length() + std::min(error_message.length(), MYSQL_ERRMSG_SIZE); -} - -void ERRPacket::readPayloadImpl(ReadBuffer & payload) -{ - payload.readStrict(reinterpret_cast(&header), 1); - assert(header == 0xff); - - payload.readStrict(reinterpret_cast(&error_code), 2); - - /// SQL State [optional: # + 5bytes string] - UInt8 sharp = static_cast(*payload.position()); 
- if (sharp == 0x23) - { - payload.ignore(1); - sql_state.resize(5); - payload.readStrict(reinterpret_cast(sql_state.data()), 5); - } - readString(error_message, payload); -} - -void ERRPacket::writePayloadImpl(WriteBuffer & buffer) const -{ - buffer.write(header); - buffer.write(reinterpret_cast(&error_code), 2); - buffer.write('#'); - buffer.write(sql_state.data(), sql_state.length()); - buffer.write(error_message.data(), std::min(error_message.length(), MYSQL_ERRMSG_SIZE)); -} - -ResponsePacket::ResponsePacket(UInt32 server_capability_flags_) - : ok(OKPacket(server_capability_flags_)) -{ -} - -ResponsePacket::ResponsePacket(UInt32 server_capability_flags_, bool is_handshake_) - : ok(OKPacket(server_capability_flags_)), is_handshake(is_handshake_) -{ -} - -void ResponsePacket::readPayloadImpl(ReadBuffer & payload) -{ - UInt16 header = static_cast(*payload.position()); - switch (header) - { - case PACKET_OK: - packetType = PACKET_OK; - ok.readPayloadWithUnpacked(payload); - break; - case PACKET_ERR: - packetType = PACKET_ERR; - err.readPayloadWithUnpacked(payload); - break; - case PACKET_EOF: - if (is_handshake) + if (capabilities & CLIENT_PROTOCOL_41) { - packetType = PACKET_AUTH_SWITCH; - auth_switch.readPayloadWithUnpacked(payload); + result += 4; + } + else if (capabilities & CLIENT_TRANSACTIONS) + { + result += 2; + } + + if (capabilities & CLIENT_SESSION_TRACK) + { + result += getLengthEncodedStringSize(info); + if (status_flags & SERVER_SESSION_STATE_CHANGED) + result += getLengthEncodedStringSize(session_state_changes); } else { - packetType = PACKET_EOF; - eof.readPayloadWithUnpacked(payload); + result += info.size(); } - break; - case PACKET_LOCALINFILE: - packetType = PACKET_LOCALINFILE; - break; - default: - packetType = PACKET_OK; - column_length = readLengthEncodedNumber(payload); + + return result; + } + + void OKPacket::readPayloadImpl(ReadBuffer & payload) + + { + payload.readStrict(reinterpret_cast(&header), 1); + affected_rows = readLengthEncodedNumber(payload); + last_insert_id = readLengthEncodedNumber(payload); + + if (capabilities & CLIENT_PROTOCOL_41) + { + payload.readStrict(reinterpret_cast(&status_flags), 2); + payload.readStrict(reinterpret_cast(&warnings), 2); + } + else if (capabilities & CLIENT_TRANSACTIONS) + { + payload.readStrict(reinterpret_cast(&status_flags), 2); + } + + if (capabilities & CLIENT_SESSION_TRACK) + { + readLengthEncodedString(info, payload); + if (status_flags & SERVER_SESSION_STATE_CHANGED) + { + readLengthEncodedString(session_state_changes, payload); + } + } + else + { + readString(info, payload); + } + } + + void OKPacket::writePayloadImpl(WriteBuffer & buffer) const + + { + buffer.write(header); + writeLengthEncodedNumber(affected_rows, buffer); + writeLengthEncodedNumber(last_insert_id, buffer); /// last insert-id + + if (capabilities & CLIENT_PROTOCOL_41) + { + buffer.write(reinterpret_cast(&status_flags), 2); + buffer.write(reinterpret_cast(&warnings), 2); + } + else if (capabilities & CLIENT_TRANSACTIONS) + { + buffer.write(reinterpret_cast(&status_flags), 2); + } + + if (capabilities & CLIENT_SESSION_TRACK) + { + writeLengthEncodedString(info, buffer); + if (status_flags & SERVER_SESSION_STATE_CHANGED) + writeLengthEncodedString(session_state_changes, buffer); + } + else + { + writeString(info, buffer); + } + } + + EOFPacket::EOFPacket() : warnings(0x00), status_flags(0x00) + { + } + + EOFPacket::EOFPacket(int warnings_, int status_flags_) : warnings(warnings_), status_flags(status_flags_) + { + } + + size_t 
EOFPacket::getPayloadSize() const + { + return 5; + } + + void EOFPacket::readPayloadImpl(ReadBuffer & payload) + { + payload.readStrict(reinterpret_cast(&header), 1); + assert(header == 0xfe); + payload.readStrict(reinterpret_cast(&warnings), 2); + payload.readStrict(reinterpret_cast(&status_flags), 2); + } + + void EOFPacket::writePayloadImpl(WriteBuffer & buffer) const + { + buffer.write(header); // EOF header + buffer.write(reinterpret_cast(&warnings), 2); + buffer.write(reinterpret_cast(&status_flags), 2); + } + + void AuthSwitchPacket::readPayloadImpl(ReadBuffer & payload) + { + payload.readStrict(reinterpret_cast(&header), 1); + assert(header == 0xfe); + readStringUntilEOF(plugin_name, payload); + } + + ERRPacket::ERRPacket() : error_code(0x00) + { + } + + ERRPacket::ERRPacket(int error_code_, String sql_state_, String error_message_) + : error_code(error_code_), sql_state(std::move(sql_state_)), error_message(std::move(error_message_)) + { + } + + size_t ERRPacket::getPayloadSize() const + { + return 4 + sql_state.length() + std::min(error_message.length(), MYSQL_ERRMSG_SIZE); + } + + void ERRPacket::readPayloadImpl(ReadBuffer & payload) + { + payload.readStrict(reinterpret_cast(&header), 1); + assert(header == 0xff); + + payload.readStrict(reinterpret_cast(&error_code), 2); + + /// SQL State [optional: # + 5bytes string] + UInt8 sharp = static_cast(*payload.position()); + if (sharp == 0x23) + { + payload.ignore(1); + sql_state.resize(5); + payload.readStrict(reinterpret_cast(sql_state.data()), 5); + } + readString(error_message, payload); + } + + void ERRPacket::writePayloadImpl(WriteBuffer & buffer) const + { + buffer.write(header); + buffer.write(reinterpret_cast(&error_code), 2); + buffer.write('#'); + buffer.write(sql_state.data(), sql_state.length()); + buffer.write(error_message.data(), std::min(error_message.length(), MYSQL_ERRMSG_SIZE)); + } + + ResponsePacket::ResponsePacket(UInt32 server_capability_flags_) : ok(OKPacket(server_capability_flags_)) + { + } + + ResponsePacket::ResponsePacket(UInt32 server_capability_flags_, bool is_handshake_) + : ok(OKPacket(server_capability_flags_)), is_handshake(is_handshake_) + { + } + + void ResponsePacket::readPayloadImpl(ReadBuffer & payload) + { + UInt16 header = static_cast(*payload.position()); + switch (header) + { + case PACKET_OK: + packetType = PACKET_OK; + ok.readPayloadWithUnpacked(payload); + break; + case PACKET_ERR: + packetType = PACKET_ERR; + err.readPayloadWithUnpacked(payload); + break; + case PACKET_EOF: + if (is_handshake) + { + packetType = PACKET_AUTH_SWITCH; + auth_switch.readPayloadWithUnpacked(payload); + } + else + { + packetType = PACKET_EOF; + eof.readPayloadWithUnpacked(payload); + } + break; + case PACKET_LOCALINFILE: + packetType = PACKET_LOCALINFILE; + break; + default: + packetType = PACKET_OK; + column_length = readLengthEncodedNumber(payload); + } + } + + LengthEncodedNumber::LengthEncodedNumber(uint64_t value_) : value(value_) + { + } + + size_t LengthEncodedNumber::getPayloadSize() const + { + return getLengthEncodedNumberSize(value); + } + + void LengthEncodedNumber::writePayloadImpl(WriteBuffer & buffer) const + { + writeLengthEncodedNumber(value, buffer); + } + } -} - -LengthEncodedNumber::LengthEncodedNumber(uint64_t value_) : value(value_) -{ -} - -size_t LengthEncodedNumber::getPayloadSize() const -{ - return getLengthEncodedNumberSize(value); -} - -void LengthEncodedNumber::writePayloadImpl(WriteBuffer & buffer) const -{ - writeLengthEncodedNumber(value, buffer); -} - -} } diff --git 
a/src/Core/MySQL/PacketsProtocolBinary.cpp b/src/Core/MySQL/PacketsProtocolBinary.cpp new file mode 100644 index 00000000000..287dda269e6 --- /dev/null +++ b/src/Core/MySQL/PacketsProtocolBinary.cpp @@ -0,0 +1,230 @@ +#include +#include +#include +#include +#include +#include +#include "Columns/ColumnLowCardinality.h" +#include "Columns/ColumnVector.h" +#include "DataTypes/DataTypeLowCardinality.h" +#include "DataTypes/DataTypeNullable.h" +#include "Formats/FormatSettings.h" +#include "IO/WriteBufferFromString.h" +#include "base/types.h" + +namespace DB +{ + +namespace MySQLProtocol +{ + + namespace ProtocolBinary + { + ResultSetRow::ResultSetRow( + const Serializations & serializations_, const DataTypes & data_types_, const Columns & columns_, int row_num_) + : row_num(row_num_), columns(columns_), data_types(data_types_), serializations(serializations_) + { + /// See https://dev.mysql.com/doc/dev/mysql-server/8.1.0/page_protocol_binary_resultset.html#sect_protocol_binary_resultset_row + payload_size = 1 + null_bitmap_size; + // LOG_TRACE(&Poco::Logger::get("ResultSetRow"), "Null bitmap size: {}", null_bitmap_size); + FormatSettings format_settings; + for (size_t i = 0; i < columns.size(); ++i) + { + ColumnPtr col = columns[i]; + + if (col->isNullAt(row_num)) + { + null_bitmap[i / 8] |= 1 << i % 8; + } + + TypeIndex type_index = removeNullable(removeLowCardinality(data_types[i]))->getTypeId(); + switch (type_index) + { + case TypeIndex::Int8: + case TypeIndex::UInt8: + payload_size += 1; + break; + case TypeIndex::Int16: + case TypeIndex::UInt16: + payload_size += 2; + break; + case TypeIndex::Int32: + case TypeIndex::UInt32: + case TypeIndex::Float32: + payload_size += 4; + break; + case TypeIndex::Int64: + case TypeIndex::UInt64: + case TypeIndex::Float64: + payload_size += 8; + break; + case TypeIndex::Date: { + UInt64 value = col->get64(row_num); + if (value == 0) + { + payload_size += 1; // length only, no other fields + } + else + { + payload_size += 5; + } + break; + } + case TypeIndex::DateTime: { + UInt64 value = col->get64(row_num); + if (value == 0) + { + payload_size += 1; // length only, no other fields + } + else + { + Poco::DateTime dt = Poco::DateTime(Poco::Timestamp(value * 1000 * 1000)); + if (dt.second() == 0 && dt.minute() == 0 && dt.hour() == 0) + { + payload_size += 5; + } + else + { + payload_size += 8; + } + } + break; + } + default: + WriteBufferFromOwnString ostr; + serializations[i]->serializeText(*columns[i], row_num, ostr, format_settings); + payload_size += getLengthEncodedStringSize(ostr.str()); + serialized[i] = std::move(ostr.str()); + break; + } + } + } + + void ResultSetRow::writePayloadImpl(WriteBuffer & buffer) const + { + buffer.write(static_cast(0x00)); + buffer.write(null_bitmap.data(), null_bitmap_size); + for (size_t i = 0; i < columns.size(); ++i) + { + ColumnPtr col = columns[i]; + if (col->isNullAt(row_num)) + { + continue; // NULLs are stored in the null bitmap only + } + + TypeIndex type_index = removeNullable(removeLowCardinality(data_types[i]))->getTypeId(); + switch (type_index) + { + case TypeIndex::UInt8: { + UInt64 value = col->get64(row_num); + buffer.write(reinterpret_cast(&value), 1); + break; + } + case TypeIndex::UInt16: { + UInt64 value = col->get64(row_num); + buffer.write(reinterpret_cast(&value), 2); + break; + } + case TypeIndex::UInt32: { + UInt64 value = col->get64(row_num); + buffer.write(reinterpret_cast(&value), 4); + break; + } + case TypeIndex::UInt64: { + UInt64 value = col->get64(row_num); + 
buffer.write(reinterpret_cast(&value), 8); + break; + } + case TypeIndex::Int8: { + UInt64 value = col->get64(row_num); + buffer.write(reinterpret_cast(&value), 1); + break; + } + case TypeIndex::Int16: { + UInt64 value = col->get64(row_num); + buffer.write(reinterpret_cast(&value), 2); + break; + } + case TypeIndex::Int32: { + UInt64 value = col->get64(row_num); + buffer.write(reinterpret_cast(&value), 4); + break; + } + case TypeIndex::Int64: { + UInt64 value = col->get64(row_num); + buffer.write(reinterpret_cast(&value), 8); + break; + } + case TypeIndex::Float32: { + Float32 value = col->getFloat32(row_num); + buffer.write(reinterpret_cast(&value), 4); + break; + } + case TypeIndex::Float64: { + Float64 value = col->getFloat64(row_num); + buffer.write(reinterpret_cast(&value), 8); + break; + } + case TypeIndex::Date: { + UInt64 value = col->get64(row_num); + if (value != 0) + { + Poco::DateTime dt = Poco::DateTime(Poco::Timestamp(value * 1000 * 1000)); + buffer.write(static_cast(4)); // bytes_following + int year = dt.year(); + int month = dt.month(); + int day = dt.day(); + buffer.write(reinterpret_cast(&year), 2); + buffer.write(reinterpret_cast(&month), 1); + buffer.write(reinterpret_cast(&day), 1); + } + else + { + buffer.write(static_cast(0)); + } + break; + } + case TypeIndex::DateTime: { + UInt64 value = col->get64(row_num); + if (value != 0) + { + Poco::DateTime dt = Poco::DateTime(Poco::Timestamp(value * 1000 * 1000)); + bool is_date_time = !(dt.hour() == 0 && dt.minute() == 0 && dt.second() == 0); + size_t bytes_following = is_date_time ? 7 : 4; + buffer.write(reinterpret_cast(&bytes_following), 1); + int year = dt.year(); + int month = dt.month(); + int day = dt.day(); + buffer.write(reinterpret_cast(&year), 2); + buffer.write(reinterpret_cast(&month), 1); + buffer.write(reinterpret_cast(&day), 1); + if (is_date_time) + { + int hour = dt.hour(); // 24-hour value; hourAMPM() would fold afternoon hours into 1-12 + int minute = dt.minute(); + int second = dt.second(); + buffer.write(reinterpret_cast(&hour), 1); + buffer.write(reinterpret_cast(&minute), 1); + buffer.write(reinterpret_cast(&second), 1); + } + } + else + { + buffer.write(static_cast(0)); + } + break; + } + default: + writeLengthEncodedString(serialized[i], buffer); + break; + } + } + } + + size_t ResultSetRow::getPayloadSize() const + { + return payload_size; + } + } +} +} diff --git
a/src/Core/MySQL/PacketsProtocolText.cpp b/src/Core/MySQL/PacketsProtocolText.cpp index 728e8061e87..9c5bf6b6e05 100644 --- a/src/Core/MySQL/PacketsProtocolText.cpp +++ b/src/Core/MySQL/PacketsProtocolText.cpp @@ -1,7 +1,8 @@ #include -#include #include +#include #include +#include "Core/MySQL/IMySQLWritePacket.h" namespace DB { @@ -9,197 +10,212 @@ namespace DB namespace MySQLProtocol { -namespace ProtocolText -{ - -ResultSetRow::ResultSetRow(const Serializations & serializations, const Columns & columns_, int row_num_) - : columns(columns_), row_num(row_num_) -{ - for (size_t i = 0; i < columns.size(); ++i) + namespace ProtocolText { - if (columns[i]->isNullAt(row_num)) + + ResultSetRow::ResultSetRow(const Serializations & serializations, const Columns & columns_, int row_num_) + : columns(columns_), row_num(row_num_) { - payload_size += 1; - serialized.emplace_back("\xfb"); + for (size_t i = 0; i < columns.size(); ++i) + { + if (columns[i]->isNullAt(row_num)) + { + payload_size += 1; + serialized.emplace_back("\xfb"); + } + else + { + WriteBufferFromOwnString ostr; + serializations[i]->serializeText(*columns[i], row_num, ostr, FormatSettings()); + payload_size += getLengthEncodedStringSize(ostr.str()); + serialized.push_back(std::move(ostr.str())); + } + } } - else + + size_t ResultSetRow::getPayloadSize() const { - WriteBufferFromOwnString ostr; - serializations[i]->serializeText(*columns[i], row_num, ostr, FormatSettings()); - payload_size += getLengthEncodedStringSize(ostr.str()); - serialized.push_back(std::move(ostr.str())); + return payload_size; } + + void ResultSetRow::writePayloadImpl(WriteBuffer & buffer) const + { + for (size_t i = 0; i < columns.size(); ++i) + { + if (columns[i]->isNullAt(row_num)) + buffer.write(serialized[i].data(), 1); + else + writeLengthEncodedString(serialized[i], buffer); + } + } + + void ComFieldList::readPayloadImpl(ReadBuffer & payload) + { + // Command byte has been already read from payload. 
+ readNullTerminated(table, payload); + readStringUntilEOF(field_wildcard, payload); + } + + ColumnDefinition::ColumnDefinition() : character_set(0x00), column_length(0), column_type(MYSQL_TYPE_DECIMAL), flags(0x00) + { + } + + ColumnDefinition::ColumnDefinition( + String schema_, + String table_, + String org_table_, + String name_, + String org_name_, + uint16_t character_set_, + uint32_t column_length_, + ColumnType column_type_, + uint16_t flags_, + uint8_t decimals_, + bool with_defaults_) + : schema(std::move(schema_)) + , table(std::move(table_)) + , org_table(std::move(org_table_)) + , name(std::move(name_)) + , org_name(std::move(org_name_)) + , character_set(character_set_) + , column_length(column_length_) + , column_type(column_type_) + , flags(flags_) + , decimals(decimals_) + , is_comm_field_list_response(with_defaults_) + { + } + + ColumnDefinition::ColumnDefinition( + String name_, uint16_t character_set_, uint32_t column_length_, ColumnType column_type_, uint16_t flags_, uint8_t decimals_) + : ColumnDefinition("", "", "", std::move(name_), "", character_set_, column_length_, column_type_, flags_, decimals_) + { + } + + size_t ColumnDefinition::getPayloadSize() const + { + return 12 + getLengthEncodedStringSize("def") + getLengthEncodedStringSize(schema) + getLengthEncodedStringSize(table) + + getLengthEncodedStringSize(org_table) + getLengthEncodedStringSize(name) + getLengthEncodedStringSize(org_name) + + getLengthEncodedNumberSize(next_length) + is_comm_field_list_response; + } + + void ColumnDefinition::readPayloadImpl(ReadBuffer & payload) + { + String def; + readLengthEncodedString(def, payload); + assert(def == "def"); + readLengthEncodedString(schema, payload); + readLengthEncodedString(table, payload); + readLengthEncodedString(org_table, payload); + readLengthEncodedString(name, payload); + readLengthEncodedString(org_name, payload); + next_length = readLengthEncodedNumber(payload); + payload.readStrict(reinterpret_cast(&character_set), 2); + payload.readStrict(reinterpret_cast(&column_length), 4); + payload.readStrict(reinterpret_cast(&column_type), 1); + payload.readStrict(reinterpret_cast(&flags), 2); + payload.readStrict(reinterpret_cast(&decimals), 1); + payload.ignore(2); + } + + void ColumnDefinition::writePayloadImpl(WriteBuffer & buffer) const + { + writeLengthEncodedString(std::string("def"), buffer); /// always "def" + writeLengthEncodedString(schema, buffer); + writeLengthEncodedString(table, buffer); + writeLengthEncodedString(org_table, buffer); + writeLengthEncodedString(name, buffer); + writeLengthEncodedString(org_name, buffer); + writeLengthEncodedNumber(next_length, buffer); + buffer.write(reinterpret_cast(&character_set), 2); + buffer.write(reinterpret_cast(&column_length), 4); + buffer.write(reinterpret_cast(&column_type), 1); + buffer.write(reinterpret_cast(&flags), 2); + buffer.write(reinterpret_cast(&decimals), 1); + writeChar(0x0, 2, buffer); + if (is_comm_field_list_response) + { + /// We should write length encoded int with string size + /// followed by string with some "default values" (possibly it's column defaults). + /// But we just send NULL for simplicity. 
+ writeChar(0xfb, buffer); + } + } + + ColumnDefinition getColumnDefinition(const String & column_name, const TypeIndex type_index) + { + ColumnType column_type; + CharacterSet charset = CharacterSet::binary; + int flags = 0; + uint8_t decimals = 0; + switch (type_index) + { + case TypeIndex::UInt8: + column_type = ColumnType::MYSQL_TYPE_TINY; + flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; + break; + case TypeIndex::UInt16: + column_type = ColumnType::MYSQL_TYPE_SHORT; + flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; + break; + case TypeIndex::UInt32: + column_type = ColumnType::MYSQL_TYPE_LONG; + flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; + break; + case TypeIndex::UInt64: + column_type = ColumnType::MYSQL_TYPE_LONGLONG; + flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; + break; + case TypeIndex::Int8: + column_type = ColumnType::MYSQL_TYPE_TINY; + flags = ColumnDefinitionFlags::BINARY_FLAG; + break; + case TypeIndex::Int16: + column_type = ColumnType::MYSQL_TYPE_SHORT; + flags = ColumnDefinitionFlags::BINARY_FLAG; + break; + case TypeIndex::Int32: + column_type = ColumnType::MYSQL_TYPE_LONG; + flags = ColumnDefinitionFlags::BINARY_FLAG; + break; + case TypeIndex::Int64: + column_type = ColumnType::MYSQL_TYPE_LONGLONG; + flags = ColumnDefinitionFlags::BINARY_FLAG; + break; + case TypeIndex::Float32: + column_type = ColumnType::MYSQL_TYPE_FLOAT; + flags = ColumnDefinitionFlags::BINARY_FLAG; + decimals = 31; + break; + case TypeIndex::Float64: + column_type = ColumnType::MYSQL_TYPE_DOUBLE; + flags = ColumnDefinitionFlags::BINARY_FLAG; + decimals = 31; + break; + case TypeIndex::Date: + column_type = ColumnType::MYSQL_TYPE_DATE; + flags = ColumnDefinitionFlags::BINARY_FLAG; + break; + case TypeIndex::DateTime: + column_type = ColumnType::MYSQL_TYPE_DATETIME; + flags = ColumnDefinitionFlags::BINARY_FLAG; + break; + case TypeIndex::Decimal32: + case TypeIndex::Decimal64: + case TypeIndex::Decimal128: + /// MySQL Decimal has max 65 precision and 30 scale. Thus, Decimal256 is reported as a string + column_type = ColumnType::MYSQL_TYPE_DECIMAL; + flags = ColumnDefinitionFlags::BINARY_FLAG; + break; + default: + column_type = ColumnType::MYSQL_TYPE_STRING; + charset = CharacterSet::utf8_general_ci; + break; + } + return ColumnDefinition(column_name, charset, 0, column_type, flags, decimals); + } + } -} - -size_t ResultSetRow::getPayloadSize() const -{ - return payload_size; -} - -void ResultSetRow::writePayloadImpl(WriteBuffer & buffer) const -{ - for (size_t i = 0; i < columns.size(); ++i) - { - if (columns[i]->isNullAt(row_num)) - buffer.write(serialized[i].data(), 1); - else - writeLengthEncodedString(serialized[i], buffer); - } -} - -void ComFieldList::readPayloadImpl(ReadBuffer & payload) -{ - // Command byte has been already read from payload. 
-    readNullTerminated(table, payload);
-    readStringUntilEOF(field_wildcard, payload);
-}
-
-ColumnDefinition::ColumnDefinition()
-    : character_set(0x00), column_length(0), column_type(MYSQL_TYPE_DECIMAL), flags(0x00)
-{
-}
-
-ColumnDefinition::ColumnDefinition(
-    String schema_, String table_, String org_table_, String name_, String org_name_, uint16_t character_set_, uint32_t column_length_,
-    ColumnType column_type_, uint16_t flags_, uint8_t decimals_, bool with_defaults_)
-    : schema(std::move(schema_)), table(std::move(table_)), org_table(std::move(org_table_)), name(std::move(name_)),
-      org_name(std::move(org_name_)), character_set(character_set_), column_length(column_length_), column_type(column_type_),
-      flags(flags_), decimals(decimals_), is_comm_field_list_response(with_defaults_)
-{
-}
-
-ColumnDefinition::ColumnDefinition(
-    String name_, uint16_t character_set_, uint32_t column_length_, ColumnType column_type_, uint16_t flags_, uint8_t decimals_)
-    : ColumnDefinition("", "", "", std::move(name_), "", character_set_, column_length_, column_type_, flags_, decimals_)
-{
-}
-
-size_t ColumnDefinition::getPayloadSize() const
-{
-    return 12 +
-        getLengthEncodedStringSize("def") +
-        getLengthEncodedStringSize(schema) +
-        getLengthEncodedStringSize(table) +
-        getLengthEncodedStringSize(org_table) +
-        getLengthEncodedStringSize(name) +
-        getLengthEncodedStringSize(org_name) +
-        getLengthEncodedNumberSize(next_length) +
-        is_comm_field_list_response;
-}
-
-void ColumnDefinition::readPayloadImpl(ReadBuffer & payload)
-{
-    String def;
-    readLengthEncodedString(def, payload);
-    assert(def == "def");
-    readLengthEncodedString(schema, payload);
-    readLengthEncodedString(table, payload);
-    readLengthEncodedString(org_table, payload);
-    readLengthEncodedString(name, payload);
-    readLengthEncodedString(org_name, payload);
-    next_length = readLengthEncodedNumber(payload);
-    payload.readStrict(reinterpret_cast<char *>(&character_set), 2);
-    payload.readStrict(reinterpret_cast<char *>(&column_length), 4);
-    payload.readStrict(reinterpret_cast<char *>(&column_type), 1);
-    payload.readStrict(reinterpret_cast<char *>(&flags), 2);
-    payload.readStrict(reinterpret_cast<char *>(&decimals), 1);
-    payload.ignore(2);
-}
-
-void ColumnDefinition::writePayloadImpl(WriteBuffer & buffer) const
-{
-    writeLengthEncodedString(std::string("def"), buffer); /// always "def"
-    writeLengthEncodedString(schema, buffer);
-    writeLengthEncodedString(table, buffer);
-    writeLengthEncodedString(org_table, buffer);
-    writeLengthEncodedString(name, buffer);
-    writeLengthEncodedString(org_name, buffer);
-    writeLengthEncodedNumber(next_length, buffer);
-    buffer.write(reinterpret_cast<const char *>(&character_set), 2);
-    buffer.write(reinterpret_cast<const char *>(&column_length), 4);
-    buffer.write(reinterpret_cast<const char *>(&column_type), 1);
-    buffer.write(reinterpret_cast<const char *>(&flags), 2);
-    buffer.write(reinterpret_cast<const char *>(&decimals), 1);
-    writeChar(0x0, 2, buffer);
-    if (is_comm_field_list_response)
-    {
-        /// We should write length encoded int with string size
-        /// followed by string with some "default values" (possibly it's column defaults).
-        /// But we just send NULL for simplicity.
- writeChar(0xfb, buffer); - } -} - -ColumnDefinition getColumnDefinition(const String & column_name, const TypeIndex type_index) -{ - ColumnType column_type; - CharacterSet charset = CharacterSet::binary; - int flags = 0; - switch (type_index) - { - case TypeIndex::UInt8: - column_type = ColumnType::MYSQL_TYPE_TINY; - flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; - break; - case TypeIndex::UInt16: - column_type = ColumnType::MYSQL_TYPE_SHORT; - flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; - break; - case TypeIndex::UInt32: - column_type = ColumnType::MYSQL_TYPE_LONG; - flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; - break; - case TypeIndex::UInt64: - column_type = ColumnType::MYSQL_TYPE_LONGLONG; - flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; - break; - case TypeIndex::Int8: - column_type = ColumnType::MYSQL_TYPE_TINY; - flags = ColumnDefinitionFlags::BINARY_FLAG; - break; - case TypeIndex::Int16: - column_type = ColumnType::MYSQL_TYPE_SHORT; - flags = ColumnDefinitionFlags::BINARY_FLAG; - break; - case TypeIndex::Int32: - column_type = ColumnType::MYSQL_TYPE_LONG; - flags = ColumnDefinitionFlags::BINARY_FLAG; - break; - case TypeIndex::Int64: - column_type = ColumnType::MYSQL_TYPE_LONGLONG; - flags = ColumnDefinitionFlags::BINARY_FLAG; - break; - case TypeIndex::Float32: - column_type = ColumnType::MYSQL_TYPE_FLOAT; - flags = ColumnDefinitionFlags::BINARY_FLAG; - break; - case TypeIndex::Float64: - column_type = ColumnType::MYSQL_TYPE_DOUBLE; - flags = ColumnDefinitionFlags::BINARY_FLAG; - break; - case TypeIndex::Date: - column_type = ColumnType::MYSQL_TYPE_DATE; - flags = ColumnDefinitionFlags::BINARY_FLAG; - break; - case TypeIndex::DateTime: - column_type = ColumnType::MYSQL_TYPE_DATETIME; - flags = ColumnDefinitionFlags::BINARY_FLAG; - break; - case TypeIndex::String: - case TypeIndex::FixedString: - column_type = ColumnType::MYSQL_TYPE_STRING; - charset = CharacterSet::utf8_general_ci; - break; - default: - column_type = ColumnType::MYSQL_TYPE_STRING; - charset = CharacterSet::utf8_general_ci; - break; - } - return ColumnDefinition(column_name, charset, 0, column_type, flags, 0); -} - -} } diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index fa044d4ac9c..2af216529e5 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -1,13 +1,13 @@ #include #include -#include #include #include #include #include #include #include +#include #include @@ -31,6 +31,12 @@ std::string DataTypeDecimal::doGetName() const template std::string DataTypeDecimal::getSQLCompatibleName() const { + /// See https://dev.mysql.com/doc/refman/8.0/en/precision-math-decimal-characteristics.html + /// DECIMAL(M,D) + /// M is the maximum number of digits (the precision). It has a range of 1 to 65. + /// D is the number of digits to the right of the decimal point (the scale). It has a range of 0 to 30 and must be no larger than M. 
+ if (this->precision > 65 || this->scale > 30) + return "TEXT"; return fmt::format("DECIMAL({}, {})", this->precision, this->scale); } @@ -75,14 +81,14 @@ SerializationPtr DataTypeDecimal::doGetDefaultSerialization() const static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Decimal data type family must have exactly two arguments: precision and scale"); + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Decimal data type family must have exactly two arguments: precision and scale"); const auto * precision = arguments->children[0]->as(); const auto * scale = arguments->children[1]->as(); - if (!precision || precision->value.getType() != Field::Types::UInt64 || - !scale || !(scale->value.getType() == Field::Types::Int64 || scale->value.getType() == Field::Types::UInt64)) + if (!precision || precision->value.getType() != Field::Types::UInt64 || !scale + || !(scale->value.getType() == Field::Types::Int64 || scale->value.getType() == Field::Types::UInt64)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal data type family must have two numbers as its arguments"); UInt64 precision_value = precision->value.get(); @@ -95,13 +101,15 @@ template static DataTypePtr createExact(const ASTPtr & arguments) { if (!arguments || arguments->children.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Decimal32 | Decimal64 | Decimal128 | Decimal256 data type family must have exactly one arguments: scale"); + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Decimal32 | Decimal64 | Decimal128 | Decimal256 data type family must have exactly one arguments: scale"); const auto * scale_arg = arguments->children[0]->as(); if (!scale_arg || !(scale_arg->value.getType() == Field::Types::Int64 || scale_arg->value.getType() == Field::Types::UInt64)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Decimal32 | Decimal64 | Decimal128 | Decimal256 data type family must have a one number as its argument"); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Decimal32 | Decimal64 | Decimal128 | Decimal256 data type family must have a one number as its argument"); UInt64 precision = DecimalUtils::max_precision; UInt64 scale = scale_arg->value.get(); diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 2c283dcc2b7..56359fd0bea 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include #include @@ -48,9 +48,9 @@ struct FormatSettings enum class DateTimeInputFormat { - Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. - BestEffort, /// Use sophisticated rules to parse whatever possible. - BestEffortUS /// Use sophisticated rules to parse American style: mm/dd/yyyy + Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. + BestEffort, /// Use sophisticated rules to parse whatever possible. 
+ BestEffortUS /// Use sophisticated rules to parse American style: mm/dd/yyyy }; DateTimeInputFormat date_time_input_format = DateTimeInputFormat::Basic; @@ -282,6 +282,14 @@ struct FormatSettings uint32_t client_capabilities = 0; size_t max_packet_size = 0; uint8_t * sequence_id = nullptr; /// Not null if it's MySQLWire output format used to handle MySQL protocol connections. + /** + * COM_QUERY uses Text ResultSet + * https://dev.mysql.com/doc/dev/mysql-server/latest/page_protocol_com_query_response_text_resultset.html + * COM_STMT_EXECUTE uses Binary Protocol ResultSet + * https://dev.mysql.com/doc/dev/mysql-server/latest/page_protocol_com_stmt_execute_response.html + * By default, use Text ResultSet. + */ + bool use_binary_result_set = false; } mysql_wire; struct diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index c86d3c753c4..922f9887a82 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -37,7 +37,7 @@ String InterpreterShowColumnsQuery::getRewrittenQuery() SELECT name AS field, type AS type, - startsWith(type, 'Nullable') AS null, + if(startsWith(type, 'Nullable'), 'YES', 'NO') AS null, trim(concatWithSeparator(' ', if (is_in_primary_key, 'PRI', ''), if (is_in_sorting_key, 'SOR', ''))) AS key, if (default_kind IN ('ALIAS', 'DEFAULT', 'MATERIALIZED'), default_expression, NULL) AS default, '' AS extra )"; diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index f2157f63c25..3dafe560281 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -1,11 +1,12 @@ -#include #include +#include #include #include #include #include #include - +#include +#include "Common/logger_useful.h" namespace DB { @@ -13,17 +14,18 @@ namespace DB using namespace MySQLProtocol; using namespace MySQLProtocol::Generic; using namespace MySQLProtocol::ProtocolText; - +using namespace MySQLProtocol::ProtocolBinary; MySQLOutputFormat::MySQLOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_) - : IOutputFormat(header_, out_) - , client_capabilities(settings_.mysql_wire.client_capabilities) + : IOutputFormat(header_, out_), client_capabilities(settings_.mysql_wire.client_capabilities) { /// MySQlWire is a special format that is usually used as output format for MySQL protocol connections. /// In this case we have a correct `sequence_id` stored in `settings_.mysql_wire`. /// But it's also possible to specify MySQLWire as output format for clickhouse-client or clickhouse-local. /// There is no `sequence_id` stored in `settings_.mysql_wire` in this case, so we create a dummy one. sequence_id = settings_.mysql_wire.sequence_id ? 
settings_.mysql_wire.sequence_id : &dummy_sequence_id; + /// Switch between Text (COM_QUERY) and Binary (COM_EXECUTE_STMT) ResultSet + use_binary_result_set = settings_.mysql_wire.use_binary_result_set; const auto & header = getPort(PortKind::Main).getHeader(); data_types = header.getDataTypes(); @@ -54,7 +56,7 @@ void MySQLOutputFormat::writePrefix() packet_endpoint->sendPacket(getColumnDefinition(column_name, data_types[i]->getTypeId())); } - if (!(client_capabilities & Capability::CLIENT_DEPRECATE_EOF)) + if (!(client_capabilities & Capability::CLIENT_DEPRECATE_EOF) && !use_binary_result_set) { packet_endpoint->sendPacket(EOFPacket(0, 0)); } @@ -63,39 +65,67 @@ void MySQLOutputFormat::writePrefix() void MySQLOutputFormat::consume(Chunk chunk) { - for (size_t i = 0; i < chunk.getNumRows(); ++i) + if (!use_binary_result_set) { - ProtocolText::ResultSetRow row_packet(serializations, chunk.getColumns(), static_cast(i)); - packet_endpoint->sendPacket(row_packet); + for (size_t i = 0; i < chunk.getNumRows(); ++i) + { + ProtocolText::ResultSetRow row_packet(serializations, chunk.getColumns(), static_cast(i)); + packet_endpoint->sendPacket(row_packet); + } + } + else + { + for (size_t i = 0; i < chunk.getNumRows(); ++i) + { + ProtocolBinary::ResultSetRow row_packet(serializations, data_types, chunk.getColumns(), static_cast(i)); + packet_endpoint->sendPacket(row_packet); + } } } void MySQLOutputFormat::finalizeImpl() { - size_t affected_rows = 0; - std::string human_readable_info; - if (QueryStatusPtr process_list_elem = getContext()->getProcessListElement()) + if (!use_binary_result_set) { - CurrentThread::finalizePerformanceCounters(); - QueryStatusInfo info = process_list_elem->getInfo(); - affected_rows = info.written_rows; - double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; - human_readable_info = fmt::format( - "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", - info.read_rows, - ReadableSize(info.read_bytes), - elapsed_seconds, - static_cast(info.read_rows / elapsed_seconds), - ReadableSize(info.read_bytes / elapsed_seconds)); - } + size_t affected_rows = 0; + std::string human_readable_info; + if (QueryStatusPtr process_list_elem = getContext()->getProcessListElement()) + { + CurrentThread::finalizePerformanceCounters(); + QueryStatusInfo info = process_list_elem->getInfo(); + affected_rows = info.written_rows; + double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; + human_readable_info = fmt::format( + "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", + info.read_rows, + ReadableSize(info.read_bytes), + elapsed_seconds, + static_cast(info.read_rows / elapsed_seconds), + ReadableSize(info.read_bytes / elapsed_seconds)); + } - const auto & header = getPort(PortKind::Main).getHeader(); - if (header.columns() == 0) - packet_endpoint->sendPacket(OKPacket(0x0, client_capabilities, affected_rows, 0, 0, "", human_readable_info), true); - else if (client_capabilities & CLIENT_DEPRECATE_EOF) - packet_endpoint->sendPacket(OKPacket(0xfe, client_capabilities, affected_rows, 0, 0, "", human_readable_info), true); + const auto & header = getPort(PortKind::Main).getHeader(); + if (header.columns() == 0) + packet_endpoint->sendPacket(OKPacket(0x0, client_capabilities, affected_rows, 0, 0, "", human_readable_info), true); + else if (client_capabilities & CLIENT_DEPRECATE_EOF) + packet_endpoint->sendPacket(OKPacket(0xfe, client_capabilities, affected_rows, 0, 0, "", human_readable_info), true); + else + packet_endpoint->sendPacket(EOFPacket(0, 
0), true); + } else - packet_endpoint->sendPacket(EOFPacket(0, 0), true); + { + size_t affected_rows = 0; + if (QueryStatusPtr process_list_elem = getContext()->getProcessListElement()) + { + CurrentThread::finalizePerformanceCounters(); + QueryStatusInfo info = process_list_elem->getInfo(); + affected_rows = info.written_rows; + } + if (client_capabilities & CLIENT_DEPRECATE_EOF) + packet_endpoint->sendPacket(OKPacket(0xfe, client_capabilities, affected_rows, 0, 0, "", ""), true); + else + packet_endpoint->sendPacket(EOFPacket(0, 0), true); + } } void MySQLOutputFormat::flush() @@ -107,9 +137,8 @@ void registerOutputFormatMySQLWire(FormatFactory & factory) { factory.registerOutputFormat( "MySQLWire", - [](WriteBuffer & buf, - const Block & sample, - const FormatSettings & settings) { return std::make_shared(buf, sample, settings); }); + [](WriteBuffer & buf, const Block & sample, const FormatSettings & settings) + { return std::make_shared(buf, sample, settings); }); } } diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.h b/src/Processors/Formats/Impl/MySQLOutputFormat.h index 9481ef67070..6161b6bdc14 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.h +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include #include @@ -39,6 +39,7 @@ private: MySQLProtocol::PacketEndpointPtr packet_endpoint; DataTypes data_types; Serializations serializations; + bool use_binary_result_set = false; }; } diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 868575b701f..3715dfea9f7 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -1,29 +1,29 @@ #include "MySQLHandler.h" #include -#include -#include +#include +#include +#include #include #include -#include #include #include -#include -#include -#include #include #include #include +#include #include #include -#include +#include +#include +#include #include #include -#include -#include -#include -#include #include +#include +#include +#include +#include #include "config_version.h" @@ -67,10 +67,7 @@ static String killConnectionIdReplacementQuery(const String & query); static String selectLimitReplacementQuery(const String & query); MySQLHandler::MySQLHandler( - IServer & server_, - TCPServer & tcp_server_, - const Poco::Net::StreamSocket & socket_, - bool ssl_enabled, uint32_t connection_id_) + IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, uint32_t connection_id_) : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) @@ -78,7 +75,8 @@ MySQLHandler::MySQLHandler( , connection_id(connection_id_) , auth_plugin(new MySQLProtocol::Authentication::Native41()) { - server_capabilities = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF; + server_capabilities = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA + | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF; if (ssl_enabled) server_capabilities |= CLIENT_SSL; @@ -104,8 +102,13 @@ void MySQLHandler::run() try { - Handshake handshake(server_capabilities, connection_id, VERSION_STRING + String("-") + VERSION_NAME, - auth_plugin->getName(), auth_plugin->getAuthPluginData(), CharacterSet::utf8_general_ci); + Handshake handshake( + server_capabilities, + connection_id, + VERSION_STRING + String("-") + VERSION_NAME, + auth_plugin->getName(), + 
auth_plugin->getAuthPluginData(), + CharacterSet::utf8_general_ci); packet_endpoint->sendPacket(handshake, true); LOG_TRACE(log, "Sent handshake"); @@ -115,8 +118,10 @@ void MySQLHandler::run() client_capabilities = handshake_response.capability_flags; max_packet_size = handshake_response.max_packet_size ? handshake_response.max_packet_size : MAX_PACKET_LENGTH; - LOG_TRACE(log, - "Capabilities: {}, max_packet_size: {}, character_set: {}, user: {}, auth_response length: {}, database: {}, auth_plugin_name: {}", + LOG_TRACE( + log, + "Capabilities: {}, max_packet_size: {}, character_set: {}, user: {}, auth_response length: {}, database: {}, auth_plugin_name: " + "{}", handshake_response.capability_flags, handshake_response.max_packet_size, static_cast(handshake_response.character_set), @@ -160,8 +165,8 @@ void MySQLHandler::run() // For commands which are executed without MemoryTracker. LimitReadBuffer limited_payload(payload, 10000, /* trow_exception */ true, /* exact_limit */ {}, "too long MySQL packet."); - LOG_DEBUG(log, "Received command: {}. Connection id: {}.", - static_cast(static_cast(command)), connection_id); + LOG_DEBUG( + log, "Received command: {}. Connection id: {}.", static_cast(static_cast(command)), connection_id); if (!tcp_server.isOpen()) return; @@ -175,7 +180,7 @@ void MySQLHandler::run() comInitDB(limited_payload); break; case COM_QUERY: - comQuery(payload); + comQuery(payload, false); break; case COM_FIELD_LIST: comFieldList(limited_payload); @@ -227,13 +232,15 @@ void MySQLHandler::finishHandshake(MySQLProtocol::ConnectionPhase::HandshakeResp size_t pos = 0; /// Reads at least count and at most packet_size bytes. - auto read_bytes = [this, &buf, &pos, &packet_size](size_t count) -> void { + auto read_bytes = [this, &buf, &pos, &packet_size](size_t count) -> void + { while (pos < count) { int ret = socket().receiveBytes(buf + pos, static_cast(packet_size - pos)); if (ret == 0) { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data. Bytes read: {}. Bytes expected: 3", std::to_string(pos)); + throw Exception( + ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data. Bytes read: {}. Bytes expected: 3", std::to_string(pos)); } pos += ret; } @@ -272,7 +279,8 @@ void MySQLHandler::authenticate(const String & user_name, const String & auth_pl authPluginSSL(); } - std::optional auth_response = auth_plugin_name == auth_plugin->getName() ? std::make_optional(initial_auth_response) : std::nullopt; + std::optional auth_response + = auth_plugin_name == auth_plugin->getName() ? 
std::make_optional(initial_auth_response) : std::nullopt; auth_plugin->authenticate(user_name, *session, auth_response, packet_endpoint, secure_connection, socket().peerAddress()); } catch (const Exception & exc) @@ -304,8 +312,17 @@ void MySQLHandler::comFieldList(ReadBuffer & payload) for (const NameAndTypePair & column : metadata_snapshot->getColumns().getAll()) { ColumnDefinition column_definition( - database, packet.table, packet.table, column.name, column.name, CharacterSet::binary, 100, ColumnType::MYSQL_TYPE_STRING, 0, 0, true - ); + database, + packet.table, + packet.table, + column.name, + column.name, + CharacterSet::binary, + 100, + ColumnType::MYSQL_TYPE_STRING, + 0, + 0, + true); packet_endpoint->sendPacket(column_definition); } packet_endpoint->sendPacket(OKPacket(0xfe, client_capabilities, 0, 0, 0), true); @@ -318,7 +335,7 @@ void MySQLHandler::comPing() static bool isFederatedServerSetupSetCommand(const String & query); -void MySQLHandler::comQuery(ReadBuffer & payload) +void MySQLHandler::comQuery(ReadBuffer & payload, bool use_binary_protocol_result_set) { String query = String(payload.position(), payload.buffer().end()); @@ -350,20 +367,22 @@ void MySQLHandler::comQuery(ReadBuffer & payload) query_context->setCurrentQueryId(fmt::format("mysql:{}:{}", connection_id, toString(UUIDHelpers::generateV4()))); CurrentThread::QueryScope query_scope{query_context}; - std::atomic affected_rows {0}; + std::atomic affected_rows{0}; auto prev = query_context->getProgressCallback(); - query_context->setProgressCallback([&, my_prev = prev](const Progress & progress) - { - if (my_prev) - my_prev(progress); + query_context->setProgressCallback( + [&, my_prev = prev](const Progress & progress) + { + if (my_prev) + my_prev(progress); - affected_rows += progress.written_rows; - }); + affected_rows += progress.written_rows; + }); FormatSettings format_settings; format_settings.mysql_wire.client_capabilities = client_capabilities; format_settings.mysql_wire.max_packet_size = max_packet_size; format_settings.mysql_wire.sequence_id = &sequence_id; + format_settings.mysql_wire.use_binary_result_set = use_binary_protocol_result_set; auto set_result_details = [&with_output](const QueryResultDetails & details) { @@ -385,11 +404,18 @@ void MySQLHandler::comQuery(ReadBuffer & payload) void MySQLHandler::comStmtPrepare(DB::ReadBuffer & payload) { + if (prepared_statements_map.size() > 10000) /// Shouldn't happen in reality as COM_STMT_CLOSE cleans up the elements + { + LOG_ERROR(log, "Too many prepared statements"); + packet_endpoint->sendPacket(ERRPacket(), true); + return; + } + String query; readStringUntilEOF(query, payload); uint32_t statement_id = current_prepared_statement_id; - if (current_prepared_statement_id == std::numeric_limits::max()) [[unlikely]] + if (current_prepared_statement_id == std::numeric_limits::max()) { current_prepared_statement_id = 0; } @@ -400,7 +426,7 @@ void MySQLHandler::comStmtPrepare(DB::ReadBuffer & payload) // Key collisions should not happen here, as we remove the elements from the map with COM_STMT_CLOSE, // and we have quite a big range of available identifiers with 32-bit unsigned integer - if (prepared_statements_map.contains(statement_id)) [[unlikely]] + if (prepared_statements_map.contains(statement_id)) { LOG_ERROR( log, @@ -411,8 +437,8 @@ void MySQLHandler::comStmtPrepare(DB::ReadBuffer & payload) packet_endpoint->sendPacket(ERRPacket(), true); return; } - prepared_statements_map.emplace(statement_id, query); + 
prepared_statements_map.emplace(statement_id, query); packet_endpoint->sendPacket(PrepareStatementResponseOK(statement_id, 0, 0, 0), true); } @@ -421,7 +447,7 @@ void MySQLHandler::comStmtExecute(ReadBuffer & payload) uint32_t statement_id; payload.readStrict(reinterpret_cast(&statement_id), 4); - if (!prepared_statements_map.contains(statement_id)) [[unlikely]] + if (!prepared_statements_map.contains(statement_id)) { LOG_ERROR(log, "Could not find prepared statement with id {}", statement_id); packet_endpoint->sendPacket(ERRPacket(), true); @@ -430,14 +456,16 @@ void MySQLHandler::comStmtExecute(ReadBuffer & payload) // Temporary workaround as we work only with queries that do not bind any parameters atm ReadBufferFromString com_query_payload(prepared_statements_map.at(statement_id)); - MySQLHandler::comQuery(com_query_payload); + MySQLHandler::comQuery(com_query_payload, true); }; -void MySQLHandler::comStmtClose([[maybe_unused]] ReadBuffer & payload) { +void MySQLHandler::comStmtClose(ReadBuffer & payload) +{ uint32_t statement_id; payload.readStrict(reinterpret_cast(&statement_id), 4); - if (prepared_statements_map.contains(statement_id)) { + if (prepared_statements_map.contains(statement_id)) + { prepared_statements_map.erase(statement_id); } @@ -447,13 +475,17 @@ void MySQLHandler::comStmtClose([[maybe_unused]] ReadBuffer & payload) { void MySQLHandler::authPluginSSL() { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, - "ClickHouse was built without SSL support. Try specifying password using double SHA1 in users.xml."); + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "ClickHouse was built without SSL support. Try specifying password using double SHA1 in users.xml."); } void MySQLHandler::finishHandshakeSSL( - [[maybe_unused]] size_t packet_size, [[maybe_unused]] char * buf, [[maybe_unused]] size_t pos, - [[maybe_unused]] std::function read_bytes, [[maybe_unused]] MySQLProtocol::ConnectionPhase::HandshakeResponse & packet) + [[maybe_unused]] size_t packet_size, + [[maybe_unused]] char * buf, + [[maybe_unused]] size_t pos, + [[maybe_unused]] std::function read_bytes, + [[maybe_unused]] MySQLProtocol::ConnectionPhase::HandshakeResponse & packet) { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Client requested SSL, while it is disabled."); } @@ -467,10 +499,9 @@ MySQLHandlerSSL::MySQLHandlerSSL( uint32_t connection_id_, RSA & public_key_, RSA & private_key_) - : MySQLHandler(server_, tcp_server_, socket_, ssl_enabled, connection_id_) - , public_key(public_key_) - , private_key(private_key_) -{} + : MySQLHandler(server_, tcp_server_, socket_, ssl_enabled, connection_id_), public_key(public_key_), private_key(private_key_) +{ +} void MySQLHandlerSSL::authPluginSSL() { @@ -478,7 +509,10 @@ void MySQLHandlerSSL::authPluginSSL() } void MySQLHandlerSSL::finishHandshakeSSL( - size_t packet_size, char *buf, size_t pos, std::function read_bytes, + size_t packet_size, + char * buf, + size_t pos, + std::function read_bytes, MySQLProtocol::ConnectionPhase::HandshakeResponse & packet) { read_bytes(packet_size); /// Reading rest SSLRequest. 
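Editor's note: the COM_STMT_PREPARE / COM_STMT_EXECUTE / COM_STMT_CLOSE hunks above boil down to a small id-to-query map with wrap-around id allocation (COM_STMT_EXECUTE feeds the stored query back through comQuery() with the binary result set flag set). A minimal standalone sketch of that bookkeeping, with simplified error handling; PreparedStatementRegistry and its members are illustrative names, not the actual handler code:

    #include <cstdint>
    #include <limits>
    #include <optional>
    #include <string>
    #include <unordered_map>

    /// Sketch only: the real logic lives in MySQLHandler and replies with ERRPacket on failure.
    class PreparedStatementRegistry
    {
    public:
        std::optional<uint32_t> prepare(const std::string & query)
        {
            /// Shouldn't grow unboundedly: COM_STMT_CLOSE normally cleans up entries.
            if (statements.size() > 10000)
                return std::nullopt;

            if (next_id == std::numeric_limits<uint32_t>::max())
                next_id = 0;

            uint32_t statement_id = next_id++;

            /// Collisions should not happen, since ids are erased on COM_STMT_CLOSE
            /// and the 32-bit space is large, but the patch still guards against them.
            if (statements.contains(statement_id))
                return std::nullopt;

            statements.emplace(statement_id, query);
            return statement_id;
        }

        /// COM_STMT_EXECUTE: unknown ids are an error.
        const std::string * find(uint32_t statement_id) const
        {
            auto it = statements.find(statement_id);
            return it == statements.end() ? nullptr : &it->second;
        }

        /// COM_STMT_CLOSE: erase the entry; the protocol sends no response for this command.
        void close(uint32_t statement_id) { statements.erase(statement_id); }

    private:
        uint32_t next_id = 0;
        std::unordered_map<uint32_t, std::string> statements;
    };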
@@ -508,8 +542,8 @@ static bool isFederatedServerSetupSetCommand(const String & query) "|(^(SET AUTOCOMMIT(.*)))" "|(^(SET sql_mode(.*)))" "|(^(SET @@(.*)))" - "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))" - , std::regex::icase}; + "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))", + std::regex::icase}; return 1 == std::regex_match(query, expr); } diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 6b8cc56a46e..a412b647ae2 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -1,12 +1,12 @@ #pragma once -#include -#include -#include #include -#include #include +#include #include +#include +#include +#include #include "IServer.h" #include "config.h" @@ -19,7 +19,7 @@ namespace CurrentMetrics { - extern const Metric MySQLConnection; +extern const Metric MySQLConnection; } namespace DB @@ -32,11 +32,7 @@ class MySQLHandler : public Poco::Net::TCPServerConnection { public: MySQLHandler( - IServer & server_, - TCPServer & tcp_server_, - const Poco::Net::StreamSocket & socket_, - bool ssl_enabled, - uint32_t connection_id_); + IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, uint32_t connection_id_); void run() final; @@ -46,7 +42,7 @@ protected: /// Enables SSL, if client requested. void finishHandshake(MySQLProtocol::ConnectionPhase::HandshakeResponse &); - void comQuery(ReadBuffer & payload); + void comQuery(ReadBuffer & payload, bool use_binary_protocol_result_set); void comFieldList(ReadBuffer & payload); @@ -63,7 +59,12 @@ protected: void comStmtClose(ReadBuffer & payload); virtual void authPluginSSL(); - virtual void finishHandshakeSSL(size_t packet_size, char * buf, size_t pos, std::function read_bytes, MySQLProtocol::ConnectionPhase::HandshakeResponse & packet); + virtual void finishHandshakeSSL( + size_t packet_size, + char * buf, + size_t pos, + std::function read_bytes, + MySQLProtocol::ConnectionPhase::HandshakeResponse & packet); IServer & server; TCPServer & tcp_server; @@ -109,8 +110,11 @@ private: void authPluginSSL() override; void finishHandshakeSSL( - size_t packet_size, char * buf, size_t pos, - std::function read_bytes, MySQLProtocol::ConnectionPhase::HandshakeResponse & packet) override; + size_t packet_size, + char * buf, + size_t pos, + std::function read_bytes, + MySQLProtocol::ConnectionPhase::HandshakeResponse & packet) override; RSA & public_key; RSA & private_key; diff --git a/src/Storages/System/InformationSchema/key_column_usage.sql b/src/Storages/System/InformationSchema/key_column_usage.sql new file mode 100644 index 00000000000..43630b8c8b9 --- /dev/null +++ b/src/Storages/System/InformationSchema/key_column_usage.sql @@ -0,0 +1,38 @@ +ATTACH VIEW key_column_usage + ( + `referenced_table_schema` Nullable(String), + `referenced_table_name` Nullable(String), + `referenced_column_name` Nullable(String), + `table_schema` String, + `table_name` String, + `column_name` Nullable(String), + `ordinal_position` UInt32, + `constraint_name` Nullable(String), + `REFERENCED_TABLE_SCHEMA` Nullable(String), + `REFERENCED_TABLE_NAME` Nullable(String), + `REFERENCED_COLUMN_NAME` Nullable(String), + `TABLE_SCHEMA` String, + `TABLE_NAME` String, + `COLUMN_NAME` Nullable(String), + `ORDINAL_POSITION` UInt32, + `CONSTRAINT_NAME` Nullable(String) + ) AS +SELECT NULL AS `referenced_table_schema`, + NULL AS `referenced_table_name`, + NULL AS `referenced_column_name`, + database AS `table_schema`, + table AS `table_name`, + name AS `column_name`, + position AS `ordinal_position`, + 
'PRIMARY' AS `constraint_name`, + + `referenced_table_schema` AS `REFERENCED_TABLE_SCHEMA`, + `referenced_table_name` AS `REFERENCED_TABLE_NAME`, + `referenced_column_name` AS `REFERENCED_COLUMN_NAME`, + `table_schema` AS `TABLE_SCHEMA`, + `table_name` AS `TABLE_NAME`, + `column_name` AS `COLUMN_NAME`, + `ordinal_position` AS `ORDINAL_POSITION`, + `constraint_name` AS `CONSTRAINT_NAME` +FROM system.columns +WHERE is_in_primary_key; \ No newline at end of file diff --git a/src/Storages/System/InformationSchema/referential_constraints.sql b/src/Storages/System/InformationSchema/referential_constraints.sql new file mode 100644 index 00000000000..8216b8fff83 --- /dev/null +++ b/src/Storages/System/InformationSchema/referential_constraints.sql @@ -0,0 +1,25 @@ +ATTACH VIEW referential_constraints + ( + `constraint_name` Nullable(String), + `constraint_schema` String, + `table_name` String, + `update_rule` String, + `delete_rule` String, + `CONSTRAINT_NAME` Nullable(String), + `CONSTRAINT_SCHEMA` String, + `TABLE_NAME` String, + `UPDATE_RULE` String, + `DELETE_RULE` String + ) AS +SELECT NULL AS `constraint_name`, + '' AS `constraint_schema`, + '' AS `table_name`, + '' AS `update_rule`, + '' AS `delete_rule`, + + NULL AS `CONSTRAINT_NAME`, + '' AS `CONSTRAINT_SCHEMA`, + '' AS `TABLE_NAME`, + '' AS `UPDATE_RULE`, + '' AS `DELETE_RULE` +WHERE false; \ No newline at end of file diff --git a/src/Storages/System/InformationSchema/schemata.sql b/src/Storages/System/InformationSchema/schemata.sql index 9686fcbf4fa..ca4ad4f7310 100644 --- a/src/Storages/System/InformationSchema/schemata.sql +++ b/src/Storages/System/InformationSchema/schemata.sql @@ -1,26 +1,33 @@ ATTACH VIEW schemata -( - `catalog_name` String, - `schema_name` String, - `schema_owner` String, - `default_character_set_catalog` Nullable(String), - `default_character_set_schema` Nullable(String), - `default_character_set_name` Nullable(String), - `sql_path` Nullable(String), - `CATALOG_NAME` String ALIAS catalog_name, - `SCHEMA_NAME` String ALIAS schema_name, - `SCHEMA_OWNER` String ALIAS schema_owner, - `DEFAULT_CHARACTER_SET_CATALOG` Nullable(String) ALIAS default_character_set_catalog, - `DEFAULT_CHARACTER_SET_SCHEMA` Nullable(String) ALIAS default_character_set_schema, - `DEFAULT_CHARACTER_SET_NAME` Nullable(String) ALIAS default_character_set_name, - `SQL_PATH` Nullable(String) ALIAS sql_path -) AS -SELECT - name AS catalog_name, - name AS schema_name, - 'default' AS schema_owner, - NULL AS default_character_set_catalog, - NULL AS default_character_set_schema, - NULL AS default_character_set_name, - NULL AS sql_path + ( + `catalog_name` String, + `schema_name` String, + `schema_owner` String, + `default_character_set_catalog` Nullable(String), + `default_character_set_schema` Nullable(String), + `default_character_set_name` Nullable(String), + `sql_path` Nullable(String), + `CATALOG_NAME` String, + `SCHEMA_NAME` String, + `SCHEMA_OWNER` String, + `DEFAULT_CHARACTER_SET_CATALOG` Nullable(String), + `DEFAULT_CHARACTER_SET_SCHEMA` Nullable(String), + `DEFAULT_CHARACTER_SET_NAME` Nullable(String), + `SQL_PATH` Nullable(String) + ) AS +SELECT name AS `catalog_name`, + name AS `schema_name`, + 'default' AS `schema_owner`, + NULL AS `default_character_set_catalog`, + NULL AS `default_character_set_schema`, + NULL AS `default_character_set_name`, + NULL AS `sql_path`, + + catalog_name AS `CATALOG_NAME`, + schema_name AS `SCHEMA_NAME`, + schema_owner AS `SCHEMA_OWNER`, + NULL AS `DEFAULT_CHARACTER_SET_CATALOG`, + NULL AS 
`DEFAULT_CHARACTER_SET_SCHEMA`, + NULL AS `DEFAULT_CHARACTER_SET_NAME`, + NULL AS `SQL_PATH` FROM system.databases diff --git a/src/Storages/System/InformationSchema/tables.sql b/src/Storages/System/InformationSchema/tables.sql index 8eea3713923..b3bbfa72517 100644 --- a/src/Storages/System/InformationSchema/tables.sql +++ b/src/Storages/System/InformationSchema/tables.sql @@ -1,17 +1,35 @@ ATTACH VIEW tables -( - `table_catalog` String, - `table_schema` String, - `table_name` String, - `table_type` Enum8('BASE TABLE' = 1, 'VIEW' = 2, 'FOREIGN TABLE' = 3, 'LOCAL TEMPORARY' = 4, 'SYSTEM VIEW' = 5), - `TABLE_CATALOG` String ALIAS table_catalog, - `TABLE_SCHEMA` String ALIAS table_schema, - `TABLE_NAME` String ALIAS table_name, - `TABLE_TYPE` Enum8('BASE TABLE' = 1, 'VIEW' = 2, 'FOREIGN TABLE' = 3, 'LOCAL TEMPORARY' = 4, 'SYSTEM VIEW' = 5) ALIAS table_type -) AS -SELECT - database AS table_catalog, - database AS table_schema, - name AS table_name, - multiIf(is_temporary, 4, engine like '%View', 2, engine LIKE 'System%', 5, has_own_data = 0, 3, 1) AS table_type -FROM system.tables + ( + `table_catalog` String, + `table_schema` String, + `table_name` String, + `table_type` String, + `table_comment` String, + `table_collation` String, + `TABLE_CATALOG` String, + `TABLE_SCHEMA` String, + `TABLE_NAME` String, + `TABLE_TYPE` String, + `TABLE_COMMENT` String, + `TABLE_COLLATION` String + ) AS +SELECT database AS `table_catalog`, + database AS `table_schema`, + name AS `table_name`, + comment AS `table_comment`, + multiIf( + is_temporary, 'LOCAL TEMPORARY', + engine LIKE '%View', 'VIEW', + engine LIKE 'System%', 'SYSTEM VIEW', + has_own_data = 0, 'FOREIGN TABLE', + 'BASE TABLE' + ) AS `table_type`, + 'utf8mb4_0900_ai_ci' AS `table_collation`, + + table_catalog AS `TABLE_CATALOG`, + table_schema AS `TABLE_SCHEMA`, + table_name AS `TABLE_NAME`, + table_comment AS `TABLE_COMMENT`, + table_type AS `TABLE_TYPE`, + table_collation AS `TABLE_COLLATION` +FROM system.tables \ No newline at end of file diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 074a648d235..d4775bf0d4a 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -12,7 +12,8 @@ INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema INCBIN(resource_tables_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/tables.sql"); INCBIN(resource_views_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/views.sql"); INCBIN(resource_columns_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/columns.sql"); - +INCBIN(resource_key_column_usage_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/key_column_usage.sql"); +INCBIN(resource_referential_constraints_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/referential_constraints.sql"); namespace DB { @@ -66,6 +67,8 @@ void attachInformationSchema(ContextMutablePtr context, IDatabase & information_ createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast(gresource_tables_sqlData), gresource_tables_sqlSize)); createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast(gresource_views_sqlData), gresource_views_sqlSize)); createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast(gresource_columns_sqlData), gresource_columns_sqlSize)); + 
createInformationSchemaView(context, information_schema_database, "key_column_usage", std::string_view(reinterpret_cast<const char *>(gresource_key_column_usage_sqlData), gresource_key_column_usage_sqlSize));
+    createInformationSchemaView(context, information_schema_database, "referential_constraints", std::string_view(reinterpret_cast<const char *>(gresource_referential_constraints_sqlData), gresource_referential_constraints_sqlSize));
 }
 }

From e62dc054219aa144e8d8682b376ef5622e259e2e Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 25 Aug 2023 21:06:14 +0000
Subject: [PATCH 0305/1687] fix

---
 .github/workflows/pull_request.yml | 1 +
 src/Common/MemoryTracker.cpp       | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 985bb7eb2a2..bb2f2783b27 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -5092,6 +5092,7 @@ jobs:
         with:
           name: ${{ matrix.sanitizer }}-artifacts
           path: ./out/artifacts
+
 #############################################################################################
 ###################################### JEPSEN TESTS #########################################
 #############################################################################################
diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp
index 93bd50a0b49..7747302b29e 100644
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@@ -85,7 +85,10 @@ inline std::string_view toDescription(OvercommitResult result)
 
 bool shouldTrackAllocation(DB::Float64 probability, void * ptr)
 {
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion"
     return intHash64(uintptr_t(ptr)) < std::numeric_limits<uint64_t>::max() * probability;
+# pragma clang diagnostic pop
 }
 
 }

From 15f77f1d4f4fd93b27425fd461199b9c96e860ce Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 25 Aug 2023 21:23:12 +0000
Subject: [PATCH 0306/1687] style fix

---
 .github/workflows/pull_request.yml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index bb2f2783b27..2f74327d866 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -5092,10 +5092,9 @@ jobs:
         with:
           name: ${{ matrix.sanitizer }}-artifacts
           path: ./out/artifacts
-
-#############################################################################################
-###################################### JEPSEN TESTS #########################################
-#############################################################################################
+  #############################################################################################
+  ###################################### JEPSEN TESTS #########################################
+  #############################################################################################
   Jepsen:
     # This is special test NOT INCLUDED in FinishCheck
     # When it's skipped, all dependent tasks will be skipped too.
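Editor's note: the MemoryTracker.cpp hunk above silences -Wimplicit-const-int-float-conversion around the sampled allocation check. The warning fires because the maximum of a 64-bit unsigned integer cannot be represented exactly as a double. A self-contained sketch of the same sampling idea, assuming splitmix64 as a stand-in for intHash64 and illustrative names throughout:

    #include <cstdint>
    #include <cstdio>
    #include <limits>

    /// splitmix64 finalizer, a uniform 64-bit mixer standing in for intHash64.
    static uint64_t mix64(uint64_t x)
    {
        x += 0x9e3779b97f4a7c15ULL;
        x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
        x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
        return x ^ (x >> 31);
    }

    /// Track an allocation with the given probability: hash the pointer to a uniform
    /// 64-bit value and compare it against probability * 2^64. The conversion of
    /// numeric_limits<uint64_t>::max() to double is exactly what the warning flags.
    static bool shouldTrackAllocationSketch(double probability, const void * ptr)
    {
        double threshold = static_cast<double>(std::numeric_limits<uint64_t>::max()) * probability;
        return static_cast<double>(mix64(reinterpret_cast<std::uintptr_t>(ptr))) < threshold;
    }

    int main()
    {
        static int slots[1000];
        int tracked = 0;
        for (int & slot : slots)
            tracked += shouldTrackAllocationSketch(0.1, &slot);
        printf("tracked %d of 1000 allocations (expect roughly 100)\n", tracked);
        return 0;
    }

Because the decision depends only on the pointer value, the same allocation is deterministically either tracked or not, which keeps the accounting consistent between the allocation and deallocation paths.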
From 3da0945316f35c43814301db29d50760a17c5242 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 26 Aug 2023 02:16:05 +0000 Subject: [PATCH 0307/1687] no warning for implicit-const-int-float-conversion - clang 15 --- cmake/warnings.cmake | 1 + src/Common/MemoryTracker.cpp | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 00fa32a6b7f..4d2f7f09a0c 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -27,6 +27,7 @@ if (COMPILER_CLANG) no_warning(sign-conversion) no_warning(implicit-int-conversion) no_warning(implicit-int-float-conversion) + no_warning(implicit-const-int-float-conversion) no_warning(ctad-maybe-unsupported) # clang 9+, linux-only no_warning(disabled-macro-expansion) no_warning(documentation-unknown-command) diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 7747302b29e..93bd50a0b49 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -85,10 +85,7 @@ inline std::string_view toDescription(OvercommitResult result) bool shouldTrackAllocation(DB::Float64 probability, void * ptr) { -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion" return intHash64(uintptr_t(ptr)) < std::numeric_limits::max() * probability; -# pragma clang diagnostic pop } } From fb0956b76aaf2692bde16d89672f957c7bfe2c16 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 26 Aug 2023 02:45:25 +0000 Subject: [PATCH 0308/1687] debug --- cmake/warnings.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 4d2f7f09a0c..527f55e69c5 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -18,6 +18,7 @@ endif () if (COMPILER_CLANG) # Add some warnings that are not available even with -Wall -Wextra -Wpedantic. # We want to get everything out of the compiler for code quality. + message(STATUS "WARNINGS SETUP") add_warning(everything) add_warning(pedantic) no_warning(zero-length-array) From bb6189aaeb3f5120ebf32c2dfb7d254e62eeca0b Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 26 Aug 2023 03:42:16 +0000 Subject: [PATCH 0309/1687] debug --- CMakeLists.txt | 2 ++ cmake/add_warning.cmake | 1 + cmake/warnings.cmake | 1 + 3 files changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7895421954a..f3d344f9cc2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -529,7 +529,9 @@ set(CONFIG_INCLUDE_PATH ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTER include_directories(${CONFIG_INCLUDE_PATH}) # Add as many warnings as possible for our own code. 
+message(STATUS "BEFORE WARNINGS") include (cmake/warnings.cmake) +message(STATUS "AFTER WARNINGS") include (cmake/print_flags.cmake) if (ENABLE_RUST) diff --git a/cmake/add_warning.cmake b/cmake/add_warning.cmake index e7797fcc7a6..bf712d2320a 100644 --- a/cmake/add_warning.cmake +++ b/cmake/add_warning.cmake @@ -3,6 +3,7 @@ include (CheckCCompilerFlag) # Try to add -Wflag if compiler supports it macro (add_warning flag) + message(STATUS "ADD WARNING ${flag}") string (REPLACE "-" "_" underscored_flag ${flag}) string (REPLACE "+" "x" underscored_flag ${underscored_flag}) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 527f55e69c5..f2ddf65b2c1 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -7,6 +7,7 @@ # - sometimes warnings from 3rd party libraries may come from macro substitutions in our code # and we have to wrap them with #pragma GCC/clang diagnostic ignored +message(STATUS "INSIDE WARNINGS") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") # Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size. From e27593bfd39bf88c65c62fd9ed9b9eb66d0c769c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 26 Aug 2023 04:45:43 +0000 Subject: [PATCH 0310/1687] debug --- cmake/warnings.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index f2ddf65b2c1..c595b70a252 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -1,3 +1,5 @@ +message(STATUS "INSIDE WARNINGS 0") + # Our principle is to enable as many warnings as possible and always do it with "warnings as errors" flag. # # But it comes with some cost: From 06aae687a0a7494646a66c665134023d0e39dc2f Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 26 Aug 2023 06:01:48 +0000 Subject: [PATCH 0311/1687] local oss-fuzz build_fuzzers --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 2f74327d866..8b7bced6550 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -5074,7 +5074,7 @@ jobs: steps: - name: Build Fuzzers (${{ matrix.sanitizer }}) id: build - uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + uses: clickhouse/oss-fuzz/infra/cifuzz/actions/build_fuzzers@no-implicit-int-float-conversion with: oss-fuzz-project-name: 'clickhouse' language: c++ From 8ed7fe98464175067eac6644957f198d2a241fd6 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 26 Aug 2023 15:48:48 +0000 Subject: [PATCH 0312/1687] debug --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 8b7bced6550..3eb0e7e41d9 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -5074,7 +5074,7 @@ jobs: steps: - name: Build Fuzzers (${{ matrix.sanitizer }}) id: build - uses: clickhouse/oss-fuzz/infra/cifuzz/actions/build_fuzzers@no-implicit-int-float-conversion + uses: clickhouse/oss-fuzz/infra/cifuzz/actions/build_fuzzers@no-implicit-int-float-conversion_x with: oss-fuzz-project-name: 'clickhouse' language: c++ From 713f971b569e096fd23e0fe187f073db4d473b1e Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 26 Aug 2023 16:06:41 +0000 Subject: [PATCH 0313/1687] debug --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 3eb0e7e41d9..8b7bced6550 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -5074,7 +5074,7 @@ jobs: steps: - name: Build Fuzzers (${{ matrix.sanitizer }}) id: build - uses: clickhouse/oss-fuzz/infra/cifuzz/actions/build_fuzzers@no-implicit-int-float-conversion_x + uses: clickhouse/oss-fuzz/infra/cifuzz/actions/build_fuzzers@no-implicit-int-float-conversion with: oss-fuzz-project-name: 'clickhouse' language: c++ From 49b3f208c0490978c99bb18b4253976979485c57 Mon Sep 17 00:00:00 2001 From: seshWCS Date: Sat, 26 Aug 2023 23:40:55 +0000 Subject: [PATCH 0314/1687] Added result to compressDataForType --- src/Compression/CompressionCodecGCD.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index bf0988b6a33..8aa8545e55c 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -80,6 +80,7 @@ namespace template UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) { + UInt32 result = 0; if (source_size % sizeof(T) != 0) throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot GCD compress, data size {} is not aligned to {}", source_size, sizeof(T)); @@ -100,8 +101,8 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) } unalignedStore(dest, gcd_divider); - auto * dest_start = dest; dest += sizeof(T); + result += sizeof(T); if (typeid(T) == typeid(UInt32) || typeid(T) == typeid(UInt64)) { @@ -114,6 +115,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) unalignedStore(dest, unalignedLoad(cur_source) / divider); cur_source += sizeof(TUInt32Or64); dest += sizeof(TUInt32Or64); + result += sizeof(T); } } else @@ -124,9 +126,10 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) unalignedStore(dest, unalignedLoad(cur_source) / gcd_divider); cur_source += sizeof(T); dest += sizeof(T); + result += sizeof(T); } } - return static_cast(dest - dest_start); + return result; } template From 62fe4e21c145dbddc8141d7fc58a9e4668f569b4 Mon Sep 17 00:00:00 2001 From: seshWCS Date: Sat, 26 Aug 2023 23:57:44 +0000 Subject: [PATCH 0315/1687] UInt32 => size_t + static_cast(size_t) --- src/Compression/CompressionCodecGCD.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 8aa8545e55c..4815e871bab 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -80,7 +80,7 @@ namespace template UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) { - UInt32 result = 0; + size_t result = 0; if (source_size % sizeof(T) != 0) throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot GCD compress, data size {} is not aligned to {}", source_size, sizeof(T)); @@ -129,7 +129,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) result += sizeof(T); } } - return result; + return static_cast(result); } template From 887a4da6b31f1962314754b250bb6cee36a9bab7 Mon Sep 17 00:00:00 2001 From: seshWCS Date: Sun, 27 Aug 2023 01:08:38 +0000 Subject: [PATCH 0316/1687] Bug-fix --- src/Compression/CompressionCodecGCD.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 
4815e871bab..58b50420840 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -98,6 +98,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) { gcd_divider = boost::math::gcd(gcd_divider, unalignedLoad(cur_source)); } + cur_source += sizeof(T); } unalignedStore(dest, gcd_divider); From bbaef4e4f4c6be6e054617b7e300cdbaf276454b Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 27 Aug 2023 13:29:16 +0000 Subject: [PATCH 0317/1687] fix --- .../fuzzers/aggregate_function_state_deserialization_fuzzer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp index 2ea01e1d5bc..8e1750edf40 100644 --- a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp +++ b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include From b9425d8c4b0156b85b219a47d98e9e5fe405bb80 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 27 Aug 2023 15:47:29 +0000 Subject: [PATCH 0318/1687] no-newline-eof for Parsers/fuzzers/codegen_fuzzer --- src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index 7be3a2ef909..042ab710a33 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -41,5 +41,8 @@ clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS}) set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier") +# contrib/libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h:143:44: error: no newline at end of file [-Werror,-Wnewline-eof] +target_compile_options (codegen_select_fuzzer PRIVATE -Wno-newline-eof) + target_include_directories(codegen_select_fuzzer SYSTEM BEFORE PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms) From 51aa89eed8470f05911d3d85edb7cb84477632a7 Mon Sep 17 00:00:00 2001 From: irenjj Date: Mon, 28 Aug 2023 00:10:33 +0800 Subject: [PATCH 0319/1687] Add a setting to automatically escape special characters in Markdown. 
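The setting introduced below backslash-escapes characters that Markdown would otherwise treat as formatting, so that cell contents survive MarkdownRowOutputFormat intact. A standalone sketch of the idea; the escaped character set (CommonMark's ASCII punctuation) and the escapeMarkdown name are assumptions for illustration, while the patch itself routes this through find_first_symbols_markdown and a helper in WriteHelpers.h:

    #include <iostream>
    #include <string>
    #include <string_view>

    /// Escape characters that carry meaning in Markdown by prefixing a backslash.
    /// Assumption: CommonMark's escapable ASCII punctuation; the patch may use a narrower set.
    std::string escapeMarkdown(std::string_view s)
    {
        static constexpr std::string_view special = R"del(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)del";
        std::string out;
        out.reserve(s.size());
        for (char c : s)
        {
            if (special.find(c) != std::string_view::npos)
                out.push_back('\\');
            out.push_back(c);
        }
        return out;
    }

    int main()
    {
        /// A cell like "a|b_c" would otherwise break a Markdown table row.
        std::cout << escapeMarkdown("a|b_c") << '\n'; /// prints a\|b\_c
    }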
--- base/base/find_symbols.h | 22 +++ .../operations/settings/settings-formats.md | 11 ++ src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + .../Serializations/ISerialization.cpp | 6 + src/DataTypes/Serializations/ISerialization.h | 2 + .../SerializationAggregateFunction.cpp | 13 ++ .../SerializationAggregateFunction.h | 2 + .../Serializations/SerializationBool.cpp | 6 + .../Serializations/SerializationBool.h | 2 + .../SerializationCustomSimpleText.cpp | 13 ++ .../SerializationCustomSimpleText.h | 2 + .../Serializations/SerializationDate.cpp | 6 + .../Serializations/SerializationDate.h | 1 + .../Serializations/SerializationDate32.cpp | 7 + .../Serializations/SerializationDate32.h | 1 + .../Serializations/SerializationDateTime.cpp | 6 + .../Serializations/SerializationDateTime.h | 1 + .../SerializationDateTime64.cpp | 6 + .../Serializations/SerializationDateTime64.h | 1 + .../Serializations/SerializationEnum.cpp | 14 ++ .../Serializations/SerializationEnum.h | 2 + .../SerializationFixedString.cpp | 13 ++ .../Serializations/SerializationFixedString.h | 2 + .../Serializations/SerializationIPv4andIPv6.h | 4 + .../Serializations/SerializationInterval.cpp | 7 + .../Serializations/SerializationInterval.h | 1 + .../SerializationLowCardinality.cpp | 6 + .../SerializationLowCardinality.h | 1 + .../Serializations/SerializationNullable.cpp | 6 + .../Serializations/SerializationNullable.h | 1 + .../Serializations/SerializationObject.cpp | 16 ++ .../Serializations/SerializationObject.h | 1 + .../Serializations/SerializationSparse.cpp | 6 + .../Serializations/SerializationSparse.h | 2 + .../Serializations/SerializationString.cpp | 12 ++ .../Serializations/SerializationString.h | 2 + .../Serializations/SerializationUUID.cpp | 6 + .../Serializations/SerializationUUID.h | 2 + .../Serializations/SerializationWrapper.cpp | 6 + .../Serializations/SerializationWrapper.h | 2 + .../Serializations/SimpleTextSerialization.h | 6 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 2 + src/IO/WriteHelpers.h | 166 ++++++++++++++++++ .../Formats/Impl/MarkdownRowOutputFormat.cpp | 2 +- .../0_stateless/01231_markdown_format.sql | 4 +- 47 files changed, 400 insertions(+), 3 deletions(-) diff --git a/base/base/find_symbols.h b/base/base/find_symbols.h index fda94edaa88..ad8fbd126d3 100644 --- a/base/base/find_symbols.h +++ b/base/base/find_symbols.h @@ -205,6 +205,16 @@ inline const char * find_first_symbols_sse2(const char * const begin, const char return return_mode == ReturnMode::End ? end : nullptr; } +template +inline const char * find_first_symbols_sse2_markdown(const char * const begin, const char * const end) +{ + const char * pos = begin; + for (; pos < end; ++pos) + if (maybe_negate(is_in(*pos))) + return pos; + + return return_mode == ReturnMode::End ? 
end : nullptr;
+}
 
 template
 inline const char * find_last_symbols_sse2(const char * const begin, const char * const end)
@@ -346,6 +356,12 @@ inline const char * find_first_symbols_dispatch(const std::string_view haystack,
     return find_first_symbols_sse2(haystack.begin(), haystack.end(), symbols.str.data(), symbols.str.size());
 }
 
+template
+inline const char * find_first_symbols_dispatch_markdown(const char * begin, const char * end)
+{
+    return find_first_symbols_sse2_markdown(begin, end);
+}
+
 }
 
@@ -368,6 +384,12 @@ inline const char * find_first_symbols(std::string_view haystack, const SearchSy
     return detail::find_first_symbols_dispatch(haystack, symbols);
 }
 
+template
+inline const char * find_first_symbols_markdown(const char * begin, const char * end)
+{
+    return detail::find_first_symbols_dispatch_markdown(begin, end);
+}
+
 template
 inline const char * find_first_not_symbols(const char * begin, const char * end)
 {
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index bd87d7485e0..ec572c6e88e 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -634,6 +634,17 @@ Ignore extra columns in rows with more columns than expected and treat missing c
 
 Disabled by default.
 
+### output_format_markdown_escape_special_characters {#output_format_markdown_escape_special_characters}
+
+When enabled, special Markdown characters in output values are automatically escaped.
+
+Possible values:
+
+- 0 — Disabled.
+- 1 — Enabled.
+
+Default value: 0.
+
 ## TSV format settings {#tsv-format-settings}
 
 ### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 136d5aa872d..6bae6b61f79 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -923,6 +923,7 @@ class IColumn;
     M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
     M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
     M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
+    M(Bool, output_format_markdown_escape_special_characters, false, "Escape special characters in Markdown", 0) \
     M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \
     M(Bool, output_format_protobuf_nullables_with_google_wrappers, false, "When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized", 0) \
     M(UInt64, input_format_csv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in CSV format", 0) \
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index dcb67165add..2b997572606 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -80,6 +80,7 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
 {
-    {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}},
+    {"23.8", {{"output_format_markdown_escape_special_characters", false, true, "Escape special characters in Markdown"},
+              {"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}},
     {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}},
     {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp
index 782b890841a..e70dc6a2380 100644
--- a/src/DataTypes/Serializations/ISerialization.cpp
+++ b/src/DataTypes/Serializations/ISerialization.cpp
@@ -261,6 +261,12 @@ void ISerialization::deserializeTextRaw(IColumn & column, ReadBuffer & istr, con
     deserializeWholeText(column, buf, settings);
 }
 
+void ISerialization::serializeTextMarkdown(
+    const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
+{
+    serializeTextEscaped(column, row_num, ostr, settings);
+}
+
 void ISerialization::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
 {
     serializeText(column, row_num, ostr, settings);
diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h
index 17e6dfb85bc..ed1ad037ea0 100644
--- a/src/DataTypes/Serializations/ISerialization.h
+++ b/src/DataTypes/Serializations/ISerialization.h
@@ -366,6 +366,8 @@ public:
     virtual void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
     virtual void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;
 
+    virtual void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;
+
     static String getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path);
     static String getFileNameForStream(const String & name_in_storage, const SubstreamPath & path);
     static String getSubcolumnNameForStream(const SubstreamPath & path);
diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp
index c482c9623e9..bbc586480e4 100644
--- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp
+++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp
@@ -215,4 +215,17 @@ void SerializationAggregateFunction::deserializeTextCSV(IColumn & column,
ReadBu deserializeFromString(function, column, s, version); } +void SerializationAggregateFunction::serializeTextMarkdown( + const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const +{ + if (settings.output_format_markdown_escape_special_characters) + { + writeMarkdownEscapedString(serializeToString(function, column, row_num, version), ostr); + } + else + { + serializeTextEscaped(column, row_num, ostr, settings); + } +} + } diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.h b/src/DataTypes/Serializations/SerializationAggregateFunction.h index 4212298bbc1..6344054111a 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.h +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.h @@ -41,6 +41,8 @@ public: void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; }; } diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index 41b5bf806e5..946704d6372 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -332,4 +332,10 @@ void SerializationBool::serializeTextXML(const IColumn & column, size_t row_num, serializeSimple(column, row_num, ostr, settings); } +void SerializationBool::serializeTextMarkdown( + const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const +{ + serializeTextEscaped(column, row_num, ostr, settings); +} + } diff --git a/src/DataTypes/Serializations/SerializationBool.h b/src/DataTypes/Serializations/SerializationBool.h index a5aa0ca80a2..4b5d2a8a5fe 100644 --- a/src/DataTypes/Serializations/SerializationBool.h +++ b/src/DataTypes/Serializations/SerializationBool.h @@ -32,6 +32,8 @@ public: void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; }; } diff --git a/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp b/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp index c35e1120ce8..c12343acfd4 100644 --- a/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp +++ b/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp @@ -94,4 +94,17 @@ void SerializationCustomSimpleText::serializeTextXML(const IColumn & column, siz writeXMLStringForTextElement(serializeToString(*this, column, row_num, settings), ostr); } +void SerializationCustomSimpleText::serializeTextMarkdown( + const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + if (settings.output_format_markdown_escape_special_characters) + { + writeMarkdownEscapedString(serializeToString(*this, column, row_num, settings), ostr); + } + else + { + writeEscapedString(serializeToString(*this, column, row_num, 
settings), ostr); + } +} + } diff --git a/src/DataTypes/Serializations/SerializationCustomSimpleText.h b/src/DataTypes/Serializations/SerializationCustomSimpleText.h index 21d6f8af650..0c909350002 100644 --- a/src/DataTypes/Serializations/SerializationCustomSimpleText.h +++ b/src/DataTypes/Serializations/SerializationCustomSimpleText.h @@ -54,6 +54,8 @@ public: /** Text serialization for putting into the XML format. */ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; }; } diff --git a/src/DataTypes/Serializations/SerializationDate.cpp b/src/DataTypes/Serializations/SerializationDate.cpp index 1ed48fdd31d..428de2a8517 100644 --- a/src/DataTypes/Serializations/SerializationDate.cpp +++ b/src/DataTypes/Serializations/SerializationDate.cpp @@ -85,4 +85,10 @@ SerializationDate::SerializationDate(const DateLUTImpl & time_zone_) : time_zone { } +void SerializationDate::serializeTextMarkdown( + const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const +{ + serializeTextEscaped(column, row_num, ostr, settings); +} + } diff --git a/src/DataTypes/Serializations/SerializationDate.h b/src/DataTypes/Serializations/SerializationDate.h index f751b06fba6..4b74caa59a9 100644 --- a/src/DataTypes/Serializations/SerializationDate.h +++ b/src/DataTypes/Serializations/SerializationDate.h @@ -21,6 +21,7 @@ public: void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; protected: const DateLUTImpl & time_zone; diff --git a/src/DataTypes/Serializations/SerializationDate32.cpp b/src/DataTypes/Serializations/SerializationDate32.cpp index 851710de839..dfa9326e66b 100644 --- a/src/DataTypes/Serializations/SerializationDate32.cpp +++ b/src/DataTypes/Serializations/SerializationDate32.cpp @@ -82,4 +82,11 @@ void SerializationDate32::deserializeTextCSV(IColumn & column, ReadBuffer & istr SerializationDate32::SerializationDate32(const DateLUTImpl & time_zone_) : time_zone(time_zone_) { } + +void SerializationDate32::serializeTextMarkdown( + const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const +{ + serializeTextEscaped(column, row_num, ostr, settings); +} + } diff --git a/src/DataTypes/Serializations/SerializationDate32.h b/src/DataTypes/Serializations/SerializationDate32.h index 49560fb6c7d..ed54a75709f 100644 --- a/src/DataTypes/Serializations/SerializationDate32.h +++ b/src/DataTypes/Serializations/SerializationDate32.h @@ -20,6 +20,7 @@ public: void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; 
protected: const DateLUTImpl & time_zone; diff --git a/src/DataTypes/Serializations/SerializationDateTime.cpp b/src/DataTypes/Serializations/SerializationDateTime.cpp index 2ba24f5351b..4271a34f30b 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime.cpp @@ -176,4 +176,10 @@ void SerializationDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & is assert_cast(column).getData().push_back(static_cast(x)); } +void SerializationDateTime::serializeTextMarkdown( + const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const +{ + serializeTextEscaped(column, row_num, ostr, settings); +} + } diff --git a/src/DataTypes/Serializations/SerializationDateTime.h b/src/DataTypes/Serializations/SerializationDateTime.h index f4a142483e5..0a283f65b65 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.h +++ b/src/DataTypes/Serializations/SerializationDateTime.h @@ -23,6 +23,7 @@ public: void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; }; } diff --git a/src/DataTypes/Serializations/SerializationDateTime64.cpp b/src/DataTypes/Serializations/SerializationDateTime64.cpp index c5964f1bd97..64841247187 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime64.cpp @@ -171,4 +171,10 @@ void SerializationDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer & assert_cast(column).getData().push_back(x); } +void SerializationDateTime64::serializeTextMarkdown( + const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const +{ + serializeTextEscaped(column, row_num, ostr, settings); +} + } diff --git a/src/DataTypes/Serializations/SerializationDateTime64.h b/src/DataTypes/Serializations/SerializationDateTime64.h index f817edbf0dd..916224ffc06 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.h +++ b/src/DataTypes/Serializations/SerializationDateTime64.h @@ -24,6 +24,7 @@ public: void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; }; } diff --git a/src/DataTypes/Serializations/SerializationEnum.cpp b/src/DataTypes/Serializations/SerializationEnum.cpp index a1bd63d4327..5de7f3a4b8f 100644 --- a/src/DataTypes/Serializations/SerializationEnum.cpp +++ b/src/DataTypes/Serializations/SerializationEnum.cpp @@ -111,6 +111,20 @@ void SerializationEnum::deserializeTextCSV(IColumn & column, ReadBuffer & } } +template +void SerializationEnum::serializeTextMarkdown( + const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + if (settings.output_format_markdown_escape_special_characters) + { + 
writeMarkdownEscapedString(this->getNameForValue(assert_cast(column).getData()[row_num]).toView(), ostr); + } + else + { + serializeTextEscaped(column, row_num, ostr, settings); + } +} + template class SerializationEnum; template class SerializationEnum; diff --git a/src/DataTypes/Serializations/SerializationEnum.h b/src/DataTypes/Serializations/SerializationEnum.h index bdd769b59c5..49a0e4943e0 100644 --- a/src/DataTypes/Serializations/SerializationEnum.h +++ b/src/DataTypes/Serializations/SerializationEnum.h @@ -29,6 +29,8 @@ public: void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + FieldType readValue(ReadBuffer & istr) const { FieldType x; diff --git a/src/DataTypes/Serializations/SerializationFixedString.cpp b/src/DataTypes/Serializations/SerializationFixedString.cpp index 3b405f6ec08..f2de1c530e3 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.cpp +++ b/src/DataTypes/Serializations/SerializationFixedString.cpp @@ -210,5 +210,18 @@ void SerializationFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & read(*this, column, [&istr, &csv = settings.csv](ColumnFixedString::Chars & data) { readCSVStringInto(data, istr, csv); }); } +void SerializationFixedString::serializeTextMarkdown( + const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const +{ + if (settings.output_format_markdown_escape_special_characters) + { + const char * pos = reinterpret_cast(&assert_cast(column).getChars()[n * row_num]); + writeAnyMarkdownEscapedString<'\''>(pos, pos + n, ostr); + } + else + { + serializeTextEscaped(column, row_num, ostr, settings); + } +} } diff --git a/src/DataTypes/Serializations/SerializationFixedString.h b/src/DataTypes/Serializations/SerializationFixedString.h index 3db31ab02cb..c27b10ad158 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.h +++ b/src/DataTypes/Serializations/SerializationFixedString.h @@ -41,6 +41,8 @@ public: void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + /// Makes sure that the length of a newly inserted string to `chars` is equal to getN(). /// If the length is less than getN() the function will add zero characters up to getN(). /// If the length is greater than getN() the function will throw an exception. 
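The serialization files above and below all follow one pattern: ISerialization::serializeTextMarkdown() provides a default that falls back to the escaped-text path, each concrete serialization overrides it, and only the string-like ones (String, FixedString, Enum, AggregateFunction, Object and the custom simple-text serialization) consult the new setting and call writeMarkdownEscapedString(). A reduced sketch of that dispatch follows; the class names and the bare bool flag are illustrative, not ClickHouse's actual interfaces.

#include <iostream>
#include <ostream>

struct ISerializationSketch
{
    virtual ~ISerializationSketch() = default;

    virtual void serializeTextEscaped(std::ostream & out) const = 0;

    // Default: numeric and date-like types emit no Markdown punctuation,
    // so the escaped-text representation is already safe.
    virtual void serializeTextMarkdown(std::ostream & out, bool escape_special) const
    {
        (void)escape_special;
        serializeTextEscaped(out);
    }
};

struct StringSerializationSketch : ISerializationSketch
{
    const char * value;
    explicit StringSerializationSketch(const char * v) : value(v) {}

    void serializeTextEscaped(std::ostream & out) const override { out << value; }

    // Override: strings may contain |, *, _ and friends, so escape on demand.
    void serializeTextMarkdown(std::ostream & out, bool escape_special) const override
    {
        if (!escape_special)
        {
            serializeTextEscaped(out);
            return;
        }
        for (const char * p = value; *p; ++p)
        {
            if (*p == '|' || *p == '*' || *p == '_')
                out << '\\';
            out << *p;
        }
    }
};

int main()
{
    StringSerializationSketch s("a|b");
    s.serializeTextMarkdown(std::cout, true); // prints: a\|b
    std::cout << '\n';
}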
diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.h b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h index 61464962f1c..70695bc1032 100644 --- a/src/DataTypes/Serializations/SerializationIPv4andIPv6.h +++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h @@ -124,6 +124,10 @@ public: size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(IPv) * limit); x.resize(initial_size + size / sizeof(IPv)); } + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override + { + serializeTextEscaped(column, row_num, ostr, settings); + } }; using SerializationIPv4 = SerializationIP; diff --git a/src/DataTypes/Serializations/SerializationInterval.cpp b/src/DataTypes/Serializations/SerializationInterval.cpp index 59086d8aef3..be944b1a79b 100644 --- a/src/DataTypes/Serializations/SerializationInterval.cpp +++ b/src/DataTypes/Serializations/SerializationInterval.cpp @@ -206,4 +206,11 @@ void SerializationInterval::serializeTextRaw(const IColumn & column, size_t row, { dispatch(&ISerialization::serializeTextRaw, settings.interval.output_format, column, row, ostr, settings); } + +void SerializationInterval::serializeTextMarkdown( + const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeTextEscaped(column, row, ostr, settings); +} + } diff --git a/src/DataTypes/Serializations/SerializationInterval.h b/src/DataTypes/Serializations/SerializationInterval.h index a4e6c204e4f..9b3ac8c9805 100644 --- a/src/DataTypes/Serializations/SerializationInterval.h +++ b/src/DataTypes/Serializations/SerializationInterval.h @@ -66,6 +66,7 @@ public: void serializeTextJSON(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override; void serializeTextQuoted(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override; void serializeTextRaw(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override; private: template Method> diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index 3e1cbdb00f5..38f3ddcc456 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -778,4 +778,10 @@ void SerializationLowCardinality::deserializeImpl( low_cardinality_column.insertFromFullColumn(*temp_column, 0); } +void SerializationLowCardinality::serializeTextMarkdown( + const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeTextEscaped(column, row_num, ostr, settings); +} + } diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index 5f56bcf8108..7c5fa1002fe 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -66,6 +66,7 @@ public: void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextRaw(const IColumn & column, size_t 
row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; private: template diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 774b86472be..eaf4c035f84 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -467,6 +467,12 @@ void SerializationNullable::deserializeWholeText(IColumn & column, ReadBuffer & deserializeWholeTextImpl(column, istr, settings, nested); } +void SerializationNullable::serializeTextMarkdown( + const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeTextEscaped(column, row_num, ostr, settings); +} + template ReturnType SerializationNullable::deserializeWholeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested) diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index 3ec01b46de5..34e543b06cc 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -54,6 +54,7 @@ public: void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index df9489213c8..8bfcb6d4e71 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -513,6 +513,22 @@ void SerializationObject::serializeTextCSV(const IColumn & column, size_ writeCSVString(ostr_str.str(), ostr); } +template +void SerializationObject::serializeTextMarkdown( + const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + if (settings.output_format_markdown_escape_special_characters) + { + WriteBufferFromOwnString ostr_str; + serializeTextImpl(column, row_num, ostr_str, settings); + writeMarkdownEscapedString(ostr_str.str(), ostr); + } + else + { + serializeTextEscaped(column, row_num, ostr, settings); + } +} + template void SerializationObject::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const { diff --git a/src/DataTypes/Serializations/SerializationObject.h b/src/DataTypes/Serializations/SerializationObject.h index de54f5739f5..39e1c514640 100644 --- a/src/DataTypes/Serializations/SerializationObject.h +++ b/src/DataTypes/Serializations/SerializationObject.h @@ -68,6 +68,7 @@ public: void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, 
size_t indent) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationSparse.cpp b/src/DataTypes/Serializations/SerializationSparse.cpp index 4d7514271ad..e1e08ceeaa4 100644 --- a/src/DataTypes/Serializations/SerializationSparse.cpp +++ b/src/DataTypes/Serializations/SerializationSparse.cpp @@ -384,4 +384,10 @@ void SerializationSparse::serializeTextXML(const IColumn & column, size_t row_nu nested->serializeTextXML(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings); } +void SerializationSparse::serializeTextMarkdown( + const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeTextEscaped(column, row_num, ostr, settings); +} + } diff --git a/src/DataTypes/Serializations/SerializationSparse.h b/src/DataTypes/Serializations/SerializationSparse.h index 2d31fba2509..6814ae2a812 100644 --- a/src/DataTypes/Serializations/SerializationSparse.h +++ b/src/DataTypes/Serializations/SerializationSparse.h @@ -84,6 +84,8 @@ public: void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + private: struct SubcolumnCreator : public ISubcolumnCreator { diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 46fd9d5272d..0e4c0407fe2 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -361,5 +361,17 @@ void SerializationString::deserializeTextCSV(IColumn & column, ReadBuffer & istr read(column, [&](ColumnString::Chars & data) { readCSVStringInto(data, istr, settings.csv); }); } +void SerializationString::serializeTextMarkdown( + const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + if (settings.output_format_markdown_escape_special_characters) + { + writeMarkdownEscapedString(assert_cast(column).getDataAt(row_num).toView(), ostr); + } + else + { + serializeTextEscaped(column, row_num, ostr, settings); + } +} } diff --git a/src/DataTypes/Serializations/SerializationString.h b/src/DataTypes/Serializations/SerializationString.h index f27a5116c15..cd4cdf79c11 100644 --- a/src/DataTypes/Serializations/SerializationString.h +++ b/src/DataTypes/Serializations/SerializationString.h @@ -32,6 +32,8 @@ public: void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; }; } diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index 93658fd05a3..3a6c5b08472 100644 --- 
a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -155,4 +155,10 @@ void SerializationUUID::deserializeBinaryBulk(IColumn & column, ReadBuffer & ist x.resize(initial_size + size / sizeof(UUID)); } +void SerializationUUID::serializeTextMarkdown( + const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const +{ + serializeTextEscaped(column, row_num, ostr, settings); +} + } diff --git a/src/DataTypes/Serializations/SerializationUUID.h b/src/DataTypes/Serializations/SerializationUUID.h index da8c15f7279..768ce97ad3d 100644 --- a/src/DataTypes/Serializations/SerializationUUID.h +++ b/src/DataTypes/Serializations/SerializationUUID.h @@ -25,6 +25,8 @@ public: void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; + + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; }; } diff --git a/src/DataTypes/Serializations/SerializationWrapper.cpp b/src/DataTypes/Serializations/SerializationWrapper.cpp index 18e4891ee65..1bf33deff51 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.cpp +++ b/src/DataTypes/Serializations/SerializationWrapper.cpp @@ -146,4 +146,10 @@ void SerializationWrapper::serializeTextXML(const IColumn & column, size_t row_n nested_serialization->serializeTextXML(column, row_num, ostr, settings); } +void SerializationWrapper::serializeTextMarkdown( + const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const +{ + serializeTextEscaped(column, row_num, ostr, settings); +} + } diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index 31900f93148..7d102560380 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -78,6 +78,8 @@ public: void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; }; } diff --git a/src/DataTypes/Serializations/SimpleTextSerialization.h b/src/DataTypes/Serializations/SimpleTextSerialization.h index 0247f30b30a..a92775bda25 100644 --- a/src/DataTypes/Serializations/SimpleTextSerialization.h +++ b/src/DataTypes/Serializations/SimpleTextSerialization.h @@ -56,6 +56,12 @@ protected: deserializeText(column, istr, settings, false); } + void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override + { + serializeTextEscaped(column, row_num, ostr, settings); + } + + /// whole = true means that buffer contains only one value, so we should read until EOF. /// It's needed to check if there is garbage after parsed field. 
virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const = 0; diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 36ee06c7a06..c5a4cf7a5c6 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -213,6 +213,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.try_infer_integers = settings.input_format_try_infer_integers; format_settings.try_infer_dates = settings.input_format_try_infer_dates; format_settings.try_infer_datetimes = settings.input_format_try_infer_datetimes; + format_settings.output_format_markdown_escape_special_characters = settings.output_format_markdown_escape_special_characters; format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference; format_settings.max_binary_string_size = settings.format_binary_max_string_size; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 0c760f91514..c2744f68088 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -46,6 +46,8 @@ struct FormatSettings bool try_infer_dates = false; bool try_infer_datetimes = false; + bool output_format_markdown_escape_special_characters = false; + enum class DateTimeInputFormat { Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 57337e7bb96..eaeaa806b68 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -380,6 +380,162 @@ void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & b } } +template +void writeAnyMarkdownEscapedString(const char * begin, const char * end, WriteBuffer & buf) +{ + const char * pos = begin; + while (true) + { + const char * next_pos = find_first_symbols_markdown< + '\b', + '\f', + '\n', + '\r', + '\t', + '\0', + '\\', + quote_character, + '`', + '*', + '_', + '{', + '}', + '[', + ']', + '<', + '>', + '(', + ')', + '#', + '+', + '-', + '.', + '!', + '|'>(pos, end); + + if (next_pos == end) + { + buf.write(pos, next_pos - pos); + break; + } + else + { + buf.write(pos, next_pos - pos); + pos = next_pos; + switch (*pos) + { + case '\b': + writeChar('\\', buf); + writeChar('b', buf); + break; + case '\f': + writeChar('\\', buf); + writeChar('f', buf); + break; + case '\n': + writeChar('\\', buf); + writeChar('n', buf); + break; + case '\r': + writeChar('\\', buf); + writeChar('r', buf); + break; + case '\t': + writeChar('\\', buf); + writeChar('t', buf); + break; + case '\0': + writeChar('\\', buf); + writeChar('0', buf); + break; + case '\\': + if constexpr (escape_backslash_with_backslash) + writeChar('\\', buf); + writeChar('\\', buf); + break; + case quote_character: { + if constexpr (escape_quote_with_quote) + writeChar(quote_character, buf); + else + writeChar('\\', buf); + writeChar(quote_character, buf); + break; + } + case '`': + writeChar('`', buf); + writeChar('`', buf); + break; + case '*': + writeChar('\\', buf); + writeChar('*', buf); + break; + case '_': + writeChar('\\', buf); + writeChar('_', buf); + break; + case '{': + writeChar('\\', buf); + writeChar('{', buf); + break; + case '}': + writeChar('\\', buf); + writeChar('}', buf); + break; + case '[': + writeChar('\\', buf); + 
writeChar('[', buf); + break; + case ']': + writeChar('\\', buf); + writeChar(']', buf); + break; + case '<': + writeChar('\\', buf); + writeChar('<', buf); + break; + case '>': + writeChar('\\', buf); + writeChar('>', buf); + break; + case '(': + writeChar('\\', buf); + writeChar('(', buf); + break; + case ')': + writeChar('\\', buf); + writeChar(')', buf); + break; + case '#': + writeChar('\\', buf); + writeChar('#', buf); + break; + case '+': + writeChar('\\', buf); + writeChar('+', buf); + break; + case '-': + writeChar('\\', buf); + writeChar('-', buf); + break; + case '.': + writeChar('\\', buf); + writeChar('.', buf); + break; + case '!': + writeChar('\\', buf); + writeChar('!', buf); + break; + case '|': + writeChar('\\', buf); + writeChar('|', buf); + break; + default: + writeChar(*pos, buf); + } + ++pos; + } + } +} inline void writeJSONString(std::string_view s, WriteBuffer & buf, const FormatSettings & settings) { @@ -444,6 +600,16 @@ inline void writeEscapedString(std::string_view ref, WriteBuffer & buf) writeEscapedString(ref.data(), ref.size(), buf); } +inline void writeMarkdownEscapedString(const char * str, size_t size, WriteBuffer & buf) +{ + writeAnyMarkdownEscapedString<'\''>(str, str + size, buf); +} + +inline void writeMarkdownEscapedString(std::string_view ref, WriteBuffer & buf) +{ + writeMarkdownEscapedString(ref.data(), ref.size(), buf); +} + template void writeAnyQuotedString(const char * begin, const char * end, WriteBuffer & buf) { diff --git a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp index ea414171ed6..00bb5ff6fcf 100644 --- a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp @@ -52,7 +52,7 @@ void MarkdownRowOutputFormat::writeRowEndDelimiter() void MarkdownRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - serialization.serializeTextEscaped(column, row_num, out, format_settings); + serialization.serializeTextMarkdown(column, row_num, out, format_settings); } void registerOutputFormatMarkdown(FormatFactory & factory) diff --git a/tests/queries/0_stateless/01231_markdown_format.sql b/tests/queries/0_stateless/01231_markdown_format.sql index 65c65389e12..f6bdbfa1a2f 100644 --- a/tests/queries/0_stateless/01231_markdown_format.sql +++ b/tests/queries/0_stateless/01231_markdown_format.sql @@ -1,6 +1,6 @@ -DROP TABLE IF EXISTS makrdown; +DROP TABLE IF EXISTS markdown; CREATE TABLE markdown (id UInt32, name String, array Array(Int32), nullable Nullable(String), low_cardinality LowCardinality(String), decimal Decimal32(6)) ENGINE = Memory; INSERT INTO markdown VALUES (1, 'name1', [1,2,3], 'Some long string', 'name1', 1.11), (2, 'name2', [4,5,60000], Null, 'Another long string', 222.222222), (30000, 'One more long string', [7,8,9], 'name3', 'name3', 3.33); SELECT * FROM markdown FORMAT Markdown; -DROP TABLE IF EXISTS markdown +DROP TABLE IF EXISTS markdown; From 264877b2a53594af915fdde028a6ef780e949fc5 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 27 Aug 2023 19:47:11 +0000 Subject: [PATCH 0320/1687] fix fuzzers, add bundle build target fuzzers --- CMakeLists.txt | 4 ++++ src/Compression/fuzzers/delta_decompress_fuzzer.cpp | 2 +- src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp | 2 +- src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp | 4 ++-- src/Compression/fuzzers/lz4_decompress_fuzzer.cpp | 5 +++-- src/Parsers/fuzzers/CMakeLists.txt | 4 
++-- 6 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f3d344f9cc2..02c72fb2246 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -581,6 +581,8 @@ function(get_all_targets var) endfunction() if (FUZZER) + # Bundle fuzzers target + add_custom_target(fuzzers) # Instrument all targets having pattern *_fuzzer with fuzzer and link with libfuzzer get_all_targets(all_targets) foreach(target ${all_targets}) @@ -588,6 +590,8 @@ if (FUZZER) target_link_libraries(${target} PRIVATE ch_contrib::fuzzer) target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link") message(STATUS "${target} instrumented with fuzzer") + # Add to bundle + add_dependencies(fuzzers ${target}) endif() endforeach() endif() diff --git a/src/Compression/fuzzers/delta_decompress_fuzzer.cpp b/src/Compression/fuzzers/delta_decompress_fuzzer.cpp index eaef1d1896c..e9892b5415d 100644 --- a/src/Compression/fuzzers/delta_decompress_fuzzer.cpp +++ b/src/Compression/fuzzers/delta_decompress_fuzzer.cpp @@ -33,7 +33,7 @@ try DB::Memory<> memory; memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer()); - codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + codec->doDecompressData(reinterpret_cast(data), static_cast(size), memory.data(), static_cast(output_buffer_size)); return 0; } diff --git a/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp b/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp index c32120bacbf..c312b1cd06b 100644 --- a/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp +++ b/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp @@ -33,7 +33,7 @@ try DB::Memory<> memory; memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer()); - codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + codec->doDecompressData(reinterpret_cast(data), static_cast(size), memory.data(), static_cast(output_buffer_size)); return 0; } diff --git a/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp b/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp index eb95c83e1d7..1a100c59c85 100644 --- a/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp +++ b/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp @@ -291,10 +291,10 @@ try DB::Memory<> memory; memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer()); - codec_128->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31); + codec_128->doDecompressData(input.data(), static_cast(input.size()), memory.data(), static_cast(input.size()) - 31); memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer()); - codec_256->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31); + codec_256->doDecompressData(input.data(), static_cast(input.size()), memory.data(), static_cast(input.size()) - 31); return 0; } catch (...) 
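The recurring change in these fuzzer sources is spelling out the size_t to UInt32 narrowing, since the fuzzer build now compiles the targets with implicit-conversion warnings treated as errors. A minimal illustration of the pattern follows; doDecompressData here is a hypothetical stand-in that mirrors the UInt32-based codec signature used above, not the real codec API.

#include <cstdint>
#include <cstdio>

using UInt32 = std::uint32_t;

// Hypothetical stand-in mirroring the UInt32-based codec signature.
static void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size)
{
    (void)source;
    (void)dest;
    std::printf("decompress %u bytes into a %u-byte buffer\n", source_size, dest_size);
}

int main()
{
    char input[16] = {};
    char output[32] = {};
    std::size_t size = sizeof(input);                // fuzzer inputs arrive as size_t
    std::size_t output_buffer_size = sizeof(output);

    // Passing `size` directly would be an implicit 64-bit -> 32-bit
    // conversion, rejected under -Werror; the patch spells it out instead:
    doDecompressData(input, static_cast<UInt32>(size), output, static_cast<UInt32>(output_buffer_size));
}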
diff --git a/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp index f03fc716c2c..24c8103053e 100644 --- a/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp +++ b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp @@ -1,3 +1,4 @@ +#include "base/types.h" #include #include @@ -23,7 +24,7 @@ try return 0; const auto * p = reinterpret_cast(data); - auto codec = DB::getCompressionCodecLZ4(p->level); + auto codec = DB::getCompressionCodecLZ4(static_cast(p->level)); size_t output_buffer_size = p->decompressed_size % 65536; size -= sizeof(AuxiliaryRandomData); @@ -36,7 +37,7 @@ try DB::Memory<> memory; memory.resize(output_buffer_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); - codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + codec->doDecompressData(reinterpret_cast(data), static_cast(size), memory.data(), static_cast(output_buffer_size)); return 0; } diff --git a/src/Parsers/fuzzers/CMakeLists.txt b/src/Parsers/fuzzers/CMakeLists.txt index 310ed724f07..903319d733c 100644 --- a/src/Parsers/fuzzers/CMakeLists.txt +++ b/src/Parsers/fuzzers/CMakeLists.txt @@ -2,10 +2,10 @@ clickhouse_add_executable(lexer_fuzzer lexer_fuzzer.cpp ${SRCS}) target_link_libraries(lexer_fuzzer PRIVATE clickhouse_parsers) clickhouse_add_executable(select_parser_fuzzer select_parser_fuzzer.cpp ${SRCS}) -target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers) +target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers dbms) clickhouse_add_executable(create_parser_fuzzer create_parser_fuzzer.cpp ${SRCS}) -target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers) +target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers dbms) add_subdirectory(codegen_fuzzer) From db1a291e1e1f98f2a752ba12cb4d854d1db45666 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 27 Aug 2023 23:19:24 +0000 Subject: [PATCH 0321/1687] cleanup --- .github/workflows/pull_request.yml | 4 ++-- CMakeLists.txt | 4 +--- cmake/add_warning.cmake | 1 - cmake/warnings.cmake | 5 ----- src/Compression/fuzzers/lz4_decompress_fuzzer.cpp | 1 - 5 files changed, 3 insertions(+), 12 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 8b7bced6550..93bedbeb9b9 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -5074,13 +5074,13 @@ jobs: steps: - name: Build Fuzzers (${{ matrix.sanitizer }}) id: build - uses: clickhouse/oss-fuzz/infra/cifuzz/actions/build_fuzzers@no-implicit-int-float-conversion + uses: clickhouse/oss-fuzz/infra/cifuzz/actions/build_fuzzers@localized.v1 with: oss-fuzz-project-name: 'clickhouse' language: c++ sanitizer: ${{ matrix.sanitizer }} - name: Run Fuzzers (${{ matrix.sanitizer }}) - uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + uses: clickhouse/oss-fuzz/infra/cifuzz/actions/run_fuzzers@localized.v1 with: oss-fuzz-project-name: 'clickhouse' language: c++ diff --git a/CMakeLists.txt b/CMakeLists.txt index 02c72fb2246..b343ee2f94a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -119,7 +119,7 @@ endif() add_library(global-libs INTERFACE) # We don't want to instrument everything with fuzzer, but only specific targets (see below), -# also, since we build our own llvm, we specifically don't wan't to instrument +# also, since we build our own llvm, we specifically don't want to instrument # libFuzzer library itself - it would result in infinite recursion #include (cmake/fuzzer.cmake) @@ -529,9 
+529,7 @@ set(CONFIG_INCLUDE_PATH ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTER include_directories(${CONFIG_INCLUDE_PATH}) # Add as many warnings as possible for our own code. -message(STATUS "BEFORE WARNINGS") include (cmake/warnings.cmake) -message(STATUS "AFTER WARNINGS") include (cmake/print_flags.cmake) if (ENABLE_RUST) diff --git a/cmake/add_warning.cmake b/cmake/add_warning.cmake index bf712d2320a..e7797fcc7a6 100644 --- a/cmake/add_warning.cmake +++ b/cmake/add_warning.cmake @@ -3,7 +3,6 @@ include (CheckCCompilerFlag) # Try to add -Wflag if compiler supports it macro (add_warning flag) - message(STATUS "ADD WARNING ${flag}") string (REPLACE "-" "_" underscored_flag ${flag}) string (REPLACE "+" "x" underscored_flag ${underscored_flag}) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index c595b70a252..00fa32a6b7f 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -1,5 +1,3 @@ -message(STATUS "INSIDE WARNINGS 0") - # Our principle is to enable as many warnings as possible and always do it with "warnings as errors" flag. # # But it comes with some cost: @@ -9,7 +7,6 @@ message(STATUS "INSIDE WARNINGS 0") # - sometimes warnings from 3rd party libraries may come from macro substitutions in our code # and we have to wrap them with #pragma GCC/clang diagnostic ignored -message(STATUS "INSIDE WARNINGS") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") # Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size. @@ -21,7 +18,6 @@ endif () if (COMPILER_CLANG) # Add some warnings that are not available even with -Wall -Wextra -Wpedantic. # We want to get everything out of the compiler for code quality. - message(STATUS "WARNINGS SETUP") add_warning(everything) add_warning(pedantic) no_warning(zero-length-array) @@ -31,7 +27,6 @@ if (COMPILER_CLANG) no_warning(sign-conversion) no_warning(implicit-int-conversion) no_warning(implicit-int-float-conversion) - no_warning(implicit-const-int-float-conversion) no_warning(ctad-maybe-unsupported) # clang 9+, linux-only no_warning(disabled-macro-expansion) no_warning(documentation-unknown-command) diff --git a/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp index 24c8103053e..9ac8818d114 100644 --- a/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp +++ b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp @@ -1,4 +1,3 @@ -#include "base/types.h" #include #include From 017eab4a079f659510ffe880c104e2357309f8ed Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 28 Aug 2023 02:05:43 +0000 Subject: [PATCH 0322/1687] skip adding execute_query_fuzzer to the fuzzers bundle - too large - linker fails --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b343ee2f94a..3185d90e8f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -588,6 +588,10 @@ if (FUZZER) target_link_libraries(${target} PRIVATE ch_contrib::fuzzer) target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link") message(STATUS "${target} instrumented with fuzzer") + # Skip this fuzzer because of linker errors (the size of the binary is too big) + if (target = "execute_query_fuzzer") + continue() + endif() # Add to bundle add_dependencies(fuzzers ${target}) endif() From 8ba08ed9e7468f1b4cfe7d77822e50819bba7e42 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 28 Aug 2023 02:08:23 +0000 Subject: [PATCH 0323/1687] fix --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3185d90e8f7..9cad854dacd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -589,7 +589,7 @@ if (FUZZER)
             target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link")
             message(STATUS "${target} instrumented with fuzzer")
             # Skip this fuzzer because of linker errors (the size of the binary is too big)
-            if (target = "execute_query_fuzzer")
+            if (target STREQUAL "execute_query_fuzzer")
                 continue()
             endif()
             # Add to bundle

From a8078b88cec43f84fbb3c5575ce3e9fe06f7aa95 Mon Sep 17 00:00:00 2001
From: JackyWoo
Date: Mon, 28 Aug 2023 14:22:09 +0800
Subject: [PATCH 0324/1687] Add stateless tests

---
 .../test_rewrite_uniq_to_count/__init__.py    |   0
 .../test_rewrite_uniq_to_count/test.py        | 120 --------
 ...8_distinct_to_count_optimization.reference | 287 ++++++++++++++++++
 .../02868_distinct_to_count_optimization.sql  |  71 +++++
 4 files changed, 358 insertions(+), 120 deletions(-)
 delete mode 100644 tests/integration/test_rewrite_uniq_to_count/__init__.py
 delete mode 100644 tests/integration/test_rewrite_uniq_to_count/test.py
 create mode 100644 tests/queries/0_stateless/02868_distinct_to_count_optimization.reference
 create mode 100644 tests/queries/0_stateless/02868_distinct_to_count_optimization.sql

diff --git a/tests/integration/test_rewrite_uniq_to_count/__init__.py b/tests/integration/test_rewrite_uniq_to_count/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tests/integration/test_rewrite_uniq_to_count/test.py b/tests/integration/test_rewrite_uniq_to_count/test.py
deleted file mode 100644
index 82a979c6e63..00000000000
--- a/tests/integration/test_rewrite_uniq_to_count/test.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import pytest
-from helpers.cluster import ClickHouseCluster
-
-cluster = ClickHouseCluster(__file__)
-node = cluster.add_instance("node")
-
-
-@pytest.fixture(scope="module")
-def started_cluster():
-    try:
-        cluster.start()
-        prepare()
-        yield cluster
-    finally:
-        shutdown()
-        cluster.shutdown()
-
-
-def prepare():
-    node.query(
-        """
-        CREATE TABLE IF NOT EXISTS test_rewrite_uniq_to_count
-        (
-            `a` UInt8,
-            `b` UInt8,
-            `c` UInt8
-        )
-        ENGINE = MergeTree
-        ORDER BY `a`
-        """
-    )
-    node.query(
-        "INSERT INTO test_rewrite_uniq_to_count values ('1', '1', '1'), ('1', '1', '1')"
-    )
-    node.query(
-        "INSERT INTO test_rewrite_uniq_to_count values ('2', '2', '2'), ('2', '2', '2')"
-    )
-    node.query(
-        "INSERT INTO test_rewrite_uniq_to_count values ('3', '3', '3'), ('3', '3', '3')"
-    )
-
-
-def shutdown():
-    node.query("DROP TABLE IF EXISTS test_rewrite_uniq_to_count SYNC")
-
-
-def check(query, result):
-    # old analyzer
-    query = query + " settings optimize_uniq_to_count = 1"
-    assert node.query(query) == f"{result}\n"
-    assert "count()" in node.query("EXPLAIN SYNTAX " + query)
-
-    # new analyzer
-    query = query + ", allow_experimental_analyzer = 1"
-    assert node.query(query) == f"{result}\n"
-    assert "function_name: count" in node.query("EXPLAIN QUERY TREE " + query)
-
-
-def test_rewrite_distinct(started_cluster):
-    # simple test
-    check(
-        "SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count)",
-        3,
-    )
-
-    # test subquery alias
-    check(
-        "SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t",
-        3,
-    )
-
-    # test compound column name
-    check(
-        "SELECT uniq(a) FROM (SELECT DISTINCT
test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t", - 3, - ) - - -def test_rewrite_group_by(started_cluster): - # simple test - check( - "SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a)", - 3, - ) - - # test subquery alias - check( - "SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3, - ) diff --git a/tests/queries/0_stateless/02868_distinct_to_count_optimization.reference b/tests/queries/0_stateless/02868_distinct_to_count_optimization.reference new file mode 100644 index 00000000000..2de931ce0fd --- /dev/null +++ b/tests/queries/0_stateless/02868_distinct_to_count_optimization.reference @@ -0,0 +1,287 @@ +-- { echoOn } + +-- test simple distinct +SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) settings allow_experimental_analyzer=0; +3 +EXPLAIN SYNTAX SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) settings allow_experimental_analyzer=0; +SELECT count() +FROM +( + SELECT DISTINCT a + FROM test_rewrite_uniq_to_count +) +SETTINGS allow_experimental_analyzer = 0 +SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) settings allow_experimental_analyzer=1; +3 +EXPLAIN QUERY TREE SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) settings allow_experimental_analyzer=1; +QUERY id: 0 + PROJECTION COLUMNS + uniq(a) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + QUERY id: 3, is_subquery: 1, is_distinct: 1 + PROJECTION COLUMNS + a UInt8 + PROJECTION + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6 + JOIN TREE + TABLE id: 6, table_name: default.test_rewrite_uniq_to_count + SETTINGS allow_experimental_analyzer=1 +-- test distinct with subquery alias +SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +3 +EXPLAIN SYNTAX SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +SELECT count() +FROM +( + SELECT DISTINCT a + FROM test_rewrite_uniq_to_count +) AS t +SETTINGS allow_experimental_analyzer = 0 +SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; +3 +EXPLAIN QUERY TREE SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; +QUERY id: 0 + PROJECTION COLUMNS + uniq(a) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + QUERY id: 3, alias: t, is_subquery: 1, is_distinct: 1 + PROJECTION COLUMNS + a UInt8 + PROJECTION + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: a, result_type: UInt8, 
source_id: 6 + JOIN TREE + TABLE id: 6, table_name: default.test_rewrite_uniq_to_count + SETTINGS allow_experimental_analyzer=1 +-- test distinct with compound column name +SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +3 +EXPLAIN SYNTAX SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +SELECT count() +FROM +( + SELECT DISTINCT a + FROM test_rewrite_uniq_to_count +) AS t +SETTINGS allow_experimental_analyzer = 0 +SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; +3 +EXPLAIN QUERY TREE SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; +QUERY id: 0 + PROJECTION COLUMNS + uniq(a) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + QUERY id: 3, alias: t, is_subquery: 1, is_distinct: 1 + PROJECTION COLUMNS + a UInt8 + PROJECTION + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6 + JOIN TREE + TABLE id: 6, table_name: default.test_rewrite_uniq_to_count + SETTINGS allow_experimental_analyzer=1 +-- test distinct with select expression alias +SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +3 +EXPLAIN SYNTAX SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +SELECT count() +FROM +( + SELECT DISTINCT a AS alias_of_a + FROM test_rewrite_uniq_to_count +) AS t +SETTINGS allow_experimental_analyzer = 0 +SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; +3 +EXPLAIN QUERY TREE SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; +QUERY id: 0 + PROJECTION COLUMNS + uniq(alias_of_a) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + QUERY id: 3, alias: t, is_subquery: 1, is_distinct: 1 + PROJECTION COLUMNS + alias_of_a UInt8 + PROJECTION + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6 + JOIN TREE + TABLE id: 6, table_name: default.test_rewrite_uniq_to_count + SETTINGS allow_experimental_analyzer=1 +-- test simple group by +SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) settings allow_experimental_analyzer=0; +3 +EXPLAIN SYNTAX SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) settings allow_experimental_analyzer=0; +SELECT count() +FROM +( + SELECT + a, + sum(b) + FROM test_rewrite_uniq_to_count + GROUP BY a +) +SETTINGS allow_experimental_analyzer = 0 +SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) settings allow_experimental_analyzer=1; +3 +EXPLAIN QUERY TREE SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) settings allow_experimental_analyzer=1; +QUERY id: 0 + PROJECTION COLUMNS + uniq(a) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 
+ JOIN TREE + QUERY id: 3, is_subquery: 1 + PROJECTION COLUMNS + a UInt8 + sum(b) UInt64 + PROJECTION + LIST id: 4, nodes: 2 + COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6 + FUNCTION id: 7, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: b, result_type: UInt8, source_id: 6 + JOIN TREE + TABLE id: 6, table_name: default.test_rewrite_uniq_to_count + GROUP BY + LIST id: 10, nodes: 1 + COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6 + SETTINGS allow_experimental_analyzer=1 +-- test group by with subquery alias +SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=0; +3 +EXPLAIN SYNTAX SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=0; +SELECT count() +FROM +( + SELECT + a, + sum(b) + FROM test_rewrite_uniq_to_count + GROUP BY a +) AS t +SETTINGS allow_experimental_analyzer = 0 +SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=1; +3 +EXPLAIN QUERY TREE SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=1; +QUERY id: 0 + PROJECTION COLUMNS + uniq(a) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + QUERY id: 3, alias: t, is_subquery: 1 + PROJECTION COLUMNS + a UInt8 + sum(b) UInt64 + PROJECTION + LIST id: 4, nodes: 2 + COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6 + FUNCTION id: 7, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: b, result_type: UInt8, source_id: 6 + JOIN TREE + TABLE id: 6, table_name: default.test_rewrite_uniq_to_count + GROUP BY + LIST id: 10, nodes: 1 + COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6 + SETTINGS allow_experimental_analyzer=1 +-- test group by with compound column name +SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=0; +3 +EXPLAIN SYNTAX SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=0; +SELECT count() +FROM +( + SELECT + a AS alias_of_a, + sum(b) + FROM test_rewrite_uniq_to_count + GROUP BY a +) AS t +SETTINGS allow_experimental_analyzer = 0 +SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=1; +3 +EXPLAIN QUERY TREE SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=1; +QUERY id: 0 + PROJECTION COLUMNS + uniq(alias_of_a) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + QUERY id: 3, alias: t, is_subquery: 1 + PROJECTION COLUMNS + alias_of_a UInt8 + sum(b) UInt64 + PROJECTION + LIST id: 4, nodes: 2 + COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6 + FUNCTION id: 7, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: b, result_type: UInt8, source_id: 6 + JOIN TREE + TABLE id: 6, table_name: 
default.test_rewrite_uniq_to_count + GROUP BY + LIST id: 10, nodes: 1 + COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6 + SETTINGS allow_experimental_analyzer=1 +-- test group by with select expression alias +SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t settings allow_experimental_analyzer=0; +3 +EXPLAIN SYNTAX SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t settings allow_experimental_analyzer=0; +SELECT count() +FROM +( + SELECT + a AS alias_of_a, + sum(b) + FROM test_rewrite_uniq_to_count + GROUP BY alias_of_a +) AS t +SETTINGS allow_experimental_analyzer = 0 +SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t settings allow_experimental_analyzer=1; +3 +EXPLAIN QUERY TREE SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t settings allow_experimental_analyzer=1; +QUERY id: 0 + PROJECTION COLUMNS + uniq(alias_of_a) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + QUERY id: 3, alias: t, is_subquery: 1 + PROJECTION COLUMNS + alias_of_a UInt8 + sum(b) UInt64 + PROJECTION + LIST id: 4, nodes: 2 + COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6 + FUNCTION id: 7, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: b, result_type: UInt8, source_id: 6 + JOIN TREE + TABLE id: 6, table_name: default.test_rewrite_uniq_to_count + GROUP BY + LIST id: 10, nodes: 1 + COLUMN id: 5, column_name: a, result_type: UInt8, source_id: 6 + SETTINGS allow_experimental_analyzer=1 +drop table if exists test_rewrite_uniq_to_count; diff --git a/tests/queries/0_stateless/02868_distinct_to_count_optimization.sql b/tests/queries/0_stateless/02868_distinct_to_count_optimization.sql new file mode 100644 index 00000000000..c2b2d12fd7c --- /dev/null +++ b/tests/queries/0_stateless/02868_distinct_to_count_optimization.sql @@ -0,0 +1,71 @@ +drop table if exists test_rewrite_uniq_to_count; + +CREATE TABLE test_rewrite_uniq_to_count +( + `a` UInt8, + `b` UInt8, + `c` UInt8 +) ENGINE = MergeTree ORDER BY `a`; + + +INSERT INTO test_rewrite_uniq_to_count values ('1', '1', '1'), ('1', '1', '1'); +INSERT INTO test_rewrite_uniq_to_count values ('2', '2', '2'), ('2', '2', '2'); +INSERT INTO test_rewrite_uniq_to_count values ('3', '3', '3'), ('3', '3', '3'); + +set optimize_uniq_to_count=true; + +-- { echoOn } + +-- test simple distinct +SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) settings allow_experimental_analyzer=0; +EXPLAIN SYNTAX SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) settings allow_experimental_analyzer=0; +SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) settings allow_experimental_analyzer=1; +EXPLAIN QUERY TREE SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) settings allow_experimental_analyzer=1; + + +-- test distinct with subquery alias +SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +EXPLAIN SYNTAX SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM 
test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; +EXPLAIN QUERY TREE SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; + +-- test distinct with compound column name +SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +EXPLAIN SYNTAX SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; +EXPLAIN QUERY TREE SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; + +-- test distinct with select expression alias +SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +EXPLAIN SYNTAX SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=0; +SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; +EXPLAIN QUERY TREE SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t settings allow_experimental_analyzer=1; + + +-- test simple group by +SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) settings allow_experimental_analyzer=0; +EXPLAIN SYNTAX SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) settings allow_experimental_analyzer=0; +SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) settings allow_experimental_analyzer=1; +EXPLAIN QUERY TREE SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) settings allow_experimental_analyzer=1; + +-- test group by with subquery alias +SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=0; +EXPLAIN SYNTAX SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=0; +SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=1; +EXPLAIN QUERY TREE SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=1; + +-- test group by with compound column name +SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=0; +EXPLAIN SYNTAX SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=0; +SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=1; +EXPLAIN QUERY TREE SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t settings allow_experimental_analyzer=1; + +-- test group by with select expression alias +SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t settings allow_experimental_analyzer=0; +EXPLAIN SYNTAX SELECT 
uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t settings allow_experimental_analyzer=0; +SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t settings allow_experimental_analyzer=1; +EXPLAIN QUERY TREE SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t settings allow_experimental_analyzer=1; + +drop table if exists test_rewrite_uniq_to_count; + +-- { echoOff } From 15310117b05e28ae977d8eff5081332e3345c154 Mon Sep 17 00:00:00 2001 From: Song Liyong Date: Fri, 11 Aug 2023 13:40:40 +0200 Subject: [PATCH 0325/1687] Introduce default parameters to DECIMAL family types In standard SQL, the syntax DECIMAL(M) is equivalent to DECIMAL(M,0). Similarly, the syntax DECIMAL is equivalent to DECIMAL(M,0), where the implementation is permitted to decide the value of M. https://dev.mysql.com/doc/refman/8.0/en/fixed-point-types.html Since MySQL uses 10 as the default value for precision, suggesting to also support this in ClickHouse --- docs/en/sql-reference/data-types/decimal.md | 6 ++-- src/DataTypes/DataTypesDecimal.cpp | 33 ++++++++++++------- ...with_default_precision_and_scale.reference | 4 +++ ...cimal_with_default_precision_and_scale.sql | 19 +++++++++++ 4 files changed, 48 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.reference create mode 100644 tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.sql diff --git a/docs/en/sql-reference/data-types/decimal.md b/docs/en/sql-reference/data-types/decimal.md index bba5ea74ebe..e082eb29fbd 100644 --- a/docs/en/sql-reference/data-types/decimal.md +++ b/docs/en/sql-reference/data-types/decimal.md @@ -4,15 +4,17 @@ sidebar_position: 42 sidebar_label: Decimal --- -# Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S), Decimal256(S) +# Decimal, Decimal(P), Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S), Decimal256(S) Signed fixed-point numbers that keep precision during add, subtract and multiply operations. For division least significant digits are discarded (not rounded). ## Parameters -- P - precision. Valid range: \[ 1 : 76 \]. Determines how many decimal digits number can have (including fraction). +- P - precision. Valid range: \[ 1 : 76 \]. Determines how many decimal digits number can have (including fraction). By default the precision is 10. - S - scale. Valid range: \[ 0 : P \]. Determines how many decimal digits fraction can have. +Decimal(P) is equivalent to Decimal(P, 0). Similarly, the syntax Decimal is equivalent to Decimal(10, 0). 
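+
+For example, the following declarations are equivalent (a sketch based on the test added in this patch; the exact `SHOW CREATE TABLE` formatting may differ between versions):
+
+``` sql
+CREATE TABLE t (d1 Decimal, d2 Decimal(10), d3 Decimal(10, 0)) ENGINE = Memory;
+-- d1, d2 and d3 all get the type Decimal(10, 0)
+```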
+ Depending on P parameter value Decimal(P, S) is a synonym for: - P from \[ 1 : 9 \] - for Decimal32(S) - P from \[ 10 : 18 \] - for Decimal64(S) diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index fa044d4ac9c..6529ce09456 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -74,21 +74,30 @@ SerializationPtr DataTypeDecimal::doGetDefaultSerialization() const static DataTypePtr create(const ASTPtr & arguments) { - if (!arguments || arguments->children.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Decimal data type family must have exactly two arguments: precision and scale"); + UInt64 precision = 10; + UInt64 scale = 0; + if (arguments) + { + if (arguments->children.empty() || arguments->children.size() > 2) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Decimal data type family must have precision and optional scale arguments"); - const auto * precision = arguments->children[0]->as(); - const auto * scale = arguments->children[1]->as(); + const auto * precision_arg = arguments->children[0]->as(); + if (!precision_arg || precision_arg->value.getType() != Field::Types::UInt64) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal argument precision is invalid"); + precision = precision_arg->value.get(); - if (!precision || precision->value.getType() != Field::Types::UInt64 || - !scale || !(scale->value.getType() == Field::Types::Int64 || scale->value.getType() == Field::Types::UInt64)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal data type family must have two numbers as its arguments"); + if (arguments->children.size() == 2) + { + const auto * scale_arg = arguments->children[1]->as(); + if (!scale_arg || !isInt64OrUInt64FieldType(scale_arg->value.getType())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal argument scale is invalid"); + scale = scale_arg->value.get(); + } + } - UInt64 precision_value = precision->value.get(); - UInt64 scale_value = scale->value.get(); - - return createDecimal(precision_value, scale_value); + return createDecimal(precision, scale); } template diff --git a/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.reference b/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.reference new file mode 100644 index 00000000000..79e219b89fd --- /dev/null +++ b/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.reference @@ -0,0 +1,4 @@ +Decimal(9, 8) +Decimal(18, 0) +Decimal(10, 0) +Decimal(18, 0) Decimal(10, 0) diff --git a/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.sql b/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.sql new file mode 100644 index 00000000000..4d6048ed7ae --- /dev/null +++ b/tests/queries/0_stateless/00700_decimal_with_default_precision_and_scale.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS decimal; + +CREATE TABLE IF NOT EXISTS decimal +( + d1 DECIMAL(9, 8), + d2 DECIMAL(18), + d3 DECIMAL +) +ENGINE = MergeTree +PARTITION BY toInt32(d1) +ORDER BY (d2, d3); + +INSERT INTO decimal (d1, d2, d3) VALUES (4.2, 4.2, 4.2); + +SELECT type FROM system.columns WHERE table = 'decimal' AND database = currentDatabase(); + +SELECT toTypeName(d2), toTypeName(d3) FROM decimal LIMIT 1; + +DROP TABLE decimal; From d8a06bed76efea2f6c358699ebb03f780d992d08 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 28 Aug 2023 13:06:45 +0200 Subject: [PATCH 0326/1687] refine tests --- 
tests/queries/0_stateless/00538_datediff.reference | 10 ---------- tests/queries/0_stateless/00538_datediff.sql | 11 ----------- .../0_stateless/00538_datediff_plural_units.reference | 10 ++++++++++ .../0_stateless/00538_datediff_plural_units.sql | 10 ++++++++++ 4 files changed, 20 insertions(+), 21 deletions(-) create mode 100644 tests/queries/0_stateless/00538_datediff_plural_units.reference create mode 100644 tests/queries/0_stateless/00538_datediff_plural_units.sql diff --git a/tests/queries/0_stateless/00538_datediff.reference b/tests/queries/0_stateless/00538_datediff.reference index 55853e53ec0..aac0767f49f 100644 --- a/tests/queries/0_stateless/00538_datediff.reference +++ b/tests/queries/0_stateless/00538_datediff.reference @@ -69,13 +69,3 @@ Additional test 1 1 1 --1 --7 --23 --104 --730 --17520 --1051200 --63072000 --63072000000 --63072000000000 diff --git a/tests/queries/0_stateless/00538_datediff.sql b/tests/queries/0_stateless/00538_datediff.sql index 4c7c3aa14ff..5dc416ad0bd 100644 --- a/tests/queries/0_stateless/00538_datediff.sql +++ b/tests/queries/0_stateless/00538_datediff.sql @@ -74,14 +74,3 @@ SELECT dateDiff('second', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime(' SELECT 'Additional test'; SELECT number = dateDiff('month', now() - INTERVAL number MONTH, now()) FROM system.numbers LIMIT 10; - -SELECT dateDiff('years', toDate('2017-12-31'), toDate('2016-01-01')); -SELECT dateDiff('quarters', toDate('2017-12-31'), toDate('2016-01-01')); -SELECT dateDiff('months', toDateTime('2017-12-31'), toDateTime('2016-01-01')); -SELECT dateDiff('weeks', toDateTime('2017-12-31'), toDateTime('2016-01-01')); -SELECT dateDiff('days', toDateTime('2017-12-31'), toDateTime('2016-01-01')); -SELECT dateDiff('hours', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); -SELECT dateDiff('minutes', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); -SELECT dateDiff('seconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); -SELECT dateDiff('milliseconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); -SELECT dateDiff('microseconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); diff --git a/tests/queries/0_stateless/00538_datediff_plural_units.reference b/tests/queries/0_stateless/00538_datediff_plural_units.reference new file mode 100644 index 00000000000..ebe63974df8 --- /dev/null +++ b/tests/queries/0_stateless/00538_datediff_plural_units.reference @@ -0,0 +1,10 @@ +-1 +-7 +-23 +-104 +-730 +-17520 +-1051200 +-63072000 +-63072000000 +-63072000000000 diff --git a/tests/queries/0_stateless/00538_datediff_plural_units.sql b/tests/queries/0_stateless/00538_datediff_plural_units.sql new file mode 100644 index 00000000000..d1234155a56 --- /dev/null +++ b/tests/queries/0_stateless/00538_datediff_plural_units.sql @@ -0,0 +1,10 @@ +SELECT dateDiff('years', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT dateDiff('quarters', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT dateDiff('months', toDateTime('2017-12-31'), toDateTime('2016-01-01')); +SELECT dateDiff('weeks', toDateTime('2017-12-31'), toDateTime('2016-01-01')); +SELECT dateDiff('days', toDateTime('2017-12-31'), toDateTime('2016-01-01')); +SELECT dateDiff('hours', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); +SELECT dateDiff('minutes', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); +SELECT dateDiff('seconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); +SELECT 
dateDiff('milliseconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC'));
+SELECT dateDiff('microseconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC'));

From bb6b939d2bd22ac3c7736c002a60608be3675cc3 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Wed, 23 Aug 2023 15:46:56 +0000
Subject: [PATCH 0327/1687] Support 'is not distinct from' in join on section

---
 src/Functions/isNotDistinctFrom.cpp           |  12 +
 src/Functions/isNotDistinctFrom.h             |  52 ++
 src/Functions/tuple.cpp                       |  91 +--
 src/Functions/tuple.h                         |  52 ++
 src/Interpreters/CollectJoinOnKeysVisitor.cpp |  15 +-
 src/Interpreters/CollectJoinOnKeysVisitor.h   |   2 +-
 src/Interpreters/HashJoin.cpp                 |   8 +
 src/Interpreters/TableJoin.cpp                | 310 ++++++--
 src/Interpreters/TableJoin.h                  |  42 +-
 tests/analyzer_tech_debt.txt                  |   1 +
 ...2861_join_on_nullsafe_compare.reference.j2 | 671 ++++++++++++++++++
 .../02861_join_on_nullsafe_compare.sql.j2     | 101 +++
 12 files changed, 1205 insertions(+), 152 deletions(-)
 create mode 100644 src/Functions/isNotDistinctFrom.cpp
 create mode 100644 src/Functions/isNotDistinctFrom.h
 create mode 100644 src/Functions/tuple.h
 create mode 100644 tests/queries/0_stateless/02861_join_on_nullsafe_compare.reference.j2
 create mode 100644 tests/queries/0_stateless/02861_join_on_nullsafe_compare.sql.j2

diff --git a/src/Functions/isNotDistinctFrom.cpp b/src/Functions/isNotDistinctFrom.cpp
new file mode 100644
index 00000000000..308b13552f8
--- /dev/null
+++ b/src/Functions/isNotDistinctFrom.cpp
@@ -0,0 +1,12 @@
+#include
+
+
+namespace DB
+{
+
+REGISTER_FUNCTION(IsNotDistinctFrom)
+{
+    factory.registerFunction();
+}
+
+}
diff --git a/src/Functions/isNotDistinctFrom.h b/src/Functions/isNotDistinctFrom.h
new file mode 100644
index 00000000000..56199050207
--- /dev/null
+++ b/src/Functions/isNotDistinctFrom.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
+
+/**
+ * Performs null-safe comparison.
+ * equals(NULL, NULL) is NULL, while isNotDistinctFrom(NULL, NULL) is true.
+ * Currently it can be used only in the JOIN ON section.
+ * This wrapper is needed to register the function, so that query analysis, syntax completion and so on are possible.
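+ *
+ * A usage sketch (the table and column names are illustrative, not part of this patch):
+ *   SELECT * FROM t1 JOIN t2 ON isNotDistinctFrom(t1.key, t2.key)
+ * matches rows where both keys are NULL, unlike ON t1.key = t2.key.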
+ */ +class FunctionIsNotDistinctFrom : public IFunction +{ +public: + static constexpr auto name = "isNotDistinctFrom"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return false; } + + size_t getNumberOfArguments() const override { return 2; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + bool useDefaultImplementationForNulls() const override { return false; } + + bool useDefaultImplementationForNothing() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & ) const override { return std::make_shared();} + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & /* arguments */, const DataTypePtr &, size_t /* rows_count */) const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} can be used only in JOIN ON section", getName()); + } +}; + +} diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp index f6c2831365f..85c250cce41 100644 --- a/src/Functions/tuple.cpp +++ b/src/Functions/tuple.cpp @@ -1,88 +1,33 @@ -#include +#include #include -#include -#include -#include - namespace DB { -namespace ErrorCodes + +DataTypePtr FunctionTuple::getReturnTypeImpl(const DataTypes & arguments) const { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + if (arguments.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); + + return std::make_shared(arguments); } -namespace +ColumnPtr FunctionTuple::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const { - -/** tuple(x, y, ...) is a function that allows you to group several columns - * tupleElement(tuple, n) is a function that allows you to retrieve a column from tuple. - */ - -class FunctionTuple : public IFunction -{ -public: - static constexpr auto name = "tuple"; - - static FunctionPtr create(ContextPtr) + size_t tuple_size = arguments.size(); + Columns tuple_columns(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) { - return std::make_shared(); + /** If tuple is mixed of constant and not constant columns, + * convert all to non-constant columns, + * because many places in code expect all non-constant columns in non-constant tuple. + */ + tuple_columns[i] = arguments[i].column->convertToFullColumnIfConst(); } - - String getName() const override - { - return name; - } - - bool isVariadic() const override - { - return true; - } - - size_t getNumberOfArguments() const override - { - return 0; - } - - bool isInjective(const ColumnsWithTypeAndName &) const override - { - return true; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - bool useDefaultImplementationForNulls() const override { return false; } - /// tuple(..., Nothing, ...) -> Tuple(..., Nothing, ...) 
- bool useDefaultImplementationForNothing() const override { return false; } - bool useDefaultImplementationForConstants() const override { return true; } - bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); - - return std::make_shared(arguments); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - size_t tuple_size = arguments.size(); - Columns tuple_columns(tuple_size); - for (size_t i = 0; i < tuple_size; ++i) - { - /** If tuple is mixed of constant and not constant columns, - * convert all to non-constant columns, - * because many places in code expect all non-constant columns in non-constant tuple. - */ - tuple_columns[i] = arguments[i].column->convertToFullColumnIfConst(); - } - return ColumnTuple::create(tuple_columns); - } -}; - + return ColumnTuple::create(tuple_columns); } + REGISTER_FUNCTION(Tuple) { factory.registerFunction(); diff --git a/src/Functions/tuple.h b/src/Functions/tuple.h new file mode 100644 index 00000000000..2d457e4697e --- /dev/null +++ b/src/Functions/tuple.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +/** tuple(x, y, ...) is a function that allows you to group several columns + * tupleElement(tuple, n) is a function that allows you to retrieve a column from tuple. + */ +class FunctionTuple : public IFunction +{ +public: + static constexpr auto name = "tuple"; + + /// [[maybe_unused]]: false positive warning `unused-member-function` + [[maybe_unused]] static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + bool useDefaultImplementationForNulls() const override { return false; } + + /// tuple(..., Nothing, ...) -> Tuple(..., Nothing, ...) 
+ bool useDefaultImplementationForNothing() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override; +}; + +} diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index fd6fc27faec..5dcd699d795 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -38,15 +38,15 @@ bool isRightIdentifier(JoinIdentifierPos pos) } -void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos) +void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos, bool null_safe_comparison) { ASTPtr left = left_ast->clone(); ASTPtr right = right_ast->clone(); if (isLeftIdentifier(table_pos.first) && isRightIdentifier(table_pos.second)) - analyzed_join.addOnKeys(left, right); + analyzed_join.addOnKeys(left, right, null_safe_comparison); else if (isRightIdentifier(table_pos.first) && isLeftIdentifier(table_pos.second)) - analyzed_join.addOnKeys(right, left); + analyzed_join.addOnKeys(right, left, null_safe_comparison); else throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot detect left and right JOIN keys. JOIN ON section is ambiguous."); } @@ -78,7 +78,7 @@ void CollectJoinOnKeysMatcher::Data::asofToJoinKeys() { if (!asof_left_key || !asof_right_key) throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "No inequality in ASOF JOIN ON section."); - addJoinKeys(asof_left_key, asof_right_key, {JoinIdentifierPos::Left, JoinIdentifierPos::Right}); + addJoinKeys(asof_left_key, asof_right_key, {JoinIdentifierPos::Left, JoinIdentifierPos::Right}, false); } void CollectJoinOnKeysMatcher::visit(const ASTIdentifier & ident, const ASTPtr & ast, CollectJoinOnKeysMatcher::Data & data) @@ -96,14 +96,14 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASOFJoinInequality inequality = getASOFJoinInequality(func.name); - if (func.name == "equals" || inequality != ASOFJoinInequality::None) + if (func.name == "equals" || func.name == "isNotDistinctFrom" || inequality != ASOFJoinInequality::None) { if (func.arguments->children.size() != 2) throw Exception(ErrorCodes::SYNTAX_ERROR, "Function {} takes two arguments, got '{}' instead", func.name, func.formatForErrorMessage()); } - if (func.name == "equals") + if (func.name == "equals" || func.name == "isNotDistinctFrom") { ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); @@ -121,7 +121,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as if ((isLeftIdentifier(table_numbers.first) && isRightIdentifier(table_numbers.second)) || (isRightIdentifier(table_numbers.first) && isLeftIdentifier(table_numbers.second))) { - data.addJoinKeys(left, right, table_numbers); + bool null_safe_comparison = func.name == "isNotDistinctFrom"; + data.addJoinKeys(left, right, table_numbers, null_safe_comparison); return; } } diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h index 4f4e886099e..194ec01bcd5 100644 --- 
a/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -54,7 +54,7 @@ public: ASTPtr asof_left_key{}; ASTPtr asof_right_key{}; - void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos); + void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos, bool null_safe_comparison); void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos, const ASOFJoinInequality & asof_inequality); void asofToJoinKeys(); diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index be08b7cbe1e..585e99e5034 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -11,9 +11,12 @@ #include #include #include +#include + #include #include +#include #include #include @@ -28,6 +31,9 @@ #include #include +#include + + namespace DB { @@ -303,6 +309,8 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s for (auto & maps : data->maps) dataMapInit(maps, reserve_num); + + LOG_DEBUG(log, "({}) Method: {}", fmt::ptr(this), data->type); } HashJoin::Type HashJoin::chooseMethod(JoinKind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes) diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index cabd0be1aa3..33f514618a9 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -10,6 +10,9 @@ #include #include +#include +#include +#include #include @@ -40,6 +43,7 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; + extern const int NOT_FOUND_COLUMN_IN_BLOCK; } namespace @@ -135,7 +139,8 @@ void TableJoin::resetCollected() void TableJoin::addUsingKey(const ASTPtr & ast) { - addKey(ast->getColumnName(), renamedRightColumnName(ast->getAliasOrColumnName()), ast); + /// For USING key and right key AST is the same. 
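+    /// E.g. for `USING (a)` (roughly `ON t1.a = t2.a`) the same AST is passed as both the left and the right key.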
+ addKey(ast->getColumnName(), renamedRightColumnName(ast->getAliasOrColumnName()), ast, ast); } void TableJoin::addDisjunct() @@ -146,9 +151,9 @@ void TableJoin::addDisjunct() throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); } -void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast) +void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast, bool null_safe_comparison) { - addKey(left_table_ast->getColumnName(), right_table_ast->getAliasOrColumnName(), left_table_ast, right_table_ast); + addKey(left_table_ast->getColumnName(), right_table_ast->getAliasOrColumnName(), left_table_ast, right_table_ast, null_safe_comparison); right_key_aliases[right_table_ast->getColumnName()] = right_table_ast->getAliasOrColumnName(); } @@ -425,55 +430,180 @@ static void renameIfNeeded(String & name, const NameToNameMap & renames) name = it->second; } +static void makeColumnNameUnique(const ColumnsWithTypeAndName & source_coulmns, String & name) +{ + for (const auto & source_col : source_coulmns) + { + if (source_col.name != name) + continue; + + /// Duplicate found, slow path + NameSet names; + for (const auto & col : source_coulmns) + names.insert(col.name); + + String base_name = name; + for (size_t i = 0; ; ++i) + { + name = base_name + "_" + toString(i); + if (!names.contains(name)) + return; + } + } +} + +static ActionsDAGPtr createWrapWithTupleActions( + const ColumnsWithTypeAndName & source_coulmns, + std::unordered_set && column_names_to_wrap, + NameToNameMap & new_names) +{ + if (column_names_to_wrap.empty()) + return nullptr; + + auto actions_dag = std::make_shared(source_coulmns); + + FunctionOverloadResolverPtr func_builder = std::make_unique(std::make_shared()); + + for (const auto * input_node : actions_dag->getInputs()) + { + const auto & column_name = input_node->result_name; + auto it = column_names_to_wrap.find(column_name); + if (it == column_names_to_wrap.end()) + continue; + column_names_to_wrap.erase(it); + + String node_name = "__wrapNullsafe(" + column_name + ")"; + makeColumnNameUnique(source_coulmns, node_name); + + const auto & dst_node = actions_dag->addFunction(func_builder, {input_node}, node_name); + new_names[column_name] = dst_node.result_name; + actions_dag->addOrReplaceInOutputs(dst_node); + } + + if (!column_names_to_wrap.empty()) + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Can't find columns {} in input columns [{}]", + fmt::join(column_names_to_wrap, ", "), Block(source_coulmns).dumpNames()); + + return actions_dag; +} + +/// Wrap only those keys that are nullable on both sides +std::pair TableJoin::getKeysForNullSafeComparion(const ColumnsWithTypeAndName & left_sample_columns, const ColumnsWithTypeAndName & right_sample_columns) +{ + std::unordered_map left_idx; + for (size_t i = 0; i < left_sample_columns.size(); ++i) + left_idx[left_sample_columns[i].name] = i; + + std::unordered_map right_idx; + for (size_t i = 0; i < right_sample_columns.size(); ++i) + right_idx[right_sample_columns[i].name] = i; + + NameSet left_keys_to_wrap; + NameSet right_keys_to_wrap; + + for (const auto & clause : clauses) + { + for (size_t i : clause.nullsafe_compare_key_indexes) + { + const auto & left_key = clause.key_names_left[i]; + const auto & right_key = clause.key_names_right[i]; + auto lit = left_idx.find(left_key); + if (lit == left_idx.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't find key {} in left columns [{}]", + left_key, Block(left_sample_columns).dumpNames()); + auto rit = 
right_idx.find(right_key);
+            if (rit == right_idx.end())
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't find key {} in right columns [{}]",
+                                right_key, Block(right_sample_columns).dumpNames());
+
+            if (!left_sample_columns[lit->second].type->isNullable() || !right_sample_columns[rit->second].type->isNullable())
+                continue;
+
+            left_keys_to_wrap.insert(left_key);
+            right_keys_to_wrap.insert(right_key);
+        }
+    }
+
+    return {left_keys_to_wrap, right_keys_to_wrap};
+}
+
+static void mergeDags(ActionsDAGPtr & result_dag, ActionsDAGPtr && new_dag)
+{
+    if (result_dag)
+        result_dag->mergeInplace(std::move(*new_dag));
+    else
+        result_dag = std::move(new_dag);
+}
+
 std::pair TableJoin::createConvertingActions(
     const ColumnsWithTypeAndName & left_sample_columns,
     const ColumnsWithTypeAndName & right_sample_columns)
 {
+    ActionsDAGPtr left_dag = nullptr;
+    ActionsDAGPtr right_dag = nullptr;
+    /** If the types are not equal, we need to convert them to a common type.
+      * Example:
+      * SELECT * FROM t1 JOIN t2 ON t1.a = t2.b
+      * Assume that t1.a is UInt16 and t2.b is Int8. The supertype for them is Int32.
+      * The query will be semantically transformed to:
+      * SELECT * FROM t1 JOIN t2 ON CAST(t1.a AS 'Int32') = CAST(t2.b AS 'Int32')
+      * As a result, the user will get the original columns `a` and `b` without `CAST`.
+      *
+      */
+    NameToNameMap left_column_rename;
+    NameToNameMap right_column_rename;
     inferJoinKeyCommonType(left_sample_columns, right_sample_columns, !isSpecialStorage(), isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE));
-
-    NameToNameMap left_key_column_rename;
-    NameToNameMap right_key_column_rename;
-    auto left_converting_actions = applyKeyConvertToTable(
-        left_sample_columns, left_type_map, left_key_column_rename, forceNullableLeft());
-    auto right_converting_actions = applyKeyConvertToTable(
-        right_sample_columns, right_type_map, right_key_column_rename, forceNullableRight());
-
+    if (!left_type_map.empty() || !right_type_map.empty())
     {
-        auto log_actions = [](const String & side, const ActionsDAGPtr & dag)
-        {
-            if (dag)
-            {
-                /// Just debug message
-                std::vector input_cols;
-                for (const auto & col : dag->getRequiredColumns())
-                    input_cols.push_back(col.name + ": " + col.type->getName());
-
-                std::vector output_cols;
-                for (const auto & col : dag->getResultColumns())
-                    output_cols.push_back(col.name + ": " + col.type->getName());
-
-                LOG_DEBUG(&Poco::Logger::get("TableJoin"), "{} JOIN converting actions: [{}] -> [{}]",
-                          side, fmt::join(input_cols, ", "), fmt::join(output_cols, ", "));
-            }
-            else
-            {
-                LOG_DEBUG(&Poco::Logger::get("TableJoin"), "{} JOIN converting actions: empty", side);
-                return;
-            }
-        };
-        log_actions("Left", left_converting_actions);
-        log_actions("Right", right_converting_actions);
+        left_dag = applyKeyConvertToTable(left_sample_columns, left_type_map, JoinTableSide::Left, left_column_rename);
+        right_dag = applyKeyConvertToTable(right_sample_columns, right_type_map, JoinTableSide::Right, right_column_rename);
     }
 
-    forAllKeys(clauses, [&](auto & left_key, auto & right_key)
+    /**
+      * Similarly, when we have a null-safe comparison (a IS NOT DISTINCT FROM b),
+      * we need to wrap keys with a non-nullable type.
+      * The type `tuple` can be used for this purpose,
+      * because the value tuple(NULL) is not NULL itself (moreover, it has the type Tuple(Nullable(T)), which is not Nullable).
+      * Thus, the join algorithm will match keys with values tuple(NULL).
+ * Example: + * SELECT * FROM t1 JOIN t2 ON t1.a <=> t2.b + * This will be semantically transformed to: + * SELECT * FROM t1 JOIN t2 ON tuple(t1.a) == tuple(t2.b) + */ + auto [left_keys_nullsafe_comparison, right_keys_nullsafe_comparison] = getKeysForNullSafeComparion( + left_dag ? left_dag->getResultColumns() : left_sample_columns, + right_dag ? right_dag->getResultColumns() : right_sample_columns); + if (!left_keys_nullsafe_comparison.empty() || !right_keys_nullsafe_comparison.empty()) { - renameIfNeeded(left_key, left_key_column_rename); - renameIfNeeded(right_key, right_key_column_rename); - return true; - }); + auto new_left_dag = applyNullsafeWrapper( + left_dag ? left_dag->getResultColumns() : left_sample_columns, + left_keys_nullsafe_comparison, JoinTableSide::Left, left_column_rename); + mergeDags(left_dag, std::move(new_left_dag)); - return {left_converting_actions, right_converting_actions}; + auto new_right_dag = applyNullsafeWrapper( + right_dag ? right_dag->getResultColumns() : right_sample_columns, + right_keys_nullsafe_comparison, JoinTableSide::Right, right_column_rename); + mergeDags(right_dag, std::move(new_right_dag)); + } + + if (forceNullableLeft()) + { + auto new_left_dag = applyJoinUseNullsConversion( + left_dag ? left_dag->getResultColumns() : left_sample_columns, + left_column_rename); + mergeDags(left_dag, std::move(new_left_dag)); + } + + if (forceNullableRight()) + { + auto new_right_dag = applyJoinUseNullsConversion( + right_dag ? right_dag->getResultColumns() : right_sample_columns, + right_column_rename); + mergeDags(right_dag, std::move(new_right_dag)); + } + + return {left_dag, right_dag}; } template @@ -608,30 +738,66 @@ static ActionsDAGPtr changeTypesToNullable( ActionsDAGPtr TableJoin::applyKeyConvertToTable( const ColumnsWithTypeAndName & cols_src, const NameToTypeMap & type_mapping, - NameToNameMap & key_column_rename, - bool make_nullable) const + JoinTableSide table_side, + NameToNameMap & key_column_rename) { + if (type_mapping.empty()) + return nullptr; + /// Create DAG to convert key columns - ActionsDAGPtr dag_stage1 = changeKeyTypes(cols_src, type_mapping, !hasUsing(), key_column_rename); + ActionsDAGPtr convert_dag = changeKeyTypes(cols_src, type_mapping, !hasUsing(), key_column_rename); + applyRename(table_side, key_column_rename); + return convert_dag; +} + +ActionsDAGPtr TableJoin::applyNullsafeWrapper( + const ColumnsWithTypeAndName & cols_src, + const NameSet & columns_for_nullsafe_comparison, + JoinTableSide table_side, + NameToNameMap & key_column_rename) +{ + if (columns_for_nullsafe_comparison.empty()) + return nullptr; + + std::unordered_set column_names_to_wrap; + for (const auto & name : columns_for_nullsafe_comparison) + { + /// Take into account column renaming for type conversion + /// if we changed key `a == b` to `_CAST(a, 'UInt64') = b` we need to wrap `tuple(_CAST(a, 'UInt64')) = tuple(b)` + if (auto it = key_column_rename.find(name); it != key_column_rename.end()) + column_names_to_wrap.insert(it->second); + else + column_names_to_wrap.insert(name); + } + + /// Create DAG to wrap keys with tuple for null-safe comparison + ActionsDAGPtr null_safe_wrap_dag = createWrapWithTupleActions(cols_src, std::move(column_names_to_wrap), key_column_rename); + for (auto & clause : clauses) + { + for (size_t i : clause.nullsafe_compare_key_indexes) + { + if (table_side == JoinTableSide::Left) + renameIfNeeded(clause.key_names_left[i], key_column_rename); + else + renameIfNeeded(clause.key_names_right[i], key_column_rename); + } + } + + 
return null_safe_wrap_dag; +} + +ActionsDAGPtr TableJoin::applyJoinUseNullsConversion( + const ColumnsWithTypeAndName & cols_src, + const NameToNameMap & key_column_rename) +{ + /// Do not need to make nullable temporary columns that would be used only as join keys, but is not visible to user + NameSet exclude_columns; + for (const auto & it : key_column_rename) + exclude_columns.insert(it.second); /// Create DAG to make columns nullable if needed - if (make_nullable) - { - /// Do not need to make nullable temporary columns that would be used only as join keys, but is not visible to user - NameSet cols_not_nullable; - for (const auto & t : key_column_rename) - cols_not_nullable.insert(t.second); - - ColumnsWithTypeAndName input_cols = dag_stage1 ? dag_stage1->getResultColumns() : cols_src; - ActionsDAGPtr dag_stage2 = changeTypesToNullable(input_cols, cols_not_nullable); - - /// Merge dags if we got two ones - if (dag_stage1) - return ActionsDAG::merge(std::move(*dag_stage1), std::move(*dag_stage2)); - else - return dag_stage2; - } - return dag_stage1; + ActionsDAGPtr add_nullable_dag = changeTypesToNullable(cols_src, exclude_columns); + return add_nullable_dag; } void TableJoin::setStorageJoin(std::shared_ptr storage) @@ -674,12 +840,13 @@ void TableJoin::setRename(const String & from, const String & to) renames[from] = to; } -void TableJoin::addKey(const String & left_name, const String & right_name, const ASTPtr & left_ast, const ASTPtr & right_ast) +void TableJoin::addKey(const String & left_name, const String & right_name, + const ASTPtr & left_ast, const ASTPtr & right_ast, + bool null_safe_comparison) { - clauses.back().key_names_left.emplace_back(left_name); - key_asts_left.emplace_back(left_ast); + clauses.back().addKey(left_name, right_name, null_safe_comparison); - clauses.back().key_names_right.emplace_back(right_name); + key_asts_left.emplace_back(left_ast); key_asts_right.emplace_back(right_ast ? 
right_ast : left_ast); } @@ -731,6 +898,19 @@ Names TableJoin::getAllNames(JoinTableSide side) const return res; } +void TableJoin::applyRename(JoinTableSide side, const NameToNameMap & name_map) +{ + auto rename_callback = [&name_map](auto & key_name) + { + renameIfNeeded(key_name, name_map); + return true; + }; + if (side == JoinTableSide::Left) + forAllKeys(clauses, rename_callback); + else + forAllKeys(clauses, rename_callback); +} + void TableJoin::assertHasOneOnExpr() const { if (!oneDisjunct()) diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index af3b027b2db..815494f3961 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -51,6 +51,13 @@ public: Names key_names_left; Names key_names_right; /// Duplicating right key names are qualified + /** JOIN ON a1 == a2 AND b1 <=> b2 AND c1 == c2 AND d1 <=> d2 + * key_names_left: [a1, b1, c1, d1] + * key_names_right: [a2, b2, c2, d2] + * nullsafe_compare_key_indexes: {1, 3} + */ + std::unordered_set nullsafe_compare_key_indexes; + ASTPtr on_filter_condition_left; ASTPtr on_filter_condition_right; @@ -59,6 +66,14 @@ public: JoinOnClause() = default; + void addKey(const String & left_name, const String right_name, bool null_safe_comparison) + { + key_names_left.push_back(left_name); + key_names_right.push_back(right_name); + if (null_safe_comparison) + nullsafe_compare_key_indexes.insert(key_names_left.size() - 1); + } + std::pair condColumnNames() const { std::pair res; @@ -177,11 +192,24 @@ private: /// Create converting actions and change key column names if required ActionsDAGPtr applyKeyConvertToTable( - const ColumnsWithTypeAndName & cols_src, const NameToTypeMap & type_mapping, - NameToNameMap & key_column_rename, - bool make_nullable) const; + const ColumnsWithTypeAndName & cols_src, + const NameToTypeMap & type_mapping, + JoinTableSide table_side, + NameToNameMap & key_column_rename); - void addKey(const String & left_name, const String & right_name, const ASTPtr & left_ast, const ASTPtr & right_ast = nullptr); + ActionsDAGPtr applyNullsafeWrapper( + const ColumnsWithTypeAndName & cols_src, + const NameSet & columns_for_nullsafe_comparison, + JoinTableSide table_side, + NameToNameMap & key_column_rename); + + ActionsDAGPtr applyJoinUseNullsConversion( + const ColumnsWithTypeAndName & cols_src, + const NameToNameMap & key_column_rename); + + void applyRename(JoinTableSide side, const NameToNameMap & name_map); + + void addKey(const String & left_name, const String & right_name, const ASTPtr & left_ast, const ASTPtr & right_ast, bool null_safe_comparison = false); void assertHasOneOnExpr() const; @@ -189,9 +217,11 @@ private: template void inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right, bool strict); - void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix); + std::pair + getKeysForNullSafeComparion(const ColumnsWithTypeAndName & left_sample_columns, const ColumnsWithTypeAndName & right_sample_columns); + public: TableJoin() = default; @@ -270,7 +300,7 @@ public: void addDisjunct(); - void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast); + void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast, bool null_safe_comparison); /* Conditions for left/right table from JOIN ON section. 
* diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 15d46403da9..4419190e12c 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -127,3 +127,4 @@ 02790_optimize_skip_unused_shards_join 01940_custom_tld_sharding_key 02815_range_dict_no_direct_join +02861_join_on_nullsafe_compare diff --git a/tests/queries/0_stateless/02861_join_on_nullsafe_compare.reference.j2 b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.reference.j2 new file mode 100644 index 00000000000..d97d6c2b314 --- /dev/null +++ b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.reference.j2 @@ -0,0 +1,671 @@ +{% for join_algorithm in ['default', 'grace_hash', 'full_sorting_merge'] -%} +join_algorithm = {{ join_algorithm }}, join_use_nulls = 0, t1 JOIN t2 +-- +\N 0 2 2 +\N 0 6 6 +\N 0 10 10 +\N 0 14 14 +\N 0 18 18 +1 1 1 1 +\N 2 \N 4 +\N 2 \N 8 +\N 2 \N 12 +\N 2 \N 16 +\N 2 \N 20 +3 3 3 3 +\N 4 \N 4 +\N 4 \N 8 +\N 4 \N 12 +\N 4 \N 16 +\N 4 \N 20 +5 5 5 5 +\N 6 \N 4 +\N 6 \N 8 +\N 6 \N 12 +\N 6 \N 16 +\N 6 \N 20 +7 7 7 7 +\N 8 \N 4 +\N 8 \N 8 +\N 8 \N 12 +\N 8 \N 16 +\N 8 \N 20 +9 9 9 9 +\N 10 \N 4 +\N 10 \N 8 +\N 10 \N 12 +\N 10 \N 16 +\N 10 \N 20 +11 11 11 11 +\N 12 \N 4 +\N 12 \N 8 +\N 12 \N 12 +\N 12 \N 16 +\N 12 \N 20 +13 13 13 13 +\N 14 \N 4 +\N 14 \N 8 +\N 14 \N 12 +\N 14 \N 16 +\N 14 \N 20 +15 15 15 15 +\N 16 \N 4 +\N 16 \N 8 +\N 16 \N 12 +\N 16 \N 16 +\N 16 \N 20 +17 17 17 17 +\N 18 \N 4 +\N 18 \N 8 +\N 18 \N 12 +\N 18 \N 16 +\N 18 \N 20 +19 19 19 19 +\N 20 \N 4 +\N 20 \N 8 +\N 20 \N 12 +\N 20 \N 16 +\N 20 \N 20 +-- +\N \N 0 2 2 2 +\N \N 0 3 \N 3 +\N \N 0 6 \N 6 +\N \N 0 9 \N 9 +\N \N 0 10 10 10 +\N \N 0 14 14 14 +\N \N 0 15 \N 15 +\N \N 0 18 \N 18 +1 1 1 1 1 1 +\N 2 2 \N \N 0 +3 3 3 \N \N 0 +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N \N 12 +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N \N 0 +\N 10 10 \N \N 0 +11 11 11 11 11 11 +\N \N 12 \N \N 12 +13 13 13 13 13 13 +\N 14 14 \N \N 0 +15 15 15 \N \N 0 +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N \N 12 +19 19 19 19 19 19 +\N 20 20 \N 20 20 +-- +\N \N 0 2 2 2 +\N \N 0 3 \N 3 +\N \N 0 \N 4 4 +\N \N 0 6 \N 6 +\N \N 0 \N 8 8 +\N \N 0 9 \N 9 +\N \N 0 10 10 10 +\N \N 0 \N \N 12 +\N \N 0 14 14 14 +\N \N 0 15 \N 15 +\N \N 0 \N 16 16 +\N \N 0 18 \N 18 +\N \N 0 \N 20 20 +1 1 1 1 1 1 +\N 2 2 \N \N 0 +3 3 3 \N \N 0 +\N 4 4 \N \N 0 +5 5 5 5 5 5 +\N \N 6 \N \N 0 +7 7 7 7 7 7 +\N 8 8 \N \N 0 +9 9 9 \N \N 0 +\N 10 10 \N \N 0 +11 11 11 11 11 11 +\N \N 12 \N \N 0 +13 13 13 13 13 13 +\N 14 14 \N \N 0 +15 15 15 \N \N 0 +\N 16 16 \N \N 0 +17 17 17 17 17 17 +\N \N 18 \N \N 0 +19 19 19 19 19 19 +\N 20 20 \N \N 0 +-- +\N \N 0 2 2 2 +\N \N 0 3 \N 3 +\N \N 0 6 \N 6 +\N \N 0 9 \N 9 +\N \N 0 10 10 10 +\N \N 0 \N \N 12 +\N \N 0 14 14 14 +\N \N 0 15 \N 15 +\N \N 0 18 \N 18 +1 1 1 1 1 1 +\N 2 2 \N \N 0 +3 3 3 \N \N 0 +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N \N 0 +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N \N 0 +\N 10 10 \N \N 0 +11 11 11 11 11 11 +\N \N 12 \N \N 0 +13 13 13 13 13 13 +\N 14 14 \N \N 0 +15 15 15 \N \N 0 +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N \N 0 +19 19 19 19 19 19 +\N 20 20 \N 20 20 +{% endfor -%} +join_algorithm = default, join_use_nulls = 1, t1 JOIN t2 +-- +\N \N 2 2 +\N \N 6 6 +\N \N 10 10 +\N \N 14 14 +\N \N 18 18 +1 1 1 1 +\N 2 \N 4 +\N 2 \N 8 +\N 2 \N 12 +\N 2 \N 16 +\N 2 \N 20 +3 3 3 3 +\N 4 \N 4 +\N 4 \N 8 +\N 4 \N 12 +\N 4 \N 16 +\N 4 \N 20 +5 5 5 5 +\N 6 \N 4 +\N 6 \N 8 +\N 6 \N 12 +\N 6 \N 16 +\N 6 \N 20 +7 7 7 7 +\N 8 \N 4 +\N 8 \N 8 +\N 8 \N 12 +\N 8 \N 16 +\N 8 \N 20 +9 9 9 9 +\N 10 \N 4 +\N 10 \N 8 +\N 
10 \N 12 +\N 10 \N 16 +\N 10 \N 20 +11 11 11 11 +\N 12 \N 4 +\N 12 \N 8 +\N 12 \N 12 +\N 12 \N 16 +\N 12 \N 20 +13 13 13 13 +\N 14 \N 4 +\N 14 \N 8 +\N 14 \N 12 +\N 14 \N 16 +\N 14 \N 20 +15 15 15 15 +\N 16 \N 4 +\N 16 \N 8 +\N 16 \N 12 +\N 16 \N 16 +\N 16 \N 20 +17 17 17 17 +\N 18 \N 4 +\N 18 \N 8 +\N 18 \N 12 +\N 18 \N 16 +\N 18 \N 20 +19 19 19 19 +\N 20 \N 4 +\N 20 \N 8 +\N 20 \N 12 +\N 20 \N 16 +\N 20 \N 20 +-- +\N \N \N 2 2 2 +\N \N \N 3 \N 3 +\N \N \N 6 \N 6 +\N \N \N 9 \N 9 +\N \N \N 10 10 10 +\N \N \N 14 14 14 +\N \N \N 15 \N 15 +\N \N \N 18 \N 18 +1 1 1 1 1 1 +\N 2 2 \N \N \N +3 3 3 \N \N \N +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N \N 12 +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N \N \N +\N 10 10 \N \N \N +11 11 11 11 11 11 +\N \N 12 \N \N 12 +13 13 13 13 13 13 +\N 14 14 \N \N \N +15 15 15 \N \N \N +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N \N 12 +19 19 19 19 19 19 +\N 20 20 \N 20 20 +-- +\N \N \N 2 2 2 +\N \N \N 3 \N 3 +\N \N \N \N 4 4 +\N \N \N 6 \N 6 +\N \N \N \N 8 8 +\N \N \N 9 \N 9 +\N \N \N 10 10 10 +\N \N \N \N \N 12 +\N \N \N 14 14 14 +\N \N \N 15 \N 15 +\N \N \N \N 16 16 +\N \N \N 18 \N 18 +\N \N \N \N 20 20 +1 1 1 1 1 1 +\N 2 2 \N \N \N +3 3 3 \N \N \N +\N 4 4 \N \N \N +5 5 5 5 5 5 +\N \N 6 \N \N \N +7 7 7 7 7 7 +\N 8 8 \N \N \N +9 9 9 \N \N \N +\N 10 10 \N \N \N +11 11 11 11 11 11 +\N \N 12 \N \N \N +13 13 13 13 13 13 +\N 14 14 \N \N \N +15 15 15 \N \N \N +\N 16 16 \N \N \N +17 17 17 17 17 17 +\N \N 18 \N \N \N +19 19 19 19 19 19 +\N 20 20 \N \N \N +-- +\N \N \N 2 2 2 +\N \N \N 3 \N 3 +\N \N \N 6 \N 6 +\N \N \N 9 \N 9 +\N \N \N 10 10 10 +\N \N \N \N \N 12 +\N \N \N 14 14 14 +\N \N \N 15 \N 15 +\N \N \N 18 \N 18 +1 1 1 1 1 1 +\N 2 2 \N \N \N +3 3 3 \N \N \N +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N \N \N +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N \N \N +\N 10 10 \N \N \N +11 11 11 11 11 11 +\N \N 12 \N \N \N +13 13 13 13 13 13 +\N 14 14 \N \N \N +15 15 15 \N \N \N +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N \N \N +19 19 19 19 19 19 +\N 20 20 \N 20 20 +join_algorithm = default, join_use_nulls = 0, t1 JOIN t3 +-- +\N 0 2 2 +\N 0 6 6 +\N 0 10 10 +\N 0 14 14 +\N 0 18 18 +1 1 1 1 +\N 2 \N 4 +\N 2 \N 8 +\N 2 \N 12 +\N 2 \N 16 +\N 2 \N 20 +3 3 3 3 +\N 4 \N 4 +\N 4 \N 8 +\N 4 \N 12 +\N 4 \N 16 +\N 4 \N 20 +5 5 5 5 +\N 6 \N 4 +\N 6 \N 8 +\N 6 \N 12 +\N 6 \N 16 +\N 6 \N 20 +7 7 7 7 +\N 8 \N 4 +\N 8 \N 8 +\N 8 \N 12 +\N 8 \N 16 +\N 8 \N 20 +9 9 9 9 +\N 10 \N 4 +\N 10 \N 8 +\N 10 \N 12 +\N 10 \N 16 +\N 10 \N 20 +11 11 11 11 +\N 12 \N 4 +\N 12 \N 8 +\N 12 \N 12 +\N 12 \N 16 +\N 12 \N 20 +13 13 13 13 +\N 14 \N 4 +\N 14 \N 8 +\N 14 \N 12 +\N 14 \N 16 +\N 14 \N 20 +15 15 15 15 +\N 16 \N 4 +\N 16 \N 8 +\N 16 \N 12 +\N 16 \N 16 +\N 16 \N 20 +17 17 17 17 +\N 18 \N 4 +\N 18 \N 8 +\N 18 \N 12 +\N 18 \N 16 +\N 18 \N 20 +19 19 19 19 +\N 20 \N 4 +\N 20 \N 8 +\N 20 \N 12 +\N 20 \N 16 +\N 20 \N 20 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 6 0 6 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 \N 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 18 0 18 +1 1 1 1 1 1 +\N 2 2 \N 0 0 +3 3 3 \N 0 0 +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N 0 0 +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N 0 0 +\N 10 10 \N 0 0 +11 11 11 11 11 11 +\N \N 12 \N 0 0 +13 13 13 13 13 13 +\N 14 14 \N 0 0 +15 15 15 \N 0 0 +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N 0 0 +19 19 19 19 19 19 +\N 20 20 \N 20 20 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 \N 4 4 +\N \N 0 6 0 6 +\N \N 0 \N 8 8 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 \N 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 \N 16 16 +\N \N 0 18 0 18 +\N \N 0 \N 20 20 +1 1 1 1 1 1 
+\N 2 2 \N 0 0 +3 3 3 \N 0 0 +\N 4 4 \N 0 0 +5 5 5 5 5 5 +\N \N 6 \N 0 0 +7 7 7 7 7 7 +\N 8 8 \N 0 0 +9 9 9 \N 0 0 +\N 10 10 \N 0 0 +11 11 11 11 11 11 +\N \N 12 \N 0 0 +13 13 13 13 13 13 +\N 14 14 \N 0 0 +15 15 15 \N 0 0 +\N 16 16 \N 0 0 +17 17 17 17 17 17 +\N \N 18 \N 0 0 +19 19 19 19 19 19 +\N 20 20 \N 0 0 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 6 0 6 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 \N 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 18 0 18 +1 1 1 1 1 1 +\N 2 2 \N 0 0 +3 3 3 \N 0 0 +\N 4 4 \N 4 4 +5 5 5 5 5 5 +\N \N 6 \N 0 0 +7 7 7 7 7 7 +\N 8 8 \N 8 8 +9 9 9 \N 0 0 +\N 10 10 \N 0 0 +11 11 11 11 11 11 +\N \N 12 \N 0 0 +13 13 13 13 13 13 +\N 14 14 \N 0 0 +15 15 15 \N 0 0 +\N 16 16 \N 16 16 +17 17 17 17 17 17 +\N \N 18 \N 0 0 +19 19 19 19 19 19 +\N 20 20 \N 20 20 +join_algorithm = default, join_use_nulls = 0, t1 JOIN t4 +-- +\N 0 2 2 +\N 0 0 4 +\N 0 6 6 +\N 0 0 8 +\N 0 10 10 +\N 0 0 12 +\N 0 14 14 +\N 0 0 16 +\N 0 18 18 +\N 0 0 20 +1 1 1 1 +\N 2 0 0 +3 3 3 3 +\N 4 0 0 +5 5 5 5 +\N 6 0 0 +7 7 7 7 +\N 8 0 0 +9 9 9 9 +\N 10 0 0 +11 11 11 11 +\N 12 0 0 +13 13 13 13 +\N 14 0 0 +15 15 15 15 +\N 16 0 0 +17 17 17 17 +\N 18 0 0 +19 19 19 19 +\N 20 0 0 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 0 4 4 +\N \N 0 6 0 6 +\N \N 0 0 8 8 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 0 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 0 16 16 +\N \N 0 18 0 18 +\N \N 0 0 20 20 +1 1 1 1 1 1 +\N 2 2 0 0 0 +3 3 3 0 0 0 +\N 4 4 0 0 0 +5 5 5 5 5 5 +\N \N 6 0 0 0 +7 7 7 7 7 7 +\N 8 8 0 0 0 +9 9 9 0 0 0 +\N 10 10 0 0 0 +11 11 11 11 11 11 +\N \N 12 0 0 0 +13 13 13 13 13 13 +\N 14 14 0 0 0 +15 15 15 0 0 0 +\N 16 16 0 0 0 +17 17 17 17 17 17 +\N \N 18 0 0 0 +19 19 19 19 19 19 +\N 20 20 0 0 0 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 0 4 4 +\N \N 0 6 0 6 +\N \N 0 0 8 8 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 0 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 0 16 16 +\N \N 0 18 0 18 +\N \N 0 0 20 20 +1 1 1 1 1 1 +\N 2 2 0 0 0 +3 3 3 0 0 0 +\N 4 4 0 0 0 +5 5 5 5 5 5 +\N \N 6 0 0 0 +7 7 7 7 7 7 +\N 8 8 0 0 0 +9 9 9 0 0 0 +\N 10 10 0 0 0 +11 11 11 11 11 11 +\N \N 12 0 0 0 +13 13 13 13 13 13 +\N 14 14 0 0 0 +15 15 15 0 0 0 +\N 16 16 0 0 0 +17 17 17 17 17 17 +\N \N 18 0 0 0 +19 19 19 19 19 19 +\N 20 20 0 0 0 +-- +\N \N 0 2 2 2 +\N \N 0 3 0 3 +\N \N 0 0 4 4 +\N \N 0 6 0 6 +\N \N 0 0 8 8 +\N \N 0 9 0 9 +\N \N 0 10 10 10 +\N \N 0 0 0 12 +\N \N 0 14 14 14 +\N \N 0 15 0 15 +\N \N 0 0 16 16 +\N \N 0 18 0 18 +\N \N 0 0 20 20 +1 1 1 1 1 1 +\N 2 2 0 0 0 +3 3 3 0 0 0 +\N 4 4 0 0 0 +5 5 5 5 5 5 +\N \N 6 0 0 0 +7 7 7 7 7 7 +\N 8 8 0 0 0 +9 9 9 0 0 0 +\N 10 10 0 0 0 +11 11 11 11 11 11 +\N \N 12 0 0 0 +13 13 13 13 13 13 +\N 14 14 0 0 0 +15 15 15 0 0 0 +\N 16 16 0 0 0 +17 17 17 17 17 17 +\N \N 18 0 0 0 +19 19 19 19 19 19 +\N 20 20 0 0 0 +-- +\N 0 2 2 +\N 0 \N 4 +\N 0 6 6 +\N 0 \N 8 +\N 0 10 10 +\N 0 \N 12 +\N 0 14 14 +\N 0 \N 16 +\N 0 18 18 +\N 0 \N 20 +1 1 1 1 +\N 2 \N 0 +3 3 3 3 +\N 4 \N 0 +5 5 5 5 +\N 6 \N 0 +7 7 7 7 +\N 8 \N 0 +9 9 9 9 +\N 10 \N 0 +11 11 11 11 +\N 12 \N 0 +13 13 13 13 +\N 14 \N 0 +15 15 15 15 +\N 16 \N 0 +17 17 17 17 +\N 18 \N 0 +19 19 19 19 +\N 20 \N 0 +-- +1 42 420 1 1 43 430 1 +\N 42 420 2 \N 43 430 4 +\N 42 420 2 \N 43 430 8 +\N 42 420 2 \N 43 430 12 +\N 42 420 2 \N 43 430 16 +\N 42 420 2 \N 43 430 20 +3 42 420 3 3 43 430 3 +\N 42 420 4 \N 43 430 4 +\N 42 420 4 \N 43 430 8 +\N 42 420 4 \N 43 430 12 +-- +1 42 420 1 1 43 430 1 +\N 42 420 2 \N 43 430 4 +\N 42 420 2 \N 43 430 8 +\N 42 420 2 \N 43 430 12 +\N 42 420 2 \N 43 430 16 +\N 42 420 2 \N 43 430 20 +3 42 420 3 3 43 430 3 +\N 42 420 4 \N 43 430 4 +\N 42 
420 4 \N 43 430 8 +\N 42 420 4 \N 43 430 12 +-- diff --git a/tests/queries/0_stateless/02861_join_on_nullsafe_compare.sql.j2 b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.sql.j2 new file mode 100644 index 00000000000..5fa435a7217 --- /dev/null +++ b/tests/queries/0_stateless/02861_join_on_nullsafe_compare.sql.j2 @@ -0,0 +1,101 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (a Nullable(UInt32), b Nullable(Int16), val UInt32) ENGINE = MergeTree ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1; +INSERT INTO t1 SELECT if(number % 2 == 0, NULL, number), if(number % 6 == 0, NULL, number), number, FROM numbers(1, 20); + +CREATE TABLE t2 (a Nullable(UInt32), b Nullable(UInt16), val UInt32) ENGINE = MergeTree ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1; +INSERT INTO t2 SELECT if(number % 4 == 0, NULL, number), if(number % 3 == 0, NULL, number), number, FROM numbers(1, 20); + +CREATE TABLE t3 (a Nullable(UInt32), b UInt16, val UInt32) ENGINE = MergeTree ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1; +INSERT INTO t3 SELECT if(number % 4 == 0, NULL, number), if(number % 3 == 0, NULL, number), number, FROM numbers(1, 20); + +CREATE TABLE t4 (a UInt32, b UInt16, val UInt32) ENGINE = MergeTree ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1; +INSERT INTO t4 SELECT if(number % 4 == 0, NULL, number), if(number % 3 == 0, NULL, number), number, FROM numbers(1, 20); + +{% for join_algorithm, join_use_nulls, t1, t2 in [ + ('default', 0, 't1', 't2'), + ('grace_hash', 0, 't1', 't2'), + ('full_sorting_merge', 0, 't1', 't2'), + ('default', 1, 't1', 't2'), + ('default', 0, 't1', 't3'), + ('default', 0, 't1', 't4'), +] -%} + +SET join_algorithm = '{{ join_algorithm }}'; +SET join_use_nulls = {{ join_use_nulls }}; + +SELECT 'join_algorithm = {{ join_algorithm }}, join_use_nulls = {{ join_use_nulls }}, {{ t1 }} JOIN {{ t2 }}'; + +SELECT '--'; + +SELECT {{ t1 }}.a, {{ t1 }}.val, {{ t2 }}.a, {{ t2 }}.val FROM {{ t1 }} FULL JOIN {{ t2 }} +ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) +ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST +; + +SELECT '--'; + +SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }} +ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) AND isNotDistinctFrom({{ t1 }}.b, {{ t2 }}.b) +ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST +; + +SELECT '--'; + +SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }} +ON {{ t1 }}.a == {{ t2 }}.a AND isNotDistinctFrom({{ t1 }}.b, {{ t2 }}.b) +ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST +; + +SELECT '--'; + +SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }} +ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) AND {{ t1 }}.b == {{ t2 }}.b +ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST +; + +{% endfor -%} + +SELECT '--'; + +SET join_use_nulls = 0; +SET join_algorithm = 'hash'; +SELECT t1.a, t1.val, t2.a, t2.val FROM t1 FULL JOIN t2 +ON isNotDistinctFrom(t1.a, t2.a) AND t1.b < 2 OR t1.a == t2.a +ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST +; + +SELECT '--'; + +SET join_algorithm = 'default'; +SET join_use_nulls = 1; + +-- try to cause column name clash intentionally using internal name + +SELECT * +FROM (SELECT a, 42 as `__wrapNullsafe(a)`, 420 as `tuple(a)`, val FROM t1) t1 +JOIN (SELECT a, 43 as `__wrapNullsafe(t2.a)`, 430 as `tuple(t2.a)`, val FROM t2) t2 +ON isNotDistinctFrom(t1.a, t2.a) +ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST +LIMIT 10; + +SELECT '--'; + +SELECT a, 42 as `__wrapNullsafe(a)`, 420 as 
`tuple(a)`, val, t2.a, 43 as `__wrapNullsafe(t2.a)`, 430 as `tuple(t2.a)`, t2.val
+FROM (SELECT a, val, 111 as `__wrapNullsafe(a)_0` FROM t1) t1
+JOIN (SELECT a, val, 111 as `__wrapNullsafe(t2.a)_0` FROM t2) t2
+ON isNotDistinctFrom(t1.a, t2.a)
+ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST
+LIMIT 10;
+
+SELECT '--';
+
+-- check illegal queries
+
+SELECT * FROM t1 JOIN t2 ON isNotDistinctFrom(); -- { serverError SYNTAX_ERROR }
+SELECT * FROM t1 JOIN t2 ON isNotDistinctFrom(t1.a); -- { serverError SYNTAX_ERROR }
+SELECT * FROM t1 JOIN t2 ON isNotDistinctFrom(t1.a, t2.a, t2.b); -- { serverError SYNTAX_ERROR }
+
+SELECT isNotDistinctFrom(a) from t1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT isNotDistinctFrom(a, b) from t1; -- { serverError NOT_IMPLEMENTED }

From 1cf68baad70ee0dccf2f0139c2d493f9a6526537 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Thu, 24 Aug 2023 09:28:02 +0000
Subject: [PATCH 0328/1687] Add NULL values in JOIN keys section to join doc

---
 .../sql-reference/statements/select/join.md | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md
index 7971b3ba275..38922e964b8 100644
--- a/docs/en/sql-reference/statements/select/join.md
+++ b/docs/en/sql-reference/statements/select/join.md
@@ -163,6 +163,61 @@ Result:
 │ 4 │ -4 │   4 │
 └───┴────┴─────┘
 ```
+
+## NULL values in JOIN keys
+
+NULL is not equal to any value, including itself. This means that if a JOIN key is NULL in one table, it does not match a NULL key in the other table.
+
+**Example**
+
+Table `A`:
+
+```
+┌───id─┬─name────┐
+│    1 │ Alice   │
+│    2 │ Bob     │
+│ ᴺᵁᴸᴸ │ Charlie │
+└──────┴─────────┘
+```
+
+Table `B`:
+
+```
+┌───id─┬─score─┐
+│    1 │    90 │
+│    3 │    85 │
+│ ᴺᵁᴸᴸ │    88 │
+└──────┴───────┘
+```
+
+```sql
+SELECT A.name, B.score FROM A LEFT JOIN B ON A.id = B.id
+```
+
+```
+┌─name────┬─score─┐
+│ Alice   │    90 │
+│ Bob     │     0 │
+│ Charlie │     0 │
+└─────────┴───────┘
+```
+
+Notice that `Charlie` from table `A` is left unmatched (its `score` shows the default value `0`), and the row with score 88 from table `B` does not appear in the result at all, because the NULL values in the JOIN keys do not match each other.
+
+If you want NULL values to match each other, use the `isNotDistinctFrom` function to compare the JOIN keys.
+
+```sql
+SELECT A.name, B.score FROM A LEFT JOIN B ON isNotDistinctFrom(A.id, B.id)
+```
+
+```
+┌─name────┬─score─┐
+│ Alice   │    90 │
+│ Bob     │     0 │
+│ Charlie │    88 │
+└─────────┴───────┘
+```
+
 ## ASOF JOIN Usage
 
 `ASOF JOIN` is useful when you need to join records that have no exact match.
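For readers who want to run the `join.md` example above: the documentation shows the contents of tables `A` and `B` but not their definitions. The following is a minimal sketch that reproduces the example; the table engine and column types are not given in the docs, so the choices here (`Memory`, `Nullable(UInt32)` keys) are assumptions.

```sql
-- Hypothetical setup reproducing the docs example above.
-- Engine and column types are assumptions; the docs only show the data.
CREATE TABLE A (id Nullable(UInt32), name String) ENGINE = Memory;
CREATE TABLE B (id Nullable(UInt32), score UInt32) ENGINE = Memory;

INSERT INTO A VALUES (1, 'Alice'), (2, 'Bob'), (NULL, 'Charlie');
INSERT INTO B VALUES (1, 90), (3, 85), (NULL, 88);

-- Plain equality: NULL keys never match, so Charlie stays unmatched
-- and score falls back to the column default (0) when join_use_nulls = 0.
SELECT A.name, B.score FROM A LEFT JOIN B ON A.id = B.id ORDER BY A.name;

-- Null-safe comparison: the NULL keys match, so Charlie gets score 88.
SELECT A.name, B.score FROM A LEFT JOIN B ON isNotDistinctFrom(A.id, B.id) ORDER BY A.name;
```

With the default `join_use_nulls = 0`, both queries return the outputs shown above; with `join_use_nulls = 1`, the unmatched `score` values become `ᴺᵁᴸᴸ` instead of `0`.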
From 2447eb027c37a68f409fec1352e1abce0e6cbf54 Mon Sep 17 00:00:00 2001
From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com>
Date: Mon, 28 Aug 2023 15:12:44 +0300
Subject: [PATCH 0329/1687] Update StorageHDFS.cpp

---
 src/Storages/HDFS/StorageHDFS.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index dafdba15549..09c205930dc 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -261,7 +261,7 @@ namespace
         HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef());
         HDFSFSPtr fs = createHDFSFS(builder.get());
-        auto res = LSWithRegexpMatching("/", fs, path_from_uri, context->getSettingsRef().ignore_eacces_multidirectory_globs)
+        auto res = LSWithRegexpMatching("/", fs, path_from_uri, context->getSettingsRef().ignore_eacces_multidirectory_globs);
         return res;
     }
 }

From bf6c6ec3fe3711527fecd20b7b0eb656f0dc3d11 Mon Sep 17 00:00:00 2001
From: Roman Vasin
Date: Mon, 28 Aug 2023 12:42:38 +0000
Subject: [PATCH 0330/1687] Refactor hideElements()

---
 src/Common/Config/ConfigProcessor.cpp | 20 ++++++++++++--------
 src/Common/Config/ConfigProcessor.h   |  2 +-
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp
index b4c48a08cad..60cf2c94db4 100644
--- a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@@ -812,19 +812,22 @@ void ConfigProcessor::decryptEncryptedElements(LoadedConfig & loaded_config)
 #endif
 
-XMLDocumentPtr ConfigProcessor::hideElements(LoadedConfig & loaded_config)
+XMLDocumentPtr ConfigProcessor::hideElements(XMLDocumentPtr xml_tree)
 {
-    XMLDocumentPtr new_preprocessed_xml = new Poco::XML::Document;
+    /// Create a copy of the XML document, because hiding elements in the preprocessed_xml document
+    /// would also affect the configuration, which holds a pointer to the preprocessed_xml document.
- for (Node * node = loaded_config.preprocessed_xml->firstChild(); node; node = node->nextSibling()) + XMLDocumentPtr xml_tree_copy = new Poco::XML::Document; + + for (Node * node = xml_tree->firstChild(); node; node = node->nextSibling()) { - Node * new_node = new_preprocessed_xml->importNode(node, true); - new_preprocessed_xml->appendChild(new_node); + Node * new_node = xml_tree_copy->importNode(node, true); + xml_tree_copy->appendChild(new_node); } - Node * new_config_root = getRootNode(new_preprocessed_xml.get()); + Node * new_config_root = getRootNode(xml_tree_copy.get()); hideRecursive(new_config_root); - return new_preprocessed_xml; + return xml_tree_copy; } void ConfigProcessor::savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir) @@ -875,7 +878,8 @@ void ConfigProcessor::savePreprocessedConfig(LoadedConfig & loaded_config, std:: writer.setNewLine("\n"); writer.setIndent(" "); writer.setOptions(Poco::XML::XMLWriter::PRETTY_PRINT); - writer.writeNode(preprocessed_path, hideElements(loaded_config)); + XMLDocumentPtr preprocessed_xml_without_hidden_elements = hideElements(loaded_config.preprocessed_xml); + writer.writeNode(preprocessed_path, preprocessed_xml_without_hidden_elements); LOG_DEBUG(log, "Saved preprocessed configuration to '{}'.", preprocessed_path); } catch (Poco::Exception & e) diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 6d993d8f58b..98592d8846e 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -143,7 +143,7 @@ private: #endif void hideRecursive(Poco::XML::Node * config_root); - XMLDocumentPtr hideElements(LoadedConfig & loaded_config); + XMLDocumentPtr hideElements(XMLDocumentPtr xml_tree); void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); From 6ee36399663cfc7facb45a19e0b6c9a8ab6a20a0 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 28 Aug 2023 12:51:17 +0000 Subject: [PATCH 0331/1687] Rename hide attribute into hide_in_preprocessed --- src/Common/Config/ConfigProcessor.cpp | 2 +- .../test_config_hidden_attributes/configs/config.xml | 11 ----------- .../__init__.py | 0 .../configs/config.xml | 11 +++++++++++ .../configs/users.xml | 0 .../test.py | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) delete mode 100644 tests/integration/test_config_hidden_attributes/configs/config.xml rename tests/integration/{test_config_hidden_attributes => test_config_hide_in_preprocessed}/__init__.py (100%) create mode 100644 tests/integration/test_config_hide_in_preprocessed/configs/config.xml rename tests/integration/{test_config_hidden_attributes => test_config_hide_in_preprocessed}/configs/users.xml (100%) rename tests/integration/{test_config_hidden_attributes => test_config_hide_in_preprocessed}/test.py (95%) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 60cf2c94db4..8a912e1963c 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -263,7 +263,7 @@ void ConfigProcessor::hideRecursive(Poco::XML::Node * config_root) if (node->nodeType() == Node::ELEMENT_NODE) { Element & element = dynamic_cast(*node); - if (element.hasAttribute("hidden") && Poco::NumberParser::parseBool(element.getAttribute("hidden"))) + if (element.hasAttribute("hide_in_preprocessed") && Poco::NumberParser::parseBool(element.getAttribute("hide_in_preprocessed"))) { config_root->removeChild(node); } else diff --git 
a/tests/integration/test_config_hidden_attributes/configs/config.xml b/tests/integration/test_config_hidden_attributes/configs/config.xml deleted file mode 100644 index 8f5c6f2ed39..00000000000 --- a/tests/integration/test_config_hidden_attributes/configs/config.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - diff --git a/tests/integration/test_config_hidden_attributes/__init__.py b/tests/integration/test_config_hide_in_preprocessed/__init__.py similarity index 100% rename from tests/integration/test_config_hidden_attributes/__init__.py rename to tests/integration/test_config_hide_in_preprocessed/__init__.py diff --git a/tests/integration/test_config_hide_in_preprocessed/configs/config.xml b/tests/integration/test_config_hide_in_preprocessed/configs/config.xml new file mode 100644 index 00000000000..d7b06a28611 --- /dev/null +++ b/tests/integration/test_config_hide_in_preprocessed/configs/config.xml @@ -0,0 +1,11 @@ + + 60000000000 + 40000000000 + + + value + value_2 + https://connection.url/ + + + diff --git a/tests/integration/test_config_hidden_attributes/configs/users.xml b/tests/integration/test_config_hide_in_preprocessed/configs/users.xml similarity index 100% rename from tests/integration/test_config_hidden_attributes/configs/users.xml rename to tests/integration/test_config_hide_in_preprocessed/configs/users.xml diff --git a/tests/integration/test_config_hidden_attributes/test.py b/tests/integration/test_config_hide_in_preprocessed/test.py similarity index 95% rename from tests/integration/test_config_hidden_attributes/test.py rename to tests/integration/test_config_hide_in_preprocessed/test.py index 7ab2cb0e225..0abe6d4f5c7 100644 --- a/tests/integration/test_config_hidden_attributes/test.py +++ b/tests/integration/test_config_hide_in_preprocessed/test.py @@ -19,7 +19,7 @@ def started_cluster(): cluster.shutdown() -def test_hidden(started_cluster): +def test_hide_in_preprocessed(started_cluster): assert ( node.query( "select value from system.server_settings where name ='max_table_size_to_drop'" From 5f7036158373522eb99a44c77a9147372ff7bb20 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 28 Aug 2023 12:59:43 +0000 Subject: [PATCH 0332/1687] Add test for 1 value of hide_in_preprocessed --- .../test_config_hide_in_preprocessed/configs/config.xml | 1 + tests/integration/test_config_hide_in_preprocessed/test.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/tests/integration/test_config_hide_in_preprocessed/configs/config.xml b/tests/integration/test_config_hide_in_preprocessed/configs/config.xml index d7b06a28611..aac5f572964 100644 --- a/tests/integration/test_config_hide_in_preprocessed/configs/config.xml +++ b/tests/integration/test_config_hide_in_preprocessed/configs/config.xml @@ -1,4 +1,5 @@ + 2000 60000000000 40000000000 diff --git a/tests/integration/test_config_hide_in_preprocessed/test.py b/tests/integration/test_config_hide_in_preprocessed/test.py index 0abe6d4f5c7..f5e6d9e2bf5 100644 --- a/tests/integration/test_config_hide_in_preprocessed/test.py +++ b/tests/integration/test_config_hide_in_preprocessed/test.py @@ -20,6 +20,12 @@ def started_cluster(): def test_hide_in_preprocessed(started_cluster): + assert ( + node.query( + "select value from system.server_settings where name ='max_thread_pool_free_size'" + ) + == "2000\n" + ) assert ( node.query( "select value from system.server_settings where name ='max_table_size_to_drop'" @@ -36,6 +42,7 @@ def test_hide_in_preprocessed(started_cluster): out = node.exec_in_container( ["cat", 
"/var/lib/clickhouse/preprocessed_configs/config.xml"] ) + assert "max_thread_pool_free_size" not in out assert "max_table_size_to_drop" not in out assert "max_partition_size_to_drop" in out assert "named_collections" not in out From 87cf0e38fd4284f30eeb820cb9d82df22f9b320f Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 28 Aug 2023 13:11:44 +0000 Subject: [PATCH 0333/1687] Update MD docs --- docs/en/operations/configuration-files.md | 8 +++----- docs/ru/operations/configuration-files.md | 6 ++---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index b091910ecaa..81a35ad1ea9 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -65,7 +65,7 @@ XML substitution example: Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element. -## Encrypting Configuration {#encryption} +## Encrypting and Hiding Configuration {#encryption} You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encrypted_by` with the name of the encryption codec as value to the element to encrypt. @@ -102,16 +102,14 @@ Example: 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 ``` -## Hiding Configuration {#hidden} - -You can use `hidden` attribute to hide certain elements from the preprocessed file. +Even with applied encryption in the preprocessed file the elements are still saved in plain text. In case this is a problem, we suggest two alternatives: either set file permissions of the preprocessed file to 600 or use the `hide_in_preprocessed` attribute. Example: ```xml -