diff --git a/.clang-tidy b/.clang-tidy index 7f78143ec3d..85989d311a2 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -23,7 +23,7 @@ Checks: '*, -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, -bugprone-not-null-terminated-result, - -bugprone-reserved-identifier, + -bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged -bugprone-unchecked-optional-access, -cert-dcl16-c, @@ -111,6 +111,7 @@ Checks: '*, -misc-no-recursion, -misc-non-private-member-variables-in-classes, -misc-confusable-identifiers, # useful but slooow + -misc-use-anonymous-namespace, -modernize-avoid-c-arrays, -modernize-concat-nested-namespaces, @@ -136,7 +137,7 @@ Checks: '*, -readability-function-cognitive-complexity, -readability-function-size, -readability-identifier-length, - -readability-identifier-naming, + -readability-identifier-naming, # useful but too slow -readability-implicit-bool-conversion, -readability-isolate-declaration, -readability-magic-numbers, @@ -148,7 +149,7 @@ Checks: '*, -readability-uppercase-literal-suffix, -readability-use-anyofallof, - -zirkon-*, + -zircon-*, ' WarningsAsErrors: '*' @@ -168,11 +169,10 @@ CheckOptions: readability-identifier-naming.ParameterPackCase: lower_case readability-identifier-naming.StructCase: CamelCase readability-identifier-naming.TemplateTemplateParameterCase: CamelCase - readability-identifier-naming.TemplateUsingCase: lower_case + readability-identifier-naming.TemplateParameterCase: lower_case readability-identifier-naming.TypeTemplateParameterCase: CamelCase readability-identifier-naming.TypedefCase: CamelCase readability-identifier-naming.UnionCase: CamelCase - readability-identifier-naming.UsingCase: CamelCase modernize-loop-convert.UseCxx20ReverseRanges: false performance-move-const-arg.CheckTriviallyCopyableMove: false # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097 diff --git a/.clangd b/.clangd new file mode 100644 index 00000000000..ad471db8d8b --- /dev/null +++ b/.clangd @@ -0,0 +1,16 @@ +Diagnostics: + # clangd does parse .clang-tidy, but some checks are too slow to run in + # clang-tidy build, so let's enable them explicitly for clangd at least. + ClangTidy: + # The following checks had been disabled due to slowness with C++23, + # for more details see [1]. + # + # [1]: https://github.com/llvm/llvm-project/issues/61418 + # + # But the code base had been written in a style that had been checked + # by this check, so at least, let's enable it for clangd.
+ Add: [ + # configured in .clang-tidy + readability-identifier-naming, + bugprone-reserved-identifier, + ] diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 1182481c897..f0741b5465f 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -1341,6 +1341,40 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseAnalyzer: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_analyzer + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, analyzer) + REPO_COPY=${{runner.temp}}/stateless_analyzer/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestAarch64: needs: [BuilderDebAarch64] runs-on: [self-hosted, func-tester-aarch64] diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 85d865252ad..acf6bbe8f6a 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -72,6 +72,9 @@ jobs: with: name: changed_images path: ${{ runner.temp }}/changed_images.json + Codebrowser: + needs: [DockerHubPush] + uses: ./.github/workflows/woboq.yml BuilderCoverity: needs: DockerHubPush runs-on: [self-hosted, builder] @@ -125,8 +128,8 @@ jobs: SONAR_SCANNER_VERSION: 4.8.0.2856 SONAR_SERVER_URL: "https://sonarcloud.io" BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed - CC: clang-15 - CXX: clang++-15 + CC: clang-16 + CXX: clang++-16 steps: - name: Check out repository code uses: ClickHouse/checkout@v1 diff --git a/.github/workflows/woboq.yml b/.github/workflows/woboq.yml index 363652c9f33..bdfbc8fef9c 100644 --- a/.github/workflows/woboq.yml +++ b/.github/workflows/woboq.yml @@ -6,9 +6,8 @@ env: concurrency: group: woboq on: # yamllint disable-line rule:truthy - schedule: - - cron: '0 */18 * * *' workflow_dispatch: + workflow_call: jobs: # don't use dockerhub push because this image updates so rarely WoboqCodebrowser: @@ -26,6 +25,10 @@ jobs: with: clear-repository: true submodules: 'true' + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.IMAGES_PATH }} - name: Codebrowser run: | sudo rm -fr "$TEMP_PATH" diff --git a/.gitignore b/.gitignore index 14b860244c2..a04c60d5ca3 100644 --- a/.gitignore +++ b/.gitignore @@ -129,7 +129,6 @@ website/package-lock.json /.ccls-cache # clangd cache -/.clangd /.cache /compile_commands.json diff --git a/.gitmodules b/.gitmodules index 56aeea2bfe8..dbca3f3f6bc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -267,7 +267,7 @@ url = https://github.com/ClickHouse/nats.c [submodule "contrib/vectorscan"] path = contrib/vectorscan - url = https://github.com/VectorCamp/vectorscan + url = 
https://github.com/ClickHouse/vectorscan.git [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/ClickHouse/c-ares @@ -338,6 +338,9 @@ [submodule "contrib/liburing"] path = contrib/liburing url = https://github.com/axboe/liburing +[submodule "contrib/libfiu"] + path = contrib/libfiu + url = https://github.com/ClickHouse/libfiu.git [submodule "contrib/isa-l"] path = contrib/isa-l url = https://github.com/ClickHouse/isa-l.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ff48ceacc2..26188cb7110 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -342,13 +342,6 @@ if (COMPILER_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers") - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16) - # Set new experimental pass manager, it's a performance, build time and binary size win. - # Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang. - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager") - endif () - # We cannot afford to use LTO when compiling unit tests, and it's not enough # to only supply -fno-lto at the final linking stage. So we disable it # completely. @@ -395,6 +388,8 @@ if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")) set(ENABLE_GWP_ASAN OFF) endif () +option (ENABLE_FIU "Enable Fiu" ON) + option(WERROR "Enable -Werror compiler option" ON) if (WERROR) diff --git a/README.md b/README.md index c82c64cfd22..4a619eb4fd3 100644 --- a/README.md +++ b/README.md @@ -21,11 +21,26 @@ curl https://clickhouse.com/ | sh * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Upcoming Events -* [**ClickHouse Spring Meetup in Manhattan**](https://www.meetup.com/clickhouse-new-york-user-group/events/292517734) - April 26 - It's spring, and it's time to meet again in the city! Talks include: "Building a domain specific query language on top of Clickhouse", "A Galaxy of Information", "Our Journey to ClickHouse Cloud from Redshift", and a ClickHouse update! -* [**v23.4 Release Webinar**](https://clickhouse.com/company/events/v23-4-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-04) - April 26 - 23.4 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. -* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16 - Save the date! ClickHouse is coming back to Berlin. We’re excited to announce an upcoming ClickHouse Meetup that you won’t want to miss. Join us as we gather together to discuss the latest in the world of ClickHouse and share user stories. + +* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - May 31 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. 
+* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16 +* [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25 +* [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25 +* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/293426725/) - Jun 7 +* [**ClickHouse Meetup in Stockholm**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - Jun 13 + +Also, keep an eye out for upcoming meetups in Amsterdam, Boston, NYC, Beijing, and Toronto. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" -* **Recording available**: [**v23.3 Release Webinar**](https://www.youtube.com/watch?v=ISaGUjvBNao) UNDROP TABLE, server settings introspection, nested dynamic disks, MySQL compatibility, parseDate Time, Lightweight Deletes, Parallel Replicas, integrations updates, and so much more! Watch it now! +* **Recording available**: [**v23.4 Release Webinar**](https://www.youtube.com/watch?v=4rrf6bk_mOg) Faster Parquet Reading, Asynchronous Connections to Replicas, Trailing Comma before FROM, extractKeyValuePairs, integrations updates, and so much more! Watch it now! * **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU) + + + ## Interested in joining ClickHouse and making it your full time job? + +We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics. Here, you will have an opportunity to solve some of the most cutting edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker as well as a doer - we’ll definitely click! + +Check out our **current openings** here: https://clickhouse.com/company/careers + +Can't find what you are looking for, but want to let us know you are interested in joining ClickHouse? Email careers@clickhouse.com! diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index ed4570d5e3f..4a80c176829 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -314,7 +314,14 @@ struct integer::_impl const T alpha = t / static_cast(max_int); - if (alpha <= static_cast(max_int)) + /** Here we have to use strict comparison. + * The max_int is 2^64 - 1. + * When cast to a floating point type, it will be rounded to the closest representable number, + * which is 2^64. + * But 2^64 is not representable in uint64_t, + * so the maximum representable number will be strictly less. + */ + if (alpha < static_cast(max_int)) self = static_cast(alpha); else // max(double) / 2^64 will surely contain less than 52 precision bits, so speed up computations.
set_multiplier(self, static_cast(alpha)); diff --git a/base/glibc-compatibility/musl/logf.c b/base/glibc-compatibility/musl/logf.c index 7ee5d7fe623..e4c2237caa2 100644 --- a/base/glibc-compatibility/musl/logf.c +++ b/base/glibc-compatibility/musl/logf.c @@ -53,7 +53,7 @@ float logf(float x) tmp = ix - OFF; i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; k = (int32_t)tmp >> 23; /* arithmetic shift */ - iz = ix - (tmp & 0x1ff << 23); + iz = ix - (tmp & 0xff800000); invc = T[i].invc; logc = T[i].logc; z = (double_t)asfloat(iz); diff --git a/cmake/linux/toolchain-riscv64.cmake b/cmake/linux/toolchain-riscv64.cmake index ea57c3b2c42..7f876f88d72 100644 --- a/cmake/linux/toolchain-riscv64.cmake +++ b/cmake/linux/toolchain-riscv64.cmake @@ -21,7 +21,7 @@ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") -set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=bfd") +set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=bfd") # Currently, lld does not work with the error: # ld.lld: error: section size decrease is too large diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index bf5eddf09f5..b2fbdb256fd 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -10,9 +10,16 @@ set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER") if (SANITIZE) if (SANITIZE STREQUAL "address") - # LLVM-15 has a bug in Address Sanitizer, preventing the usage of 'sanitize-address-use-after-scope', - # see https://github.com/llvm/llvm-project/issues/58633 - set (ASAN_FLAGS "-fsanitize=address -fno-sanitize-address-use-after-scope") + set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope") + if (COMPILER_CLANG) + if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 15 AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 16) + # LLVM-15 has a bug in Address Sanitizer, preventing the usage + # of 'sanitize-address-use-after-scope', see [1]. 
+ # + # [1]: https://github.com/llvm/llvm-project/issues/58633 + set (ASAN_FLAGS "${ASAN_FLAGS} -fno-sanitize-address-use-after-scope") + endif() + endif() set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}") diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 8e6da1051bc..802907c9dda 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -70,13 +70,15 @@ if (LINKER_NAME) if (NOT LLD_PATH) message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.") endif () - if (COMPILER_CLANG) - # This a temporary quirk to emit .debug_aranges with ThinLTO, can be removed after upgrade to clang-16 + # This a temporary quirk to emit .debug_aranges with ThinLTO, it is only the case clang/llvm <16 + if (COMPILER_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16) set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld") configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}") - endif () + else () + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}") + endif() endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 57ce93d45f7..020fe1e1c5a 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -105,6 +105,7 @@ add_contrib (libfarmhash) add_contrib (icu-cmake icu) add_contrib (h3-cmake h3) add_contrib (mariadb-connector-c-cmake mariadb-connector-c) +add_contrib (libfiu-cmake libfiu) if (ENABLE_TESTS) add_contrib (googletest-cmake googletest) @@ -177,7 +178,19 @@ endif() add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) add_contrib (c-ares-cmake c-ares) -add_contrib (qpl-cmake qpl) + +if (OS_LINUX AND ARCH_AMD64 AND ENABLE_SSE42) + option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES}) +elseif(ENABLE_QPL) + message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with SSE 4.2 or higher") +endif() +if (ENABLE_QPL) + add_contrib (idxd-config-cmake idxd-config) + add_contrib (qpl-cmake qpl) # requires: idxd-config +else() + message(STATUS "Not using QPL") +endif () + add_contrib (morton-nd-cmake morton-nd) if (ARCH_S390X) add_contrib(crc32-s390x-cmake crc32-s390x) diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index c9a759eab9c..6f9dce0b042 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -103,11 +103,19 @@ set (SRCS_CONTEXT ) if (ARCH_AARCH64) - set (SRCS_CONTEXT ${SRCS_CONTEXT} - "${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S" - ) + if (OS_DARWIN) + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_macho_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_macho_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_macho_gas.S" + ) + else() + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S" + ) + endif() elseif (ARCH_PPC64LE) set (SRCS_CONTEXT ${SRCS_CONTEXT} "${LIBRARY_DIR}/libs/context/src/asm/jump_ppc64_sysv_elf_gas.S" diff --git a/contrib/boringssl-cmake/CMakeLists.txt 
b/contrib/boringssl-cmake/CMakeLists.txt index 828919476a7..51137f6d04e 100644 --- a/contrib/boringssl-cmake/CMakeLists.txt +++ b/contrib/boringssl-cmake/CMakeLists.txt @@ -111,6 +111,8 @@ elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "mips") set(ARCH "generic") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ppc64le") set(ARCH "ppc64le") +elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64") + set(ARCH "riscv64") else() message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR}) endif() diff --git a/contrib/googletest-cmake/CMakeLists.txt b/contrib/googletest-cmake/CMakeLists.txt index 90fdde0c185..3905df03155 100644 --- a/contrib/googletest-cmake/CMakeLists.txt +++ b/contrib/googletest-cmake/CMakeLists.txt @@ -1,15 +1,30 @@ -set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest") +set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest") -add_library(_gtest "${SRC_DIR}/src/gtest-all.cc") +add_library(_gtest "${SRC_DIR}/googletest/src/gtest-all.cc") set_target_properties(_gtest PROPERTIES VERSION "1.0.0") target_compile_definitions (_gtest PUBLIC GTEST_HAS_POSIX_RE=0) -target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/include") -target_include_directories(_gtest PRIVATE "${SRC_DIR}") +target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/googletest/include") +target_include_directories(_gtest PRIVATE "${SRC_DIR}/googletest") -add_library(_gtest_main "${SRC_DIR}/src/gtest_main.cc") +add_library(_gtest_main "${SRC_DIR}/googletest/src/gtest_main.cc") set_target_properties(_gtest_main PROPERTIES VERSION "1.0.0") target_link_libraries(_gtest_main PUBLIC _gtest) add_library(_gtest_all INTERFACE) target_link_libraries(_gtest_all INTERFACE _gtest _gtest_main) add_library(ch_contrib::gtest_all ALIAS _gtest_all) + + +add_library(_gmock "${SRC_DIR}/googlemock/src/gmock-all.cc") +set_target_properties(_gmock PROPERTIES VERSION "1.0.0") +target_compile_definitions (_gmock PUBLIC GTEST_HAS_POSIX_RE=0) +target_include_directories(_gmock SYSTEM PUBLIC "${SRC_DIR}/googlemock/include" "${SRC_DIR}/googletest/include") +target_include_directories(_gmock PRIVATE "${SRC_DIR}/googlemock") + +add_library(_gmock_main "${SRC_DIR}/googlemock/src/gmock_main.cc") +set_target_properties(_gmock_main PROPERTIES VERSION "1.0.0") +target_link_libraries(_gmock_main PUBLIC _gmock) + +add_library(_gmock_all INTERFACE) +target_link_libraries(_gmock_all INTERFACE _gmock _gmock_main) +add_library(ch_contrib::gmock_all ALIAS _gmock_all) diff --git a/contrib/idxd-config-cmake/CMakeLists.txt b/contrib/idxd-config-cmake/CMakeLists.txt new file mode 100644 index 00000000000..030252ec8e6 --- /dev/null +++ b/contrib/idxd-config-cmake/CMakeLists.txt @@ -0,0 +1,23 @@ +## accel_config is the utility library required by QPL-Deflate codec for controlling and configuring Intel® In-Memory Analytics Accelerator (Intel® IAA). 
+set (LIBACCEL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config") +set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake") +set (LIBACCEL_HEADER_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config-cmake/include") +set (SRCS + "${LIBACCEL_SOURCE_DIR}/accfg/lib/libaccfg.c" + "${LIBACCEL_SOURCE_DIR}/util/log.c" + "${LIBACCEL_SOURCE_DIR}/util/sysfs.c" +) + +add_library(_accel-config ${SRCS}) + +target_compile_options(_accel-config PRIVATE "-D_GNU_SOURCE") + +target_include_directories(_accel-config BEFORE + PRIVATE ${UUID_DIR} + PRIVATE ${LIBACCEL_HEADER_DIR} + PRIVATE ${LIBACCEL_SOURCE_DIR}) + +target_include_directories(_accel-config SYSTEM BEFORE + PUBLIC ${LIBACCEL_SOURCE_DIR}/accfg) + +add_library(ch_contrib::accel-config ALIAS _accel-config) diff --git a/contrib/qpl-cmake/idxd-header/config.h b/contrib/idxd-config-cmake/include/config.h similarity index 100% rename from contrib/qpl-cmake/idxd-header/config.h rename to contrib/idxd-config-cmake/include/config.h diff --git a/contrib/libfiu b/contrib/libfiu new file mode 160000 index 00000000000..b85edbde4cf --- /dev/null +++ b/contrib/libfiu @@ -0,0 +1 @@ +Subproject commit b85edbde4cf974b1b40d27828a56f0505f4e2ee5 diff --git a/contrib/libfiu-cmake/CMakeLists.txt b/contrib/libfiu-cmake/CMakeLists.txt new file mode 100644 index 00000000000..e805491edbb --- /dev/null +++ b/contrib/libfiu-cmake/CMakeLists.txt @@ -0,0 +1,20 @@ +if (NOT ENABLE_FIU) + message (STATUS "Not using fiu") + return () +endif () + +set(FIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/") + +set(FIU_SOURCES + ${FIU_DIR}/libfiu/fiu.c + ${FIU_DIR}/libfiu/fiu-rc.c + ${FIU_DIR}/libfiu/backtrace.c + ${FIU_DIR}/libfiu/wtable.c +) + +set(FIU_HEADERS "${FIU_DIR}/libfiu") + +add_library(_fiu ${FIU_SOURCES}) +target_compile_definitions(_fiu PUBLIC DUMMY_BACKTRACE) +target_include_directories(_fiu PUBLIC ${FIU_HEADERS}) +add_library(ch_contrib::fiu ALIAS _fiu) diff --git a/contrib/qpl b/contrib/qpl index 0bce2b03423..3f8f5cea277 160000 --- a/contrib/qpl +++ b/contrib/qpl @@ -1 +1 @@ -Subproject commit 0bce2b03423f6fbeb8bce66cc8be0bf558058848 +Subproject commit 3f8f5cea27739f5261e8fd577dc233ffe88bf679 diff --git a/contrib/qpl-cmake/CMakeLists.txt b/contrib/qpl-cmake/CMakeLists.txt index d2be8add3c7..21d665d12e4 100644 --- a/contrib/qpl-cmake/CMakeLists.txt +++ b/contrib/qpl-cmake/CMakeLists.txt @@ -1,36 +1,5 @@ ## The Intel® QPL provides high performance implementations of data processing functions for existing hardware accelerator, and/or software path in case if hardware accelerator is not available. -if (OS_LINUX AND ARCH_AMD64 AND (ENABLE_AVX2 OR ENABLE_AVX512)) - option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES}) -elseif(ENABLE_QPL) - message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with avx2/avx512 support") -endif() - -if (NOT ENABLE_QPL) - message(STATUS "Not using QPL") - return() -endif() - -## QPL has build dependency on libaccel-config. Here is to build libaccel-config which is required by QPL. -## libaccel-config is the utility library for controlling and configuring Intel® In-Memory Analytics Accelerator (Intel® IAA). 
-set (LIBACCEL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config") set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake") -set (LIBACCEL_HEADER_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake/idxd-header") -set (SRCS - "${LIBACCEL_SOURCE_DIR}/accfg/lib/libaccfg.c" - "${LIBACCEL_SOURCE_DIR}/util/log.c" - "${LIBACCEL_SOURCE_DIR}/util/sysfs.c" -) - -add_library(accel-config ${SRCS}) - -target_compile_options(accel-config PRIVATE "-D_GNU_SOURCE") - -target_include_directories(accel-config BEFORE - PRIVATE ${UUID_DIR} - PRIVATE ${LIBACCEL_HEADER_DIR} - PRIVATE ${LIBACCEL_SOURCE_DIR}) - -## QPL build start here. set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl") set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources") set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl") @@ -53,8 +22,8 @@ GetLibraryVersion("${HEADER_CONTENT}" QPL_VERSION) message(STATUS "Intel QPL version: ${QPL_VERSION}") # There are 5 source subdirectories under $QPL_SRC_DIR: isal, c_api, core-sw, middle-layer, c_api. -# Generate 7 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, core_iaa, middle_layer_lib. -# Output ch_contrib::qpl by linking with 7 library targets. +# Generate 8 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, core_iaa, middle_layer_lib. +# Output ch_contrib::qpl by linking with 8 library targets. include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake") @@ -119,31 +88,36 @@ set(ISAL_ASM_SRC ${QPL_SRC_DIR}/isal/igzip/igzip_body.asm add_library(isal OBJECT ${ISAL_C_SRC}) add_library(isal_asm OBJECT ${ISAL_ASM_SRC}) +set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS + $) + +set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS + $) + # Setting external and internal interfaces for ISA-L library target_include_directories(isal - PUBLIC $ - PRIVATE ${QPL_SRC_DIR}/isal/include - PUBLIC ${QPL_SRC_DIR}/isal/igzip) + PUBLIC $ + PRIVATE ${QPL_SRC_DIR}/isal/include + PUBLIC ${QPL_SRC_DIR}/isal/igzip) + +set_target_properties(isal PROPERTIES + CXX_STANDARD 11 + C_STANDARD 99) target_compile_options(isal PRIVATE "$<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}>" "$<$:>" "$<$:>") +# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available". +# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system. target_compile_options(isal_asm PRIVATE "-I${QPL_SRC_DIR}/isal/include/" PRIVATE "-I${QPL_SRC_DIR}/isal/igzip/" PRIVATE "-I${QPL_SRC_DIR}/isal/crc/" + PRIVATE "-DHAVE_AS_KNOWS_AVX512" + PRIVATE "-DAS_FEATURE_LEVEL=10" PRIVATE "-DQPL_LIB") -# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available". -# AS_FEATURE_LEVEL=5 means "Check SIMD capabilities of the target system at runtime and use up to AVX2 if available". -# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system. -if (ENABLE_AVX512) - target_compile_options(isal_asm PRIVATE "-DHAVE_AS_KNOWS_AVX512" "-DAS_FEATURE_LEVEL=10") -else() - target_compile_options(isal_asm PRIVATE "-DAS_FEATURE_LEVEL=5") -endif() - # Here must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS. 
# Otherwise nasm compiler would fail to proceed due to unrecognition of "-fno-sanitize=undefined" if (SANITIZE STREQUAL "undefined") @@ -157,78 +131,97 @@ target_compile_definitions(isal PUBLIC NDEBUG) # [SUBDIR]core-sw -# Two libraries:qplcore_avx512/qplcore_px for SW fallback will be created which are implemented by AVX512 and non-AVX512 instructions respectively. +# Create set of libraries corresponding to supported platforms for SW fallback which are implemented by AVX512 and non-AVX512 instructions respectively. # The upper level QPL API will check SIMD capabilities of the target system at runtime and decide to call AVX512 function or non-AVX512 function. -# Hence, here we don't need put qplcore_avx512 under an ENABLE_AVX512 CMake switch. -# Actually, if we do that, some undefined symbols errors would happen because both of AVX512 function and non-AVX512 function are referenced by QPL API. -# PLATFORM=2 means AVX512 implementation; PLATFORM=0 means non-AVX512 implementation. +# Hence, here we don't need put ENABLE_AVX512 CMake switch. -# Find Core Sources -file(GLOB SOURCES - ${QPL_SRC_DIR}/core-sw/src/checksums/*.c - ${QPL_SRC_DIR}/core-sw/src/filtering/*.c - ${QPL_SRC_DIR}/core-sw/src/other/*.c - ${QPL_SRC_DIR}/core-sw/src/compression/*.c) +get_list_of_supported_optimizations(PLATFORMS_LIST) -file(GLOB DATA_SOURCES - ${QPL_SRC_DIR}/core-sw/src/data/*.c) +foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST) + # Find Core Sources + file(GLOB SOURCES + ${QPL_SRC_DIR}/core-sw/src/checksums/*.c + ${QPL_SRC_DIR}/core-sw/src/filtering/*.c + ${QPL_SRC_DIR}/core-sw/src/other/*.c + ${QPL_SRC_DIR}/core-sw/src/compression/*.c) -# Create avx512 library -add_library(qplcore_avx512 OBJECT ${SOURCES}) + file(GLOB DATA_SOURCES + ${QPL_SRC_DIR}/core-sw/src/data/*.c) -target_compile_definitions(qplcore_avx512 PRIVATE PLATFORM=2) + # Create library + add_library(qplcore_${PLATFORM_ID} OBJECT ${SOURCES}) -target_include_directories(qplcore_avx512 - PUBLIC $ - PUBLIC $ - PUBLIC $ - PRIVATE $) + set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS + $) -set_target_properties(qplcore_avx512 PROPERTIES - $<$:C_STANDARD 17>) + target_include_directories(qplcore_${PLATFORM_ID} + PUBLIC $ + PUBLIC $ + PUBLIC $ + PUBLIC $ + PRIVATE $) -target_link_libraries(qplcore_avx512 - PRIVATE isal - PRIVATE ${CMAKE_DL_LIBS}) + set_target_properties(qplcore_${PLATFORM_ID} PROPERTIES + $<$:C_STANDARD 17>) -target_compile_options(qplcore_avx512 - PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS} - PRIVATE -march=skylake-avx512 - PRIVATE "$<$:>" - PRIVATE "$<$:-O3;-D_FORTIFY_SOURCE=2>") + target_compile_options(qplcore_${PLATFORM_ID} + PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS} + PRIVATE "$<$:>" + PRIVATE "$<$:-O3;-D_FORTIFY_SOURCE=2>") + # Set specific compiler options and/or definitions based on a platform + if (${PLATFORM_ID} MATCHES "avx512") + target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=2) + target_compile_options(qplcore_${PLATFORM_ID} PRIVATE -march=skylake-avx512) + else() # Create default px library + target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=0) + endif() -target_compile_definitions(qplcore_avx512 PUBLIC QPL_BADARG_CHECK) + target_link_libraries(qplcore_${PLATFORM_ID} isal) +endforeach() # -# Create px library +# Create dispatcher between platforms and auto-generated wrappers # -#set(CMAKE_INCLUDE_CURRENT_DIR ON) +file(GLOB SW_DISPATCHER_SOURCES ${QPL_SRC_DIR}/core-sw/dispatcher/*.cpp) -# Create library -add_library(qplcore_px OBJECT ${SOURCES} ${DATA_SOURCES}) 
+add_library(qplcore_sw_dispatcher OBJECT ${SW_DISPATCHER_SOURCES}) -target_compile_definitions(qplcore_px PRIVATE PLATFORM=0) +set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS + $) -target_include_directories(qplcore_px - PUBLIC $ - PUBLIC $ - PUBLIC $ - PRIVATE $) +target_include_directories(qplcore_sw_dispatcher + PUBLIC $) -set_target_properties(qplcore_px PROPERTIES - $<$:C_STANDARD 17>) +# Generate kernel wrappers +generate_unpack_kernel_arrays(${QPL_BINARY_DIR} "${PLATFORMS_LIST}") -target_link_libraries(qplcore_px - PRIVATE isal - PRIVATE ${CMAKE_DL_LIBS}) +foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST) + file(GLOB GENERATED_${PLATFORM_ID}_TABLES_SRC ${QPL_BINARY_DIR}/generated/${PLATFORM_ID}_*.cpp) -target_compile_options(qplcore_px - PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS} - PRIVATE "$<$:>" - PRIVATE "$<$:-O3;-D_FORTIFY_SOURCE=2>") + target_sources(qplcore_sw_dispatcher PRIVATE ${GENERATED_${PLATFORM_ID}_TABLES_SRC}) -target_compile_definitions(qplcore_px PUBLIC QPL_BADARG_CHECK) + # Set specific compiler options and/or definitions based on a platform + if (${PLATFORM_ID} MATCHES "avx512") + set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2) + else() + set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0) + endif() + + target_include_directories(qplcore_sw_dispatcher + PUBLIC $) +endforeach() + +set_target_properties(qplcore_sw_dispatcher PROPERTIES CXX_STANDARD 17) + +# w/a for build compatibility with ISAL codebase +target_compile_definitions(qplcore_sw_dispatcher PUBLIC -DQPL_LIB) + +target_compile_options(qplcore_sw_dispatcher + PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; + ${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS}; + $<$:-O3;-D_FORTIFY_SOURCE=2>> + PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>) # [SUBDIR]core-iaa file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c @@ -242,13 +235,20 @@ file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c # Create library add_library(core_iaa OBJECT ${HW_PATH_SRC}) +set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS + $) + target_include_directories(core_iaa PRIVATE ${UUID_DIR} PUBLIC $ PUBLIC $ PRIVATE $ # status.h in own_checkers.h PRIVATE $ # own_checkers.h - PRIVATE $) + PRIVATE $) + +set_target_properties(core_iaa PROPERTIES + $<$:C_STANDARD 17> + CXX_STANDARD 17) target_compile_options(core_iaa PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; @@ -258,11 +258,10 @@ target_compile_features(core_iaa PRIVATE c_std_11) target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK PRIVATE $<$: BLOCK_ON_FAULT_ENABLED> - PRIVATE $<$:LOG_HW_INIT>) + PRIVATE $<$:LOG_HW_INIT> + PRIVATE $<$:DYNAMIC_LOADING_LIBACCEL_CONFIG>) # [SUBDIR]middle-layer -generate_unpack_kernel_arrays(${QPL_BINARY_DIR}) - file(GLOB MIDDLE_LAYER_SRC ${QPL_SRC_DIR}/middle-layer/analytics/*.cpp ${QPL_SRC_DIR}/middle-layer/c_wrapper/*.cpp @@ -277,14 +276,12 @@ file(GLOB MIDDLE_LAYER_SRC ${QPL_SRC_DIR}/middle-layer/inflate/*.cpp ${QPL_SRC_DIR}/core-iaa/sources/accelerator/*.cpp) # todo -file(GLOB GENERATED_PX_TABLES_SRC ${QPL_BINARY_DIR}/generated/px_*.cpp) -file(GLOB GENERATED_AVX512_TABLES_SRC ${QPL_BINARY_DIR}/generated/avx512_*.cpp) - add_library(middle_layer_lib OBJECT - ${GENERATED_PX_TABLES_SRC} - ${GENERATED_AVX512_TABLES_SRC} ${MIDDLE_LAYER_SRC}) +set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS + $) + target_compile_options(middle_layer_lib PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; 
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS}; @@ -295,17 +292,16 @@ target_compile_definitions(middle_layer_lib PUBLIC QPL_VERSION="${QPL_VERSION}" PUBLIC $<$:LOG_HW_INIT> PUBLIC $<$:QPL_EFFICIENT_WAIT> - PUBLIC QPL_BADARG_CHECK) + PUBLIC QPL_BADARG_CHECK + PUBLIC $<$:DYNAMIC_LOADING_LIBACCEL_CONFIG>) -set_source_files_properties(${GENERATED_PX_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0) -set_source_files_properties(${GENERATED_AVX512_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2) +set_target_properties(middle_layer_lib PROPERTIES CXX_STANDARD 17) target_include_directories(middle_layer_lib PRIVATE ${UUID_DIR} PUBLIC $ PUBLIC $ - PUBLIC $ - PUBLIC $ + PUBLIC $ PUBLIC $ PUBLIC $) @@ -316,20 +312,19 @@ file(GLOB_RECURSE QPL_C_API_SRC ${QPL_SRC_DIR}/c_api/*.c ${QPL_SRC_DIR}/c_api/*.cpp) -add_library(_qpl STATIC ${QPL_C_API_SRC} - $ - $ - $ - $ - $ - $ - $) +get_property(LIB_DEPS GLOBAL PROPERTY QPL_LIB_DEPS) + +add_library(_qpl STATIC ${QPL_C_API_SRC} ${LIB_DEPS}) target_include_directories(_qpl - PUBLIC $ + PUBLIC $ $ PRIVATE $ PRIVATE $) +set_target_properties(_qpl PROPERTIES + $<$:C_STANDARD 17> + CXX_STANDARD 17) + target_compile_options(_qpl PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; ${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS}; @@ -339,15 +334,15 @@ target_compile_options(_qpl target_compile_definitions(_qpl PRIVATE -DQPL_LIB PRIVATE -DQPL_BADARG_CHECK + PRIVATE $<$:DYNAMIC_LOADING_LIBACCEL_CONFIG> PUBLIC -DENABLE_QPL_COMPRESSION) target_link_libraries(_qpl - PRIVATE accel-config - PRIVATE ch_contrib::isal - PRIVATE ${CMAKE_DL_LIBS}) + PRIVATE ch_contrib::accel-config + PRIVATE ch_contrib::isal) -add_library (ch_contrib::qpl ALIAS _qpl) target_include_directories(_qpl SYSTEM BEFORE PUBLIC "${QPL_PROJECT_DIR}/include" - PUBLIC "${LIBACCEL_SOURCE_DIR}/accfg" PUBLIC ${UUID_DIR}) + +add_library (ch_contrib::qpl ALIAS _qpl) diff --git a/contrib/vectorscan b/contrib/vectorscan index b4bba94b1a2..1f4d448314e 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit b4bba94b1a250603b0b198e0394946e32f6c3f30 +Subproject commit 1f4d448314e581473103187765e4c949d01b4259 diff --git a/docker/packager/packager b/docker/packager/packager index 7d022df52e6..a894fe2d8e9 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -362,17 +362,16 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--compiler", choices=( - "clang-15", - "clang-15-darwin", - "clang-15-darwin-aarch64", - "clang-15-aarch64", - "clang-15-aarch64-v80compat", - "clang-15-ppc64le", - "clang-15-amd64-compat", - "clang-15-freebsd", - "gcc-11", + "clang-16", + "clang-16-darwin", + "clang-16-darwin-aarch64", + "clang-16-aarch64", + "clang-16-aarch64-v80compat", + "clang-16-ppc64le", + "clang-16-amd64-compat", + "clang-16-freebsd", ), - default="clang-15", + default="clang-16", help="a compiler to use", ) parser.add_argument( diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index b76b8234c81..8136fd1fbbc 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -10,53 +10,21 @@ RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev -# libclang-15-dev does not contain proper symlink: -# -# This is what cmake will search for: -# -# # readlink -f /usr/lib/llvm-15/lib/libclang-15.so.1 -# /usr/lib/x86_64-linux-gnu/libclang-15.so.1 -# -# This is 
what exists: -# -# # ls -l /usr/lib/x86_64-linux-gnu/libclang-15* -# lrwxrwxrwx 1 root root 16 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so -> libclang-15.so.1 -# lrwxrwxrwx 1 root root 21 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15 -> libclang-15.so.15.0.0 -# -rw-r--r-- 1 root root 31835760 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15.0.0 -# ARG TARGETARCH RUN arch=${TARGETARCH:-amd64} \ && case $arch in \ amd64) rarch=x86_64 ;; \ arm64) rarch=aarch64 ;; \ *) exit 1 ;; \ - esac \ - && ln -rsf /usr/lib/$rarch-linux-gnu/libclang-15.so.15 /usr/lib/$rarch-linux-gnu/libclang-15.so.1 + esac # repo versions doesn't work correctly with C++17 # also we push reports to s3, so we add index.html to subfolder urls -# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b -RUN git clone https://github.com/ClickHouse/woboq_codebrowser \ - && cd woboq_codebrowser \ - && cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \ - && ninja \ - && cd .. \ - && rm -rf woboq_codebrowser +# https://github.com/ClickHouse/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b +RUN git clone --branch=master --depth=1 https://github.com/ClickHouse/woboq_codebrowser /woboq_codebrowser \ + && cd /woboq_codebrowser \ + && cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} -DCLANG_BUILTIN_HEADERS_DIR=/usr/lib/llvm-${LLVM_VERSION}/lib/clang/${LLVM_VERSION}/include \ + && ninja -ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator -ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator -ENV STATIC_DATA=/woboq_codebrowser/data - -ENV SOURCE_DIRECTORY=/repo_folder -ENV BUILD_DIRECTORY=/build -ENV HTML_RESULT_DIRECTORY=$BUILD_DIRECTORY/html_report -ENV SHA=nosha -ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data" - -CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \ - cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=/usr/bin/clang-${LLVM_VERSION} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \ - mkdir -p $HTML_RESULT_DIRECTORY && \ - $CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \ - cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\ - $CODEINDEX $HTML_RESULT_DIRECTORY -d "$DATA" | ts '%Y-%m-%d %H:%M:%S' && \ - mv $HTML_RESULT_DIRECTORY /test_output +COPY build.sh / +CMD ["bash", "-c", "/build.sh 2>&1"] diff --git a/docker/test/codebrowser/build.sh b/docker/test/codebrowser/build.sh new file mode 100755 index 00000000000..5ab9de5a453 --- /dev/null +++ b/docker/test/codebrowser/build.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -x -e + + +STATIC_DATA=${STATIC_DATA:-/woboq_codebrowser/data} +SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-/build} +BUILD_DIRECTORY=${BUILD_DIRECTORY:-/workdir/build} +OUTPUT_DIRECTORY=${OUTPUT_DIRECTORY:-/workdir/output} +HTML_RESULT_DIRECTORY=${HTML_RESULT_DIRECTORY:-$OUTPUT_DIRECTORY/html_report} +SHA=${SHA:-nosha} +DATA=${DATA:-https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data} +nproc=$(($(nproc) + 2)) # increase parallelism + +read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" + +mkdir -p "$BUILD_DIRECTORY" && cd "$BUILD_DIRECTORY" +cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" 
-DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 "${CMAKE_FLAGS[@]}" +mkdir -p "$HTML_RESULT_DIRECTORY" +echo 'Filter out too noisy "Error: filename" lines and keep them in full codebrowser_generator.log' +/woboq_codebrowser/generator/codebrowser_generator -b "$BUILD_DIRECTORY" -a \ + -o "$HTML_RESULT_DIRECTORY" --execute-concurrency="$nproc" -p "ClickHouse:$SOURCE_DIRECTORY:$SHA" \ + -d "$DATA" \ + |& ts '%Y-%m-%d %H:%M:%S' \ + | tee "$OUTPUT_DIRECTORY/codebrowser_generator.log" \ + | grep --line-buffered -v ':[0-9]* Error: ' +cp -r "$STATIC_DATA" "$HTML_RESULT_DIRECTORY/" +/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator "$HTML_RESULT_DIRECTORY" \ + -d "$DATA" |& ts '%Y-%m-%d %H:%M:%S' diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 3ed0c4df093..dab873377ce 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT stage=${stage:-} # Compiler version, normally set by Dockerfile -export LLVM_VERSION=${LLVM_VERSION:-13} +export LLVM_VERSION=${LLVM_VERSION:-16} # A variable to pass additional flags to CMake. # Here we explicitly default it to nothing so that bash doesn't complain about @@ -147,6 +147,7 @@ function clone_submodules contrib/xxHash contrib/simdjson contrib/liburing + contrib/libfiu ) git submodule sync diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 75f2a0af358..d2c8de7a211 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -15,7 +15,7 @@ stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" echo "$script_dir" repo_dir=ch -BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"} +BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"} BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} function git_clone_with_retry diff --git a/docker/test/keeper-jepsen/run.sh b/docker/test/keeper-jepsen/run.sh index 5e321b7c347..694d7fcd916 100644 --- a/docker/test/keeper-jepsen/run.sh +++ b/docker/test/keeper-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} diff --git a/docker/test/server-jepsen/run.sh b/docker/test/server-jepsen/run.sh index c11a48f6d4c..0c3768df813 100644 --- a/docker/test/server-jepsen/run.sh +++ b/docker/test/server-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} diff 
--git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index bfad2c9a7c5..4926967d2d2 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -20,31 +20,27 @@ install_packages package_folder # Thread Fuzzer allows to check more permutations of possible thread scheduling # and find more potential issues. -# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540 -is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'") -if [ "$is_tsan_build" -eq "0" ]; then - export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 - export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 - export THREAD_FUZZER_SLEEP_TIME_US=100000 +export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 +export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 +export THREAD_FUZZER_SLEEP_TIME_US=100000 - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 +export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 +export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 +export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 +export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 +export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 +export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 +export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 +export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 +export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 +export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 +export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 +export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 - export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01 - export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01 -fi +export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01 +export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01 export ZOOKEEPER_FAULT_INJECTION=1 # Initial run without S3 to create system.*_log on local file system to make it diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 911cadc3c58..a49278e960b 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list # 15.0.2 -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=16 RUN apt-get update \ && apt-get install \ @@ -52,6 +52,7 @@ RUN apt-get update \ lld-${LLVM_VERSION} \ llvm-${LLVM_VERSION} \ llvm-${LLVM_VERSION}-dev \ + libclang-${LLVM_VERSION}-dev \ 
moreutils \ nasm \ ninja-build \ diff --git a/docs/en/development/build-cross-osx.md b/docs/en/development/build-cross-osx.md index e6e5bd6ca4d..ce8d1b77526 100644 --- a/docs/en/development/build-cross-osx.md +++ b/docs/en/development/build-cross-osx.md @@ -11,14 +11,14 @@ This is intended for continuous integration checks that run on Linux servers. If The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first. -## Install Clang-15 +## Install Clang-16 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup. For example the commands for Bionic are like: ``` bash -sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-15 main" >> /etc/apt/sources.list -sudo apt-get install clang-15 +sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-16 main" >> /etc/apt/sources.list +sudo apt-get install clang-16 ``` ## Install Cross-Compilation Toolset {#install-cross-compilation-toolset} @@ -55,7 +55,7 @@ curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX1 cd ClickHouse mkdir build-darwin cd build-darwin -CC=clang-15 CXX=clang++-15 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake .. +CC=clang-16 CXX=clang++-16 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake .. ninja ``` diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index a20913e7a32..e3550a046c7 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -11,7 +11,7 @@ This is for the case when you have Linux machine and want to use it to build `cl The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first. -## Install Clang-13 +## Install Clang-16 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do ``` @@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` bash cd ClickHouse mkdir build-riscv64 -CC=clang-14 CXX=clang++-14 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF +CC=clang-16 CXX=clang++-16 cmake . 
-Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF ninja -C build-riscv64 ``` diff --git a/docs/en/development/build.md b/docs/en/development/build.md index a55d44bdf93..26ee9ce581a 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -47,8 +47,8 @@ GCC as a compiler is not supported To build with a specific Clang version: ``` bash -export CC=clang-15 -export CXX=clang++-15 +export CC=clang-16 +export CXX=clang++-16 ``` ### Checkout ClickHouse Sources {#checkout-clickhouse-sources} diff --git a/docs/en/development/building_and_benchmarking_deflate_qpl.md b/docs/en/development/building_and_benchmarking_deflate_qpl.md index 60d6b1c7b76..0501c1cbdcb 100644 --- a/docs/en/development/building_and_benchmarking_deflate_qpl.md +++ b/docs/en/development/building_and_benchmarking_deflate_qpl.md @@ -4,20 +4,22 @@ sidebar_position: 73 sidebar_label: Building and Benchmarking DEFLATE_QPL description: How to build Clickhouse and run benchmark with DEFLATE_QPL Codec --- + # Build Clickhouse with DEFLATE_QPL -- Make sure your target machine meet the QPL required [Prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites) -- Pass the following flag to CMake when building ClickHouse, depending on the capabilities of your target machine: + +- Make sure your target machine meet the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites) +- Pass the following flag to CMake when building ClickHouse: + ``` bash -cmake -DENABLE_AVX2=1 -DENABLE_QPL=1 .. -``` -or -``` bash -cmake -DENABLE_AVX512=1 -DENABLE_QPL=1 .. +cmake -DENABLE_QPL=1 .. ``` + - For generic requirements, please refer to Clickhouse generic [build instructions](/docs/en/development/build.md) # Run Benchmark with DEFLATE_QPL + ## Files list + The folders `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/ClickHouse/tree/master/contrib/qpl-cmake) give example to run benchmark with python scripts: `client_scripts` contains python scripts for running typical benchmark, for example: @@ -28,48 +30,60 @@ The folders `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/C `database_files` means it will store database files according to lz4/deflate/zstd codec. ## Run benchmark automatically for Star Schema: + ``` bash $ cd ./benchmark_sample/client_scripts $ sh run_ssb.sh ``` + After complete, please check all the results in this folder:`./output/` In case you run into failure, please manually run benchmark as below sections. ## Definition + [CLICKHOUSE_EXE] means the path of clickhouse executable program. ## Environment + - CPU: Sapphire Rapid - OS Requirements refer to [System Requirements for QPL](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#system-requirements) - IAA Setup refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) - Install python modules: + ``` bash pip3 install clickhouse_driver numpy ``` + [Self-check for IAA] + ``` bash $ accel-config list | grep -P 'iax|state' ``` + Expected output like this: ``` bash "dev":"iax1", "state":"enabled", "state":"enabled", ``` + If you see nothing output, it means IAA is not ready to work. 
Please check IAA setup again. ## Generate raw data + ``` bash $ cd ./benchmark_sample $ mkdir rawdata_dir && cd rawdata_dir ``` + Use [`dbgen`](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) to generate 100 million rows data with the parameters: -s 20 The files like `*.tbl` are expected to output under `./benchmark_sample/rawdata_dir/ssb-dbgen`: ## Database setup + Set up database with LZ4 codec ``` bash @@ -77,6 +91,7 @@ $ cd ./database_dir/lz4 $ [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& $ [CLICKHOUSE_EXE] client ``` + Here you should see the message `Connected to ClickHouse server` from console which means client successfully setup connection with server. Complete below three steps mentioned in [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) @@ -114,6 +129,7 @@ You are expected to see below output: └───────────┘ ``` [Self-check for IAA Deflate codec] + At the first time you execute insertion or query from client, clickhouse server console is expected to print this log: ```text Hardware-assisted DeflateQpl codec is ready! @@ -125,17 +141,21 @@ Initialization of hardware-assisted DeflateQpl codec failed That means IAA devices is not ready, you need check IAA setup again. ## Benchmark with single instance + - Before start benchmark, Please disable C6 and set CPU frequency governor to be `performance` + ``` bash $ cpupower idle-set -d 3 $ cpupower frequency-set -g performance ``` + - To eliminate impact of memory bound on cross sockets, we use `numactl` to bind server on one socket and client on another socket. - Single instance means single server connected with single client Now run benchmark for LZ4/Deflate/ZSTD respectively: LZ4: + ``` bash $ cd ./database_dir/lz4 $ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& @@ -144,13 +164,16 @@ $ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > lz4.log ``` IAA deflate: + ``` bash $ cd ./database_dir/deflate $ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& $ cd ./client_scripts $ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > deflate.log ``` + ZSTD: + ``` bash $ cd ./database_dir/zstd $ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& @@ -170,6 +193,7 @@ How to check performance metrics: We focus on QPS, please search the keyword: `QPS_Final` and collect statistics ## Benchmark with multi-instances + - To reduce impact of memory bound on too much threads, We recommend run benchmark with multi-instances. - Multi-instance means multiple(2 or 4)servers connected with respective client. - The cores of one socket need to be divided equally and assigned to the servers respectively. @@ -182,35 +206,46 @@ There are 2 differences: Here we assume there are 60 cores per socket and take 2 instances for example. 
Launch server for first instance LZ4: + ``` bash $ cd ./database_dir/lz4 $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& ``` + ZSTD: + ``` bash $ cd ./database_dir/zstd $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& ``` + IAA Deflate: + ``` bash $ cd ./database_dir/deflate $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& ``` + [Launch server for second instance] LZ4: + ``` bash $ cd ./database_dir && mkdir lz4_s2 && cd lz4_s2 $ cp ../../server_config/config_lz4_s2.xml ./ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null& ``` + ZSTD: + ``` bash $ cd ./database_dir && mkdir zstd_s2 && cd zstd_s2 $ cp ../../server_config/config_zstd_s2.xml ./ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null& ``` + IAA Deflate: + ``` bash $ cd ./database_dir && mkdir deflate_s2 && cd deflate_s2 $ cp ../../server_config/config_deflate_s2.xml ./ @@ -220,19 +255,24 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/d Creating tables && Inserting data for second instance Creating tables: + ``` bash $ [CLICKHOUSE_EXE] client -m --port=9001 ``` + Inserting data: + ``` bash $ [CLICKHOUSE_EXE] client --query "INSERT INTO [TBL_FILE_NAME] FORMAT CSV" < [TBL_FILE_NAME].tbl --port=9001 ``` + - [TBL_FILE_NAME] represents the name of a file named with the regular expression: *. tbl under `./benchmark_sample/rawdata_dir/ssb-dbgen`. - `--port=9001` stands for the assigned port for server instance which is also defined in config_lz4_s2.xml/config_zstd_s2.xml/config_deflate_s2.xml. For even more instances, you need replace it with the value: 9002/9003 which stand for s3/s4 instance respectively. If you don't assign it, the port is 9000 by default which has been used by first instance. Benchmarking with 2 instances LZ4: + ``` bash $ cd ./database_dir/lz4 $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& @@ -241,7 +281,9 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/n $ cd ./client_scripts $ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > lz4_2insts.log ``` + ZSTD: + ``` bash $ cd ./database_dir/zstd $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& @@ -250,7 +292,9 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/ $ cd ./client_scripts $ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > zstd_2insts.log ``` + IAA deflate + ``` bash $ cd ./database_dir/deflate $ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& @@ -259,9 +303,11 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/d $ cd ./client_scripts $ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > deflate_2insts.log ``` + Here the last argument: `2` of client_stressing_test.py stands for the number of instances. For more instances, you need replace it with the value: 3 or 4. This script support up to 4 instances/ Now three logs should be output as expected: + ``` text lz4_2insts.log deflate_2insts.log @@ -275,7 +321,9 @@ Benchmark setup for 4 instances is similar with 2 instances above. We recommend use 2 instances benchmark data as final report for review. 
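Besides the QPS numbers collected from the logs, it can be useful to compare how much disk space each codec run actually consumed. The query below is a sketch that assumes the SSB tables were created as described in the Star Schema Benchmark guide; run it through the client of each server instance (LZ4, ZSTD, IAA Deflate) and compare the ratios:

```sql
-- Compare on-disk (compressed) size with uncompressed size per table.
SELECT
    database,
    table,
    formatReadableSize(sum(data_compressed_bytes)) AS compressed,
    formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed,
    round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS ratio
FROM system.parts
WHERE active
GROUP BY database, table
ORDER BY database, table;
```
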
## Tips + Each time before launch new clickhouse server, please make sure no background clickhouse process running, please check and kill old one: + ``` bash $ ps -aux| grep clickhouse $ kill -9 [PID] diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index 232eee5b3cf..738c5458cc3 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -102,7 +102,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t ### Report Details -- **Compiler**: `clang-15`, optionally with the name of a target platform +- **Compiler**: `clang-16`, optionally with the name of a target platform - **Build type**: `Debug` or `RelWithDebInfo` (cmake). - **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan). - **Status**: `success` or `fail` diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 6bcdadeb1eb..1f3ab1aae2c 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -152,7 +152,7 @@ While inside the `build` directory, configure your build by running CMake. Befor export CC=clang CXX=clang++ cmake .. -If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-15 CXX=clang++-15`. The clang version will be in the script output. +If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-16 CXX=clang++-16`. The clang version will be in the script output. The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building. diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index ab69e4e90ce..ccfca4c1f1f 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -19,8 +19,8 @@ Kafka lets you: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1], - name2 [type2], + name1 [type1] [ALIAS expr1], + name2 [type2] [ALIAS expr2], ... ) ENGINE = Kafka() SETTINGS diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index fbd6d944363..f27d4d48f75 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -13,8 +13,8 @@ The PostgreSQL engine allows to perform `SELECT` and `INSERT` queries on data th ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], + name1 type1 [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], + name2 type2 [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], ... 
) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]); ``` diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 81d8cc2d3ca..7db2f3b465a 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -90,15 +90,17 @@ SELECT * FROM mySecondReplacingMT FINAL; ### is_deleted -`is_deleted` — Name of the column with the type of row: `1` is a “deleted“ row, `0` is a “state“ row. +`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a “deleted“ row, `0` is a “state“ row. - Column data type — `Int8`. + Column data type — `UInt8`. - Can only be enabled when `ver` is used. - The row is deleted when use the `OPTIMIZE ... FINAL CLEANUP`, or `OPTIMIZE ... FINAL` if the engine settings `clean_deleted_rows` has been set to `Always`. - No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted one is the one kept. +:::note +`is_deleted` can only be enabled when `ver` is used. +The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used, or if the engine setting `clean_deleted_rows` has been set to `Always`. +No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept. +::: ## Query clauses diff --git a/docs/en/getting-started/example-datasets/reddit-comments.md b/docs/en/getting-started/example-datasets/reddit-comments.md new file mode 100644 index 00000000000..e1e372746c9 --- /dev/null +++ b/docs/en/getting-started/example-datasets/reddit-comments.md @@ -0,0 +1,636 @@ +--- +slug: /en/getting-started/example-datasets/reddit-comments +sidebar_label: Reddit comments +--- + +# Reddit comments dataset + +This dataset contains publicly-available comments on Reddit that go back to December, 2005, to March, 2023, and contains over 7B rows of data. The raw data is in JSON format in compressed `.zst` files and the rows look like the following: + +```json +{"controversiality":0,"body":"A look at Vietnam and Mexico exposes the myth of market liberalisation.","subreddit_id":"t5_6","link_id":"t3_17863","stickied":false,"subreddit":"reddit.com","score":2,"ups":2,"author_flair_css_class":null,"created_utc":1134365188,"author_flair_text":null,"author":"frjo","id":"c13","edited":false,"parent_id":"t3_17863","gilded":0,"distinguished":null,"retrieved_on":1473738411} +{"created_utc":1134365725,"author_flair_css_class":null,"score":1,"ups":1,"subreddit":"reddit.com","stickied":false,"link_id":"t3_17866","subreddit_id":"t5_6","controversiality":0,"body":"The site states \"What can I use it for? Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more...\", just like any other new breeed of sites that want us to store everything we have on the web. And they even guarantee multiple levels of security and encryption etc. But what prevents these web site operators fom accessing and/or stealing Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more, for competitive or personal gains...? I am pretty sure that most of them are honest, but what's there to prevent me from setting up a good useful site and stealing all your data? 
Call me paranoid - I am.","retrieved_on":1473738411,"distinguished":null,"gilded":0,"id":"c14","edited":false,"parent_id":"t3_17866","author":"zse7zse","author_flair_text":null} +{"gilded":0,"distinguished":null,"retrieved_on":1473738411,"author":"[deleted]","author_flair_text":null,"edited":false,"id":"c15","parent_id":"t3_17869","subreddit":"reddit.com","score":0,"ups":0,"created_utc":1134366848,"author_flair_css_class":null,"body":"Jython related topics by Frank Wierzbicki","controversiality":0,"subreddit_id":"t5_6","stickied":false,"link_id":"t3_17869"} +{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"[deleted]","edited":false,"parent_id":"t3_17870","id":"c16","subreddit":"reddit.com","created_utc":1134367660,"author_flair_css_class":null,"score":1,"ups":1,"body":"[deleted]","controversiality":0,"stickied":false,"link_id":"t3_17870","subreddit_id":"t5_6"} +{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"rjoseph","edited":false,"id":"c17","parent_id":"t3_17817","subreddit":"reddit.com","author_flair_css_class":null,"created_utc":1134367754,"score":1,"ups":1,"body":"Saft is by far the best extension you could tak onto your Safari","controversiality":0,"link_id":"t3_17817","stickied":false,"subreddit_id":"t5_6"} +``` + +A shoutout to Percona for the [motivation behind ingesting this dataset](https://www.percona.com/blog/big-data-set-reddit-comments-analyzing-clickhouse/), which we have downloaded and stored in an S3 bucket. + +:::note +The following commands were executed on ClickHouse Cloud. To run this on your own cluster, replace `default` in the `s3Cluster` function call with the name of your cluster. If you do not have a cluster, then replace the `s3Cluster` function with the `s3` function. +::: + +1. Let's create a table for the Reddit data: + +```sql +CREATE TABLE reddit +( + subreddit LowCardinality(String), + subreddit_id LowCardinality(String), + subreddit_type Enum('public' = 1, 'restricted' = 2, 'user' = 3, 'archived' = 4, 'gold_restricted' = 5, 'private' = 6), + author LowCardinality(String), + body String CODEC(ZSTD(6)), + created_date Date DEFAULT toDate(created_utc), + created_utc DateTime, + retrieved_on DateTime, + id String, + parent_id String, + link_id String, + score Int32, + total_awards_received UInt16, + controversiality UInt8, + gilded UInt8, + collapsed_because_crowd_control UInt8, + collapsed_reason Enum('' = 0, 'comment score below threshold' = 1, 'may be sensitive content' = 2, 'potentially toxic' = 3, 'potentially toxic content' = 4), + distinguished Enum('' = 0, 'moderator' = 1, 'admin' = 2, 'special' = 3), + removal_reason Enum('' = 0, 'legal' = 1), + author_created_utc DateTime, + author_fullname LowCardinality(String), + author_patreon_flair UInt8, + author_premium UInt8, + can_gild UInt8, + can_mod_post UInt8, + collapsed UInt8, + is_submitter UInt8, + _edited String, + locked UInt8, + quarantined UInt8, + no_follow UInt8, + send_replies UInt8, + stickied UInt8, + author_flair_text LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (subreddit, created_date, author); +``` + +:::note +The names of the files in S3 start with `RC_YYYY-MM` where `YYYY-MM` goes from `2005-12` to `2023-02`. The compression changes a couple of times though, so the file extensions are not consistent. 
For example: + +- the file names are initially `RC_2005-12.bz2` to `RC_2017-11.bz2` +- then they look like `RC_2017-12.xz` to `RC_2018-09.xz` +- and finally `RC_2018-10.zst` to `RC_2023-02.zst` +::: + +2. We are going to start with one month of data, but if you want to simply insert every row - skip ahead to step 8 below. The following file has 86M records from December, 2017: + +```sql +INSERT INTO reddit + SELECT * + FROM s3Cluster( + 'default', + 'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz', + 'JSONEachRow' + ); +``` + +If you do not have a cluster, use `s3` instead of `s3Cluster`: + +```sql +INSERT INTO reddit + SELECT * + FROM s3( + 'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz', + 'JSONEachRow' + ); +``` + +3. It will take a while depending on your resources, but when it's done verify it worked: + +```sql +SELECT formatReadableQuantity(count()) +FROM reddit; +``` + +```response +┌─formatReadableQuantity(count())─┐ +│ 85.97 million │ +└─────────────────────────────────┘ +``` + +4. Let's see how many unique subreddits were in December of 2017: + +```sql +SELECT uniqExact(subreddit) +FROM reddit; +``` + +```response +┌─uniqExact(subreddit)─┐ +│ 91613 │ +└──────────────────────┘ + +1 row in set. Elapsed: 1.572 sec. Processed 85.97 million rows, 367.43 MB (54.71 million rows/s., 233.80 MB/s.) +``` + +5. This query returns the top 10 subreddits (in terms of number of comments): + +```sql +SELECT + subreddit, + count() AS c +FROM reddit +GROUP BY subreddit +ORDER BY c DESC +LIMIT 20; +``` + +```response +┌─subreddit───────┬───────c─┐ +│ AskReddit │ 5245881 │ +│ politics │ 1753120 │ +│ nfl │ 1220266 │ +│ nba │ 960388 │ +│ The_Donald │ 931857 │ +│ news │ 796617 │ +│ worldnews │ 765709 │ +│ CFB │ 710360 │ +│ gaming │ 602761 │ +│ movies │ 601966 │ +│ soccer │ 590628 │ +│ Bitcoin │ 583783 │ +│ pics │ 563408 │ +│ StarWars │ 562514 │ +│ funny │ 547563 │ +│ leagueoflegends │ 517213 │ +│ teenagers │ 492020 │ +│ DestinyTheGame │ 477377 │ +│ todayilearned │ 472650 │ +│ videos │ 450581 │ +└─────────────────┴─────────┘ + +20 rows in set. Elapsed: 0.368 sec. Processed 85.97 million rows, 367.43 MB (233.34 million rows/s., 997.25 MB/s.) +``` + +6. Here are the top 10 authors in December of 2017, in terms of number of comments posted: + +```sql +SELECT + author, + count() AS c +FROM reddit +GROUP BY author +ORDER BY c DESC +LIMIT 10; +``` + +```response +┌─author──────────┬───────c─┐ +│ [deleted] │ 5913324 │ +│ AutoModerator │ 784886 │ +│ ImagesOfNetwork │ 83241 │ +│ BitcoinAllBot │ 54484 │ +│ imguralbumbot │ 45822 │ +│ RPBot │ 29337 │ +│ WikiTextBot │ 25982 │ +│ Concise_AMA_Bot │ 19974 │ +│ MTGCardFetcher │ 19103 │ +│ TotesMessenger │ 19057 │ +└─────────────────┴─────────┘ + +10 rows in set. Elapsed: 8.143 sec. Processed 85.97 million rows, 711.05 MB (10.56 million rows/s., 87.32 MB/s.) +``` + +7. We already inserted some data, but we will start over: + +```sql +TRUNCATE TABLE reddit; +``` + +8. This is a fun dataset and it looks like we can find some great information, so let's go ahead and insert the entire dataset from 2005 to 2023. When you're ready, run this command to insert all the rows. (It takes a while - up to 17 hours!) + +```sql +INSERT INTO reddit +SELECT * +FROM s3Cluster( + 'default', + 'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC*', + 'JSONEachRow' + ) +SETTINGS zstd_window_log_max = 31; +``` + +The response looks like: + +```response +0 rows in set. 
Elapsed: 61187.839 sec. Processed 6.74 billion rows, 2.06 TB (110.17 thousand rows/s., 33.68 MB/s.) +``` + +8. Let's see how many rows were inserted and how much disk space the table is using: + + +```sql +SELECT + sum(rows) AS count, + formatReadableQuantity(count), + formatReadableSize(sum(bytes)) AS disk_size, + formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size +FROM system.parts +WHERE (table = 'reddit') AND active +``` + +Notice the compression of disk storage is about 1/3 of the uncompressed size: + +```response +┌──────count─┬─formatReadableQuantity(sum(rows))─┬─disk_size──┬─uncompressed_size─┐ +│ 6739503568 │ 6.74 billion │ 501.10 GiB │ 1.51 TiB │ +└────────────┴───────────────────────────────────┴────────────┴───────────────────┘ + +1 row in set. Elapsed: 0.010 sec. +``` + +9. The following query shows how many comments, authors and subreddits we have for each month: + +```sql +SELECT + toStartOfMonth(created_utc) AS firstOfMonth, + count() AS c, + bar(c, 0, 50000000, 25) AS bar_count, + uniq(author) AS authors, + bar(authors, 0, 5000000, 25) AS bar_authors, + uniq(subreddit) AS subreddits, + bar(subreddits, 0, 100000, 25) AS bar_subreddits +FROM reddit +GROUP BY firstOfMonth +ORDER BY firstOfMonth ASC; +``` + +This is a substantial query that has to process all 6.74 billion rows, but we still get an impressive response time (about 3 minutes): + +```response +┌─firstOfMonth─┬─────────c─┬─bar_count─────────────────┬─authors─┬─bar_authors───────────────┬─subreddits─┬─bar_subreddits────────────┐ +│ 2005-12-01 │ 1075 │ │ 394 │ │ 1 │ │ +│ 2006-01-01 │ 3666 │ │ 791 │ │ 2 │ │ +│ 2006-02-01 │ 9095 │ │ 1464 │ │ 18 │ │ +│ 2006-03-01 │ 13859 │ │ 1958 │ │ 15 │ │ +│ 2006-04-01 │ 19090 │ │ 2334 │ │ 21 │ │ +│ 2006-05-01 │ 26859 │ │ 2698 │ │ 21 │ │ +│ 2006-06-01 │ 29163 │ │ 3043 │ │ 19 │ │ +│ 2006-07-01 │ 37031 │ │ 3532 │ │ 22 │ │ +│ 2006-08-01 │ 50559 │ │ 4750 │ │ 24 │ │ +│ 2006-09-01 │ 50675 │ │ 4908 │ │ 21 │ │ +│ 2006-10-01 │ 54148 │ │ 5654 │ │ 31 │ │ +│ 2006-11-01 │ 62021 │ │ 6490 │ │ 23 │ │ +│ 2006-12-01 │ 61018 │ │ 6707 │ │ 24 │ │ +│ 2007-01-01 │ 81341 │ │ 7931 │ │ 23 │ │ +│ 2007-02-01 │ 95634 │ │ 9020 │ │ 21 │ │ +│ 2007-03-01 │ 112444 │ │ 10842 │ │ 23 │ │ +│ 2007-04-01 │ 126773 │ │ 10701 │ │ 26 │ │ +│ 2007-05-01 │ 170097 │ │ 11365 │ │ 25 │ │ +│ 2007-06-01 │ 178800 │ │ 11267 │ │ 22 │ │ +│ 2007-07-01 │ 203319 │ │ 12482 │ │ 25 │ │ +│ 2007-08-01 │ 225111 │ │ 14124 │ │ 30 │ │ +│ 2007-09-01 │ 259497 │ ▏ │ 15416 │ │ 33 │ │ +│ 2007-10-01 │ 274170 │ ▏ │ 15302 │ │ 36 │ │ +│ 2007-11-01 │ 372983 │ ▏ │ 15134 │ │ 43 │ │ +│ 2007-12-01 │ 363390 │ ▏ │ 15915 │ │ 31 │ │ +│ 2008-01-01 │ 452990 │ ▏ │ 18857 │ │ 126 │ │ +│ 2008-02-01 │ 441768 │ ▏ │ 18266 │ │ 173 │ │ +│ 2008-03-01 │ 463728 │ ▏ │ 18947 │ │ 292 │ │ +│ 2008-04-01 │ 468317 │ ▏ │ 18590 │ │ 323 │ │ +│ 2008-05-01 │ 536380 │ ▎ │ 20861 │ │ 375 │ │ +│ 2008-06-01 │ 577684 │ ▎ │ 22557 │ │ 575 │ ▏ │ +│ 2008-07-01 │ 592610 │ ▎ │ 23123 │ │ 657 │ ▏ │ +│ 2008-08-01 │ 595959 │ ▎ │ 23729 │ │ 707 │ ▏ │ +│ 2008-09-01 │ 680892 │ ▎ │ 26374 │ ▏ │ 801 │ ▏ │ +│ 2008-10-01 │ 789874 │ ▍ │ 28970 │ ▏ │ 893 │ ▏ │ +│ 2008-11-01 │ 792310 │ ▍ │ 30272 │ ▏ │ 1024 │ ▎ │ +│ 2008-12-01 │ 850359 │ ▍ │ 34073 │ ▏ │ 1103 │ ▎ │ +│ 2009-01-01 │ 1051649 │ ▌ │ 38978 │ ▏ │ 1316 │ ▎ │ +│ 2009-02-01 │ 944711 │ ▍ │ 43390 │ ▏ │ 1132 │ ▎ │ +│ 2009-03-01 │ 1048643 │ ▌ │ 46516 │ ▏ │ 1203 │ ▎ │ +│ 2009-04-01 │ 1094599 │ ▌ │ 48284 │ ▏ │ 1334 │ ▎ │ +│ 2009-05-01 │ 1201257 │ ▌ │ 52512 │ ▎ │ 1395 │ ▎ │ +│ 2009-06-01 │ 1258750 │ ▋ │ 57728 │ ▎ │ 1473 │ ▎ │ +│ 2009-07-01 │ 1470290 │ ▋ │ 
60098 │ ▎ │ 1686 │ ▍ │ +│ 2009-08-01 │ 1750688 │ ▉ │ 67347 │ ▎ │ 1777 │ ▍ │ +│ 2009-09-01 │ 2032276 │ █ │ 78051 │ ▍ │ 1784 │ ▍ │ +│ 2009-10-01 │ 2242017 │ █ │ 93409 │ ▍ │ 2071 │ ▌ │ +│ 2009-11-01 │ 2207444 │ █ │ 95940 │ ▍ │ 2141 │ ▌ │ +│ 2009-12-01 │ 2560510 │ █▎ │ 104239 │ ▌ │ 2141 │ ▌ │ +│ 2010-01-01 │ 2884096 │ █▍ │ 114314 │ ▌ │ 2313 │ ▌ │ +│ 2010-02-01 │ 2687779 │ █▎ │ 115683 │ ▌ │ 2522 │ ▋ │ +│ 2010-03-01 │ 3228254 │ █▌ │ 125775 │ ▋ │ 2890 │ ▋ │ +│ 2010-04-01 │ 3209898 │ █▌ │ 128936 │ ▋ │ 3170 │ ▊ │ +│ 2010-05-01 │ 3267363 │ █▋ │ 131851 │ ▋ │ 3166 │ ▊ │ +│ 2010-06-01 │ 3532867 │ █▊ │ 139522 │ ▋ │ 3301 │ ▊ │ +│ 2010-07-01 │ 4032737 │ ██ │ 153451 │ ▊ │ 3662 │ ▉ │ +│ 2010-08-01 │ 4247982 │ ██ │ 164071 │ ▊ │ 3653 │ ▉ │ +│ 2010-09-01 │ 4704069 │ ██▎ │ 186613 │ ▉ │ 4009 │ █ │ +│ 2010-10-01 │ 5032368 │ ██▌ │ 203800 │ █ │ 4154 │ █ │ +│ 2010-11-01 │ 5689002 │ ██▊ │ 226134 │ █▏ │ 4383 │ █ │ +│ 2010-12-01 │ 5972642 │ ██▉ │ 245824 │ █▏ │ 4692 │ █▏ │ +│ 2011-01-01 │ 6603329 │ ███▎ │ 270025 │ █▎ │ 5141 │ █▎ │ +│ 2011-02-01 │ 6363114 │ ███▏ │ 277593 │ █▍ │ 5202 │ █▎ │ +│ 2011-03-01 │ 7556165 │ ███▊ │ 314748 │ █▌ │ 5445 │ █▎ │ +│ 2011-04-01 │ 7571398 │ ███▊ │ 329920 │ █▋ │ 6128 │ █▌ │ +│ 2011-05-01 │ 8803949 │ ████▍ │ 365013 │ █▊ │ 6834 │ █▋ │ +│ 2011-06-01 │ 9766511 │ ████▉ │ 393945 │ █▉ │ 7519 │ █▉ │ +│ 2011-07-01 │ 10557466 │ █████▎ │ 424235 │ ██ │ 8293 │ ██ │ +│ 2011-08-01 │ 12316144 │ ██████▏ │ 475326 │ ██▍ │ 9657 │ ██▍ │ +│ 2011-09-01 │ 12150412 │ ██████ │ 503142 │ ██▌ │ 10278 │ ██▌ │ +│ 2011-10-01 │ 13470278 │ ██████▋ │ 548801 │ ██▋ │ 10922 │ ██▋ │ +│ 2011-11-01 │ 13621533 │ ██████▊ │ 574435 │ ██▊ │ 11572 │ ██▉ │ +│ 2011-12-01 │ 14509469 │ ███████▎ │ 622849 │ ███ │ 12335 │ ███ │ +│ 2012-01-01 │ 16350205 │ ████████▏ │ 696110 │ ███▍ │ 14281 │ ███▌ │ +│ 2012-02-01 │ 16015695 │ ████████ │ 722892 │ ███▌ │ 14949 │ ███▋ │ +│ 2012-03-01 │ 17881943 │ ████████▉ │ 789664 │ ███▉ │ 15795 │ ███▉ │ +│ 2012-04-01 │ 19044534 │ █████████▌ │ 842491 │ ████▏ │ 16440 │ ████ │ +│ 2012-05-01 │ 20388260 │ ██████████▏ │ 886176 │ ████▍ │ 16974 │ ████▏ │ +│ 2012-06-01 │ 21897913 │ ██████████▉ │ 946798 │ ████▋ │ 17952 │ ████▍ │ +│ 2012-07-01 │ 24087517 │ ████████████ │ 1018636 │ █████ │ 19069 │ ████▊ │ +│ 2012-08-01 │ 25703326 │ ████████████▊ │ 1094445 │ █████▍ │ 20553 │ █████▏ │ +│ 2012-09-01 │ 23419524 │ ███████████▋ │ 1088491 │ █████▍ │ 20831 │ █████▏ │ +│ 2012-10-01 │ 24788236 │ ████████████▍ │ 1131885 │ █████▋ │ 21868 │ █████▍ │ +│ 2012-11-01 │ 24648302 │ ████████████▎ │ 1167608 │ █████▊ │ 21791 │ █████▍ │ +│ 2012-12-01 │ 26080276 │ █████████████ │ 1218402 │ ██████ │ 22622 │ █████▋ │ +│ 2013-01-01 │ 30365867 │ ███████████████▏ │ 1341703 │ ██████▋ │ 24696 │ ██████▏ │ +│ 2013-02-01 │ 27213960 │ █████████████▌ │ 1304756 │ ██████▌ │ 24514 │ ██████▏ │ +│ 2013-03-01 │ 30771274 │ ███████████████▍ │ 1391703 │ ██████▉ │ 25730 │ ██████▍ │ +│ 2013-04-01 │ 33259557 │ ████████████████▋ │ 1485971 │ ███████▍ │ 27294 │ ██████▊ │ +│ 2013-05-01 │ 33126225 │ ████████████████▌ │ 1506473 │ ███████▌ │ 27299 │ ██████▊ │ +│ 2013-06-01 │ 32648247 │ ████████████████▎ │ 1506650 │ ███████▌ │ 27450 │ ██████▊ │ +│ 2013-07-01 │ 34922133 │ █████████████████▍ │ 1561771 │ ███████▊ │ 28294 │ ███████ │ +│ 2013-08-01 │ 34766579 │ █████████████████▍ │ 1589781 │ ███████▉ │ 28943 │ ███████▏ │ +│ 2013-09-01 │ 31990369 │ ███████████████▉ │ 1570342 │ ███████▊ │ 29408 │ ███████▎ │ +│ 2013-10-01 │ 35940040 │ █████████████████▉ │ 1683770 │ ████████▍ │ 30273 │ ███████▌ │ +│ 2013-11-01 │ 37396497 │ ██████████████████▋ │ 1757467 │ ████████▊ │ 31173 │ ███████▊ │ +│ 
2013-12-01 │ 39810216 │ ███████████████████▉ │ 1846204 │ █████████▏ │ 32326 │ ████████ │ +│ 2014-01-01 │ 42420655 │ █████████████████████▏ │ 1927229 │ █████████▋ │ 35603 │ ████████▉ │ +│ 2014-02-01 │ 38703362 │ ███████████████████▎ │ 1874067 │ █████████▎ │ 37007 │ █████████▎ │ +│ 2014-03-01 │ 42459956 │ █████████████████████▏ │ 1959888 │ █████████▊ │ 37948 │ █████████▍ │ +│ 2014-04-01 │ 42440735 │ █████████████████████▏ │ 1951369 │ █████████▊ │ 38362 │ █████████▌ │ +│ 2014-05-01 │ 42514094 │ █████████████████████▎ │ 1970197 │ █████████▊ │ 39078 │ █████████▊ │ +│ 2014-06-01 │ 41990650 │ ████████████████████▉ │ 1943850 │ █████████▋ │ 38268 │ █████████▌ │ +│ 2014-07-01 │ 46868899 │ ███████████████████████▍ │ 2059346 │ ██████████▎ │ 40634 │ ██████████▏ │ +│ 2014-08-01 │ 46990813 │ ███████████████████████▍ │ 2117335 │ ██████████▌ │ 41764 │ ██████████▍ │ +│ 2014-09-01 │ 44992201 │ ██████████████████████▍ │ 2124708 │ ██████████▌ │ 41890 │ ██████████▍ │ +│ 2014-10-01 │ 47497520 │ ███████████████████████▋ │ 2206535 │ ███████████ │ 43109 │ ██████████▊ │ +│ 2014-11-01 │ 46118074 │ ███████████████████████ │ 2239747 │ ███████████▏ │ 43718 │ ██████████▉ │ +│ 2014-12-01 │ 48807699 │ ████████████████████████▍ │ 2372945 │ ███████████▊ │ 43823 │ ██████████▉ │ +│ 2015-01-01 │ 53851542 │ █████████████████████████ │ 2499536 │ ████████████▍ │ 47172 │ ███████████▊ │ +│ 2015-02-01 │ 48342747 │ ████████████████████████▏ │ 2448496 │ ████████████▏ │ 47229 │ ███████████▊ │ +│ 2015-03-01 │ 54564441 │ █████████████████████████ │ 2550534 │ ████████████▊ │ 48156 │ ████████████ │ +│ 2015-04-01 │ 55005780 │ █████████████████████████ │ 2609443 │ █████████████ │ 49865 │ ████████████▍ │ +│ 2015-05-01 │ 54504410 │ █████████████████████████ │ 2585535 │ ████████████▉ │ 50137 │ ████████████▌ │ +│ 2015-06-01 │ 54258492 │ █████████████████████████ │ 2595129 │ ████████████▉ │ 49598 │ ████████████▍ │ +│ 2015-07-01 │ 58451788 │ █████████████████████████ │ 2720026 │ █████████████▌ │ 55022 │ █████████████▊ │ +│ 2015-08-01 │ 58075327 │ █████████████████████████ │ 2743994 │ █████████████▋ │ 55302 │ █████████████▊ │ +│ 2015-09-01 │ 55574825 │ █████████████████████████ │ 2672793 │ █████████████▎ │ 53960 │ █████████████▍ │ +│ 2015-10-01 │ 59494045 │ █████████████████████████ │ 2816426 │ ██████████████ │ 70210 │ █████████████████▌ │ +│ 2015-11-01 │ 57117500 │ █████████████████████████ │ 2847146 │ ██████████████▏ │ 71363 │ █████████████████▊ │ +│ 2015-12-01 │ 58523312 │ █████████████████████████ │ 2854840 │ ██████████████▎ │ 94559 │ ███████████████████████▋ │ +│ 2016-01-01 │ 61991732 │ █████████████████████████ │ 2920366 │ ██████████████▌ │ 108438 │ █████████████████████████ │ +│ 2016-02-01 │ 59189875 │ █████████████████████████ │ 2854683 │ ██████████████▎ │ 109916 │ █████████████████████████ │ +│ 2016-03-01 │ 63918864 │ █████████████████████████ │ 2969542 │ ██████████████▊ │ 84787 │ █████████████████████▏ │ +│ 2016-04-01 │ 64271256 │ █████████████████████████ │ 2999086 │ ██████████████▉ │ 61647 │ ███████████████▍ │ +│ 2016-05-01 │ 65212004 │ █████████████████████████ │ 3034674 │ ███████████████▏ │ 67465 │ ████████████████▊ │ +│ 2016-06-01 │ 65867743 │ █████████████████████████ │ 3057604 │ ███████████████▎ │ 75170 │ ██████████████████▊ │ +│ 2016-07-01 │ 66974735 │ █████████████████████████ │ 3199374 │ ███████████████▉ │ 77732 │ ███████████████████▍ │ +│ 2016-08-01 │ 69654819 │ █████████████████████████ │ 3239957 │ ████████████████▏ │ 63080 │ ███████████████▊ │ +│ 2016-09-01 │ 67024973 │ █████████████████████████ │ 3190864 │ ███████████████▉ │ 
62324 │ ███████████████▌ │ +│ 2016-10-01 │ 71826553 │ █████████████████████████ │ 3284340 │ ████████████████▍ │ 62549 │ ███████████████▋ │ +│ 2016-11-01 │ 71022319 │ █████████████████████████ │ 3300822 │ ████████████████▌ │ 69718 │ █████████████████▍ │ +│ 2016-12-01 │ 72942967 │ █████████████████████████ │ 3430324 │ █████████████████▏ │ 71705 │ █████████████████▉ │ +│ 2017-01-01 │ 78946585 │ █████████████████████████ │ 3572093 │ █████████████████▊ │ 78198 │ ███████████████████▌ │ +│ 2017-02-01 │ 70609487 │ █████████████████████████ │ 3421115 │ █████████████████ │ 69823 │ █████████████████▍ │ +│ 2017-03-01 │ 79723106 │ █████████████████████████ │ 3638122 │ ██████████████████▏ │ 73865 │ ██████████████████▍ │ +│ 2017-04-01 │ 77478009 │ █████████████████████████ │ 3620591 │ ██████████████████ │ 74387 │ ██████████████████▌ │ +│ 2017-05-01 │ 79810360 │ █████████████████████████ │ 3650820 │ ██████████████████▎ │ 74356 │ ██████████████████▌ │ +│ 2017-06-01 │ 79901711 │ █████████████████████████ │ 3737614 │ ██████████████████▋ │ 72114 │ ██████████████████ │ +│ 2017-07-01 │ 81798725 │ █████████████████████████ │ 3872330 │ ███████████████████▎ │ 76052 │ ███████████████████ │ +│ 2017-08-01 │ 84658503 │ █████████████████████████ │ 3960093 │ ███████████████████▊ │ 77798 │ ███████████████████▍ │ +│ 2017-09-01 │ 83165192 │ █████████████████████████ │ 3880501 │ ███████████████████▍ │ 78402 │ ███████████████████▌ │ +│ 2017-10-01 │ 85828912 │ █████████████████████████ │ 3980335 │ ███████████████████▉ │ 80685 │ ████████████████████▏ │ +│ 2017-11-01 │ 84965681 │ █████████████████████████ │ 4026749 │ ████████████████████▏ │ 82659 │ ████████████████████▋ │ +│ 2017-12-01 │ 85973810 │ █████████████████████████ │ 4196354 │ ████████████████████▉ │ 91984 │ ██████████████████████▉ │ +│ 2018-01-01 │ 91558594 │ █████████████████████████ │ 4364443 │ █████████████████████▊ │ 102577 │ █████████████████████████ │ +│ 2018-02-01 │ 86467179 │ █████████████████████████ │ 4277899 │ █████████████████████▍ │ 104610 │ █████████████████████████ │ +│ 2018-03-01 │ 96490262 │ █████████████████████████ │ 4422470 │ ██████████████████████ │ 112559 │ █████████████████████████ │ +│ 2018-04-01 │ 98101232 │ █████████████████████████ │ 4572434 │ ██████████████████████▊ │ 105284 │ █████████████████████████ │ +│ 2018-05-01 │ 100109100 │ █████████████████████████ │ 4698908 │ ███████████████████████▍ │ 103910 │ █████████████████████████ │ +│ 2018-06-01 │ 100009462 │ █████████████████████████ │ 4697426 │ ███████████████████████▍ │ 101107 │ █████████████████████████ │ +│ 2018-07-01 │ 108151359 │ █████████████████████████ │ 5099492 │ █████████████████████████ │ 106184 │ █████████████████████████ │ +│ 2018-08-01 │ 107330940 │ █████████████████████████ │ 5084082 │ █████████████████████████ │ 109985 │ █████████████████████████ │ +│ 2018-09-01 │ 104473929 │ █████████████████████████ │ 5011953 │ █████████████████████████ │ 109710 │ █████████████████████████ │ +│ 2018-10-01 │ 112346556 │ █████████████████████████ │ 5320405 │ █████████████████████████ │ 112533 │ █████████████████████████ │ +│ 2018-11-01 │ 112573001 │ █████████████████████████ │ 5353282 │ █████████████████████████ │ 112211 │ █████████████████████████ │ +│ 2018-12-01 │ 121953600 │ █████████████████████████ │ 5611543 │ █████████████████████████ │ 118291 │ █████████████████████████ │ +│ 2019-01-01 │ 129386587 │ █████████████████████████ │ 6016687 │ █████████████████████████ │ 125725 │ █████████████████████████ │ +│ 2019-02-01 │ 120645639 │ █████████████████████████ │ 5974488 │ 
█████████████████████████ │ 125420 │ █████████████████████████ │ +│ 2019-03-01 │ 137650471 │ █████████████████████████ │ 6410197 │ █████████████████████████ │ 135924 │ █████████████████████████ │ +│ 2019-04-01 │ 138473643 │ █████████████████████████ │ 6416384 │ █████████████████████████ │ 139844 │ █████████████████████████ │ +│ 2019-05-01 │ 142463421 │ █████████████████████████ │ 6574836 │ █████████████████████████ │ 142012 │ █████████████████████████ │ +│ 2019-06-01 │ 134172939 │ █████████████████████████ │ 6601267 │ █████████████████████████ │ 140997 │ █████████████████████████ │ +│ 2019-07-01 │ 145965083 │ █████████████████████████ │ 6901822 │ █████████████████████████ │ 147802 │ █████████████████████████ │ +│ 2019-08-01 │ 146854393 │ █████████████████████████ │ 6993882 │ █████████████████████████ │ 151888 │ █████████████████████████ │ +│ 2019-09-01 │ 137540219 │ █████████████████████████ │ 7001362 │ █████████████████████████ │ 148839 │ █████████████████████████ │ +│ 2019-10-01 │ 129771456 │ █████████████████████████ │ 6825690 │ █████████████████████████ │ 144453 │ █████████████████████████ │ +│ 2019-11-01 │ 107990259 │ █████████████████████████ │ 6368286 │ █████████████████████████ │ 141768 │ █████████████████████████ │ +│ 2019-12-01 │ 112895934 │ █████████████████████████ │ 6640902 │ █████████████████████████ │ 148277 │ █████████████████████████ │ +│ 2020-01-01 │ 54354879 │ █████████████████████████ │ 4782339 │ ███████████████████████▉ │ 111658 │ █████████████████████████ │ +│ 2020-02-01 │ 22696923 │ ███████████▎ │ 3135175 │ ███████████████▋ │ 79521 │ ███████████████████▉ │ +│ 2020-03-01 │ 3466677 │ █▋ │ 987960 │ ████▉ │ 40901 │ ██████████▏ │ +└──────────────┴───────────┴───────────────────────────┴─────────┴───────────────────────────┴────────────┴───────────────────────────┘ + +172 rows in set. Elapsed: 184.809 sec. Processed 6.74 billion rows, 89.56 GB (36.47 million rows/s., 484.62 MB/s.) +``` + +10. Here are the top 10 subreddits of 2022: + +```sql +SELECT + subreddit, + count() AS count +FROM reddit +WHERE toYear(created_utc) = 2022 +GROUP BY subreddit +ORDER BY count DESC +LIMIT 10; +``` + +The response is: + +```response +┌─subreddit────────┬───count─┐ +│ AskReddit │ 3858203 │ +│ politics │ 1356782 │ +│ memes │ 1249120 │ +│ nfl │ 883667 │ +│ worldnews │ 866065 │ +│ teenagers │ 777095 │ +│ AmItheAsshole │ 752720 │ +│ dankmemes │ 657932 │ +│ nba │ 514184 │ +│ unpopularopinion │ 473649 │ +└──────────────────┴─────────┘ + +10 rows in set. Elapsed: 27.824 sec. Processed 6.74 billion rows, 53.26 GB (242.22 million rows/s., 1.91 GB/s.) +``` + +11. 
Let's see which subreddits had the biggest increase in commnents from 2018 to 2019: + +```sql +SELECT + subreddit, + newcount - oldcount AS diff +FROM +( + SELECT + subreddit, + count(*) AS newcount + FROM reddit + WHERE toYear(created_utc) = 2019 + GROUP BY subreddit +) +ALL INNER JOIN +( + SELECT + subreddit, + count(*) AS oldcount + FROM reddit + WHERE toYear(created_utc) = 2018 + GROUP BY subreddit +) USING (subreddit) +ORDER BY diff DESC +LIMIT 50 +SETTINGS joined_subquery_requires_alias = 0; +``` + +It looks like memes and teenagers were busy on Reddit in 2019: + +```response +┌─subreddit────────────┬─────diff─┐ +│ memes │ 15368369 │ +│ AskReddit │ 14663662 │ +│ teenagers │ 12266991 │ +│ AmItheAsshole │ 11561538 │ +│ dankmemes │ 11305158 │ +│ unpopularopinion │ 6332772 │ +│ PewdiepieSubmissions │ 5930818 │ +│ Market76 │ 5014668 │ +│ relationship_advice │ 3776383 │ +│ freefolk │ 3169236 │ +│ Minecraft │ 3160241 │ +│ classicwow │ 2907056 │ +│ Animemes │ 2673398 │ +│ gameofthrones │ 2402835 │ +│ PublicFreakout │ 2267605 │ +│ ShitPostCrusaders │ 2207266 │ +│ RoastMe │ 2195715 │ +│ gonewild │ 2148649 │ +│ AnthemTheGame │ 1803818 │ +│ entitledparents │ 1706270 │ +│ MortalKombat │ 1679508 │ +│ Cringetopia │ 1620555 │ +│ pokemon │ 1615266 │ +│ HistoryMemes │ 1608289 │ +│ Brawlstars │ 1574977 │ +│ iamatotalpieceofshit │ 1558315 │ +│ trashy │ 1518549 │ +│ ChapoTrapHouse │ 1505748 │ +│ Pikabu │ 1501001 │ +│ Showerthoughts │ 1475101 │ +│ cursedcomments │ 1465607 │ +│ ukpolitics │ 1386043 │ +│ wallstreetbets │ 1384431 │ +│ interestingasfuck │ 1378900 │ +│ wholesomememes │ 1353333 │ +│ AskOuija │ 1233263 │ +│ borderlands3 │ 1197192 │ +│ aww │ 1168257 │ +│ insanepeoplefacebook │ 1155473 │ +│ FortniteCompetitive │ 1122778 │ +│ EpicSeven │ 1117380 │ +│ FreeKarma4U │ 1116423 │ +│ YangForPresidentHQ │ 1086700 │ +│ SquaredCircle │ 1044089 │ +│ MurderedByWords │ 1042511 │ +│ AskMen │ 1024434 │ +│ thedivision │ 1016634 │ +│ barstoolsports │ 985032 │ +│ nfl │ 978340 │ +│ BattlefieldV │ 971408 │ +└──────────────────────┴──────────┘ + +50 rows in set. Elapsed: 65.954 sec. Processed 13.48 billion rows, 79.67 GB (204.37 million rows/s., 1.21 GB/s.) +``` + +12. One more query: let's compare ClickHouse mentions to other technologies like Snowflake and Postgres. 
This query is a big one because it has to search all the comments three times for a substring, and unfortunately ClickHouse user are obviously not very active on Reddit yet: + +```sql +SELECT + toStartOfQuarter(created_utc) AS quarter, + sum(if(positionCaseInsensitive(body, 'clickhouse') > 0, 1, 0)) AS clickhouse, + sum(if(positionCaseInsensitive(body, 'snowflake') > 0, 1, 0)) AS snowflake, + sum(if(positionCaseInsensitive(body, 'postgres') > 0, 1, 0)) AS postgres +FROM reddit +GROUP BY quarter +ORDER BY quarter ASC; +``` + +```response +┌────Quarter─┬─clickhouse─┬─snowflake─┬─postgres─┐ +│ 2005-10-01 │ 0 │ 0 │ 0 │ +│ 2006-01-01 │ 0 │ 2 │ 23 │ +│ 2006-04-01 │ 0 │ 2 │ 24 │ +│ 2006-07-01 │ 0 │ 4 │ 13 │ +│ 2006-10-01 │ 0 │ 23 │ 73 │ +│ 2007-01-01 │ 0 │ 14 │ 91 │ +│ 2007-04-01 │ 0 │ 10 │ 59 │ +│ 2007-07-01 │ 0 │ 39 │ 116 │ +│ 2007-10-01 │ 0 │ 45 │ 125 │ +│ 2008-01-01 │ 0 │ 53 │ 234 │ +│ 2008-04-01 │ 0 │ 79 │ 303 │ +│ 2008-07-01 │ 0 │ 102 │ 174 │ +│ 2008-10-01 │ 0 │ 156 │ 323 │ +│ 2009-01-01 │ 0 │ 206 │ 208 │ +│ 2009-04-01 │ 0 │ 178 │ 417 │ +│ 2009-07-01 │ 0 │ 300 │ 295 │ +│ 2009-10-01 │ 0 │ 633 │ 589 │ +│ 2010-01-01 │ 0 │ 555 │ 501 │ +│ 2010-04-01 │ 0 │ 587 │ 469 │ +│ 2010-07-01 │ 0 │ 770 │ 821 │ +│ 2010-10-01 │ 0 │ 1480 │ 550 │ +│ 2011-01-01 │ 0 │ 1482 │ 568 │ +│ 2011-04-01 │ 0 │ 1558 │ 406 │ +│ 2011-07-01 │ 0 │ 2163 │ 628 │ +│ 2011-10-01 │ 0 │ 4064 │ 566 │ +│ 2012-01-01 │ 0 │ 4621 │ 662 │ +│ 2012-04-01 │ 0 │ 5737 │ 785 │ +│ 2012-07-01 │ 0 │ 6097 │ 1127 │ +│ 2012-10-01 │ 0 │ 7986 │ 600 │ +│ 2013-01-01 │ 0 │ 9704 │ 839 │ +│ 2013-04-01 │ 0 │ 8161 │ 853 │ +│ 2013-07-01 │ 0 │ 9704 │ 1028 │ +│ 2013-10-01 │ 0 │ 12879 │ 1404 │ +│ 2014-01-01 │ 0 │ 12317 │ 1548 │ +│ 2014-04-01 │ 0 │ 13181 │ 1577 │ +│ 2014-07-01 │ 0 │ 15640 │ 1710 │ +│ 2014-10-01 │ 0 │ 19479 │ 1959 │ +│ 2015-01-01 │ 0 │ 20411 │ 2104 │ +│ 2015-04-01 │ 1 │ 20309 │ 9112 │ +│ 2015-07-01 │ 0 │ 20325 │ 4771 │ +│ 2015-10-01 │ 0 │ 25087 │ 3030 │ +│ 2016-01-01 │ 0 │ 23462 │ 3126 │ +│ 2016-04-01 │ 3 │ 25496 │ 2757 │ +│ 2016-07-01 │ 4 │ 28233 │ 2928 │ +│ 2016-10-01 │ 2 │ 45445 │ 2449 │ +│ 2017-01-01 │ 9 │ 76019 │ 2808 │ +│ 2017-04-01 │ 9 │ 67919 │ 2803 │ +│ 2017-07-01 │ 13 │ 68974 │ 2771 │ +│ 2017-10-01 │ 12 │ 69730 │ 2906 │ +│ 2018-01-01 │ 17 │ 67476 │ 3152 │ +│ 2018-04-01 │ 3 │ 67139 │ 3986 │ +│ 2018-07-01 │ 14 │ 67979 │ 3609 │ +│ 2018-10-01 │ 28 │ 74147 │ 3850 │ +│ 2019-01-01 │ 14 │ 80250 │ 4305 │ +│ 2019-04-01 │ 30 │ 70307 │ 3872 │ +│ 2019-07-01 │ 33 │ 77149 │ 4164 │ +│ 2019-10-01 │ 13 │ 76746 │ 3541 │ +│ 2020-01-01 │ 16 │ 54475 │ 846 │ +└────────────┴────────────┴───────────┴──────────┘ + +58 rows in set. Elapsed: 2663.751 sec. Processed 6.74 billion rows, 1.21 TB (2.53 million rows/s., 454.37 MB/s.) +``` \ No newline at end of file diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 308e3197ad7..3f6c2577c94 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -143,8 +143,9 @@ You can also download and install packages manually from [here](https://packages #### Install standalone ClickHouse Keeper :::tip -If you are going to run ClickHouse Keeper on the same server as ClickHouse server you -do not need to install ClickHouse Keeper as it is included with ClickHouse server. This command is only needed on standalone ClickHouse Keeper servers. +In production environment we [strongly recommend](/docs/en/operations/tips.md#L143-L144) running ClickHouse Keeper on dedicated nodes. 
+In test environments, if you decide to run ClickHouse Server and ClickHouse Keeper on the same server, you do not need to install ClickHouse Keeper as it is included with ClickHouse server. +This command is only needed on standalone ClickHouse Keeper servers. ::: ```bash @@ -211,8 +212,9 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password. #### Install standalone ClickHouse Keeper :::tip -If you are going to run ClickHouse Keeper on the same server as ClickHouse server you -do not need to install ClickHouse Keeper as it is included with ClickHouse server. This command is only needed on standalone ClickHouse Keeper servers. +In production environment we [strongly recommend](/docs/en/operations/tips.md#L143-L144) running ClickHouse Keeper on dedicated nodes. +In test environments, if you decide to run ClickHouse Server and ClickHouse Keeper on the same server, you do not need to install ClickHouse Keeper as it is included with ClickHouse server. +This command is only needed on standalone ClickHouse Keeper servers. ::: ```bash diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index a31a52f509e..6da61833c12 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -30,7 +30,7 @@ description: In order to effectively mitigate possible human errors, you should ``` :::note ALL -`ALL` is only applicable to the `RESTORE` command. +`ALL` is only applicable to the `RESTORE` command prior to version 23.4 of Clickhouse. ::: ## Background diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 3e3cd89a9e0..e3ca04f5b9b 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1045,7 +1045,7 @@ Default value: `0`. ## background_pool_size {#background_pool_size} -Sets the number of threads performing background merges and mutations for tables with MergeTree engines. This setting is also could be applied at server startup from the `default` profile configuration for backward compatibility at the ClickHouse server start. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance. +Sets the number of threads performing background merges and mutations for tables with MergeTree engines. This setting is also could be applied at server startup from the `default` profile configuration for backward compatibility at the ClickHouse server start. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance. Before changing it, please also take a look at related MergeTree settings, such as [number_of_free_entries_in_pool_to_lower_max_size_of_merge](../../operations/settings/merge-tree-settings.md#number-of-free-entries-in-pool-to-lower-max-size-of-merge) and [number_of_free_entries_in_pool_to_execute_mutation](../../operations/settings/merge-tree-settings.md#number-of-free-entries-in-pool-to-execute-mutation). @@ -1063,8 +1063,8 @@ Default value: 16. 
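To get a feel for how busy the pool is before changing its size, you can watch the number of occupied slots at runtime. The metric name below is an assumption for recent releases and may differ on older versions; `system.merges` works as a fallback:

```sql
-- Number of tasks currently occupying the merges/mutations pool.
SELECT metric, value
FROM system.metrics
WHERE metric = 'BackgroundMergesAndMutationsPoolTask';

-- Merges currently in flight.
SELECT count() AS running_merges FROM system.merges;
```
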
## background_merges_mutations_concurrency_ratio {#background_merges_mutations_concurrency_ratio} -Sets a ratio between the number of threads and the number of background merges and mutations that can be executed concurrently. For example if the ratio equals to 2 and -`background_pool_size` is set to 16 then ClickHouse can execute 32 background merges concurrently. This is possible, because background operation could be suspended and postponed. This is needed to give small merges more execution priority. You can only increase this ratio at runtime. To lower it you have to restart the server. +Sets a ratio between the number of threads and the number of background merges and mutations that can be executed concurrently. For example, if the ratio equals to 2 and +`background_pool_size` is set to 16 then ClickHouse can execute 32 background merges concurrently. This is possible, because background operations could be suspended and postponed. This is needed to give small merges more execution priority. You can only increase this ratio at runtime. To lower it you have to restart the server. The same as for `background_pool_size` setting `background_merges_mutations_concurrency_ratio` could be applied from the `default` profile for backward compatibility. Possible values: @@ -1079,6 +1079,33 @@ Default value: 2. 3 ``` +## merges_mutations_memory_usage_soft_limit {#merges_mutations_memory_usage_soft_limit} + +Sets the limit on how much RAM is allowed to use for performing merge and mutation operations. +Zero means unlimited. +If ClickHouse reaches this limit, it won't schedule any new background merge or mutation operations but will continue to execute already scheduled tasks. + +Possible values: + +- Any positive integer. + +**Example** + +```xml +0 +``` + +## merges_mutations_memory_usage_to_ram_ratio {#merges_mutations_memory_usage_to_ram_ratio} + +The default `merges_mutations_memory_usage_soft_limit` value is calculated as `memory_amount * merges_mutations_memory_usage_to_ram_ratio`. + +Default value: `0.5`. + +**See also** + +- [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) +- [merges_mutations_memory_usage_soft_limit](#merges_mutations_memory_usage_soft_limit) + ## background_merges_mutations_scheduling_policy {#background_merges_mutations_scheduling_policy} Algorithm used to select next merge or mutation to be executed by background thread pool. Policy may be changed at runtime without server restart. diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index 9f6fe87ae5f..1f41eafd02e 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -38,6 +38,10 @@ Structure of the `users` section: + + + GRANT SELECT ON system.* + @@ -86,6 +90,28 @@ Possible values: Default value: 0. +### grants {#grants-user-setting} + +This setting allows to grant any rights to selected user. +Each element of the list should be `GRANT` query without any grantees specified. + +Example: + +```xml + + + GRANT SHOW ON *.* + GRANT CREATE ON *.* WITH GRANT OPTION + GRANT SELECT ON system.* + + +``` + +This setting can't be specified at the same time with +`dictionaries`, `access_management`, `named_collection_control`, `show_named_collections_secrets` +and `allow_databases` settings. + + ### user_name/networks {#user-namenetworks} List of networks from which the user can connect to the ClickHouse server. 
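A minimal sketch of what the `networks` section can look like (the user name and addresses are placeholders):

```xml
<users>
    <my_user>
        <networks>
            <ip>127.0.0.1</ip>
            <ip>10.0.0.0/8</ip>
            <host>client.example.com</host>
            <host_regexp>^client\d+\.example\.com$</host_regexp>
        </networks>
    </my_user>
</users>
```
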
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f21dff9fbb7..cddde2090f8 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -608,6 +608,17 @@ See also: - [JOIN strictness](../../sql-reference/statements/select/join.md/#join-settings) +## max_rows_in_set_to_optimize_join + +Maximal size of the set to filter joined tables by each other's row sets before joining. + +Possible values: + +- 0 — Disable. +- Any positive integer. + +Default value: 100000. + ## temporary_files_codec {#temporary_files_codec} Sets compression codec for temporary files used in sorting and joining operations on disk. @@ -1125,6 +1136,12 @@ If unsuccessful, several attempts are made to connect to various replicas. Default value: 1000. +## connect_timeout_with_failover_secure_ms + +Connection timeout for selecting first healthy replica (for secure connections) + +Default value: 1000. + ## connection_pool_max_wait_ms {#connection-pool-max-wait-ms} The wait time in milliseconds for a connection when the connection pool is full. @@ -1410,8 +1427,8 @@ and [enable_writes_to_query_cache](#enable-writes-to-query-cache) control in mor Possible values: -- 0 - Yes -- 1 - No +- 0 - Disabled +- 1 - Enabled Default value: `0`. @@ -1630,7 +1647,7 @@ For not replicated tables see [non_replicated_deduplication_window](merge-tree-s ### async_insert {#async-insert} -Enables or disables asynchronous inserts. This makes sense only for insertion over HTTP protocol. Note that deduplication isn't working for such inserts. +Enables or disables asynchronous inserts. Note that deduplication is disabled by default, see [async_insert_deduplicate](#async-insert-deduplicate). If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables. @@ -3562,7 +3579,7 @@ Default value: `1`. If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays. -## allow_experimental_projection_optimization {#allow-experimental-projection-optimization} +## optimize_use_projections {#optimize_use_projections} Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md/#projections) optimization when processing `SELECT` queries. @@ -3575,7 +3592,7 @@ Default value: `1`. ## force_optimize_projection {#force-optimize-projection} -Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting). +Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [optimize_use_projections](#optimize_use_projections) setting). 
Possible values: diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index ac6ea22ab75..5804ad8545b 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -172,7 +172,9 @@ Example of configuration for versions earlier than 22.8: ``` -Cache **configuration settings**: +File Cache **disk configuration settings**: + +These settings should be defined in the disk configuration section. - `path` - path to the directory with cache. Default: None, this setting is obligatory. @@ -182,7 +184,7 @@ Cache **configuration settings**: - `enable_filesystem_query_cache_limit` - allow to limit the size of cache which is downloaded within each query (depends on user setting `max_query_cache_size`). Default: `false`. -- `enable_cache_hits_threshold` - a number, which defines how many times some data needs to be read before it will be cached. Default: `0`, e.g. the data is cached at the first attempt to read it. +- `enable_cache_hits_threshold` - number which defines how many times some data needs to be read before it will be cached. Default: `0`, e.g. the data is cached at the first attempt to read it. - `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading. @@ -190,21 +192,23 @@ Cache **configuration settings**: - `max_elements` - a limit for a number of cache files. Default: `1048576`. -Cache **query settings**: +File Cache **query/profile settings**: + +Some of these settings will disable cache features per query/profile that are enabled by default or in disk configuration settings. For example, you can enable cache in disk configuration and disable it per query/profile setting `enable_filesystem_cache` to `false`. Also setting `cache_on_write_operations` to `true` in disk configuration means that "write-though" cache is enabled. But if you need to disable this general setting per specific queries then setting `enable_filesystem_cache_on_write_operations` to `false` means that write operations cache will be disabled for a specific query/profile. - `enable_filesystem_cache` - allows to disable cache per query even if storage policy was configured with `cache` disk type. Default: `true`. - `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` - allows to use cache in query only if it already exists, otherwise query data will not be written to local cache storage. Default: `false`. -- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on. +- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on. Default: `false`. -- `enable_filesystem_cache_log` - turn on logging to `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. Default: `false`. +- `enable_filesystem_cache_log` - turn on logging to `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. It can be turn on for specific queries or enabled in a profile. Default: `false`. 
- `max_query_cache_size` - a limit for the cache size, which can be written to local cache storage. Requires enabled `enable_filesystem_query_cache_limit` in cache configuration. Default: `false`. -- `skip_download_if_exceeds_query_cache` - allows to change the behaviour of setting `max_query_cache_size`. Default: `true`. If this setting is turned on and cache download limit during query was reached, no more cache will be downloaded to cache storage. If this setting is turned off and cache download limit during query was reached, cache will still be written by cost of evicting previously downloaded (within current query) data, e.g. second behaviour allows to preserve `last recentltly used` behaviour while keeping query cache limit. +- `skip_download_if_exceeds_query_cache` - allows to change the behaviour of setting `max_query_cache_size`. Default: `true`. If this setting is turned on and cache download limit during query was reached, no more cache will be downloaded to cache storage. If this setting is turned off and cache download limit during query was reached, cache will still be written by cost of evicting previously downloaded (within current query) data, e.g. second behaviour allows to preserve `last recently used` behaviour while keeping query cache limit. -** Warning ** +**Warning** Cache configuration settings and cache query settings correspond to the latest ClickHouse version, for earlier versions something might not be supported. Cache **system tables**: @@ -215,7 +219,7 @@ Cache **system tables**: Cache **commands**: -- `SYSTEM DROP FILESYSTEM CACHE () (ON CLUSTER)` +- `SYSTEM DROP FILESYSTEM CACHE () (ON CLUSTER)` -- `ON CLUSTER` is only supported when no `` is provided - `SHOW FILESYSTEM CACHES` -- show list of filesystem caches which were configured on the server. (For versions <= `22.8` the command is named `SHOW CACHES`) @@ -231,10 +235,10 @@ Result: └───────────┘ ``` -- `DESCRIBE CACHE ''` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`) +- `DESCRIBE FILESYSTEM CACHE ''` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW FILESYSTEM CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`) ```sql -DESCRIBE CACHE 's3_cache' +DESCRIBE FILESYSTEM CACHE 's3_cache' ``` ``` text diff --git a/docs/en/operations/system-tables/row_policies.md b/docs/en/operations/system-tables/row_policies.md index 2c4d060ce66..e92ba1ece74 100644 --- a/docs/en/operations/system-tables/row_policies.md +++ b/docs/en/operations/system-tables/row_policies.md @@ -12,7 +12,7 @@ Columns: - `database` ([String](../../sql-reference/data-types/string.md)) — Database name. -- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. Empty if policy for database. - `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Row policy ID. 
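For example, the following query lists the policies defined on a server (policies created for a whole database have an empty `table` value):

```sql
SELECT name, database, table, id
FROM system.row_policies;
```
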
diff --git a/docs/en/sql-reference/aggregate-functions/reference/first_value.md b/docs/en/sql-reference/aggregate-functions/reference/first_value.md new file mode 100644 index 00000000000..e163bd62a45 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/first_value.md @@ -0,0 +1,55 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/first_value +sidebar_position: 7 +--- + +# first_value + +Selects the first encountered value, similar to `any`, but could accept NULL. + +## examples + +```sql +insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null) +``` + +### example1 +The NULL value is ignored at default. +```sql +select first_value(b) from test_data +``` + +```text +┌─first_value_ignore_nulls(b)─┐ +│ 3 │ +└─────────────────────────────┘ + +``` + +### example2 +The NULL value is ignored. +```sql +select first_value(b) ignore nulls sfrom test_data +``` + +```text +┌─first_value_ignore_nulls(b)─┐ +│ 3 │ +└─────────────────────────────┘ + +``` + +### example3 +The NULL value is accepted. +```sql +select first_value(b) respect nulls from test_data +``` + +```text + +┌─first_value_respect_nulls(b)─┐ +│ ᴺᵁᴸᴸ │ +└──────────────────────────────┘ +``` + + diff --git a/docs/en/sql-reference/aggregate-functions/reference/greatest.md b/docs/en/sql-reference/aggregate-functions/reference/greatest.md new file mode 100644 index 00000000000..d5efea44790 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/greatest.md @@ -0,0 +1,48 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/greatest +title: greatest +--- + +Aggregate function that returns the greatest across a list of values. All of the list members must be of comparable types. + +Examples: + +```sql +SELECT + toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.)), + greatest(1, 2, toUInt8(3), 3.) +``` +```response +┌─toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.))─┬─greatest(1, 2, toUInt8(3), 3.)─┐ +│ Float64 │ 3 │ +└─────────────────────────────────────────────────────┴────────────────────────────────┘ +``` + +:::note +The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison. +::: + +```sql +SELECT greatest(['hello'], ['there'], ['world']) +``` +```response +┌─greatest(['hello'], ['there'], ['world'])─┐ +│ ['world'] │ +└───────────────────────────────────────────┘ +``` + +```sql +SELECT greatest(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3)) +``` +```response +┌─greatest(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐ +│ 2023-05-12 01:16:59.000 │ +└──---──────────────────────────────────────────────────────────────────────────┘ +``` + +:::note +The type returned is a DateTime64 as the DataTime32 must be promoted to 64 bit for the comparison. +::: + +Also see [least](/docs/en/sql-reference/aggregate-functions/reference/least.md). 
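Note that, despite living next to the aggregate functions, `greatest` (like `least`) is evaluated per row over its argument list rather than across rows. A small sketch with a hypothetical table:

```sql
CREATE TABLE readings (a UInt8, b UInt8) ENGINE = Memory;
INSERT INTO readings VALUES (1, 4), (5, 2);

SELECT greatest(a, b) FROM readings; -- one value per row: 4, 5
SELECT max(a) FROM readings;         -- one value over all rows: 5
```
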
+
diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md
index e25e3a54356..50208352f38 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/index.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/index.md
@@ -26,6 +26,8 @@ ClickHouse-specific aggregate functions:

- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md)
+- [first_value](../../../sql-reference/aggregate-functions/reference/first_value.md)
+- [last_value](../../../sql-reference/aggregate-functions/reference/last_value.md)
- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md)
- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md)
- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/last_value.md b/docs/en/sql-reference/aggregate-functions/reference/last_value.md
new file mode 100644
index 00000000000..ebf002e6ae2
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/last_value.md
@@ -0,0 +1,53 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/last_value
+sidebar_position: 8
+---
+
+# last_value
+
+Selects the last encountered value, similar to `anyLast`, but can accept NULL.
+
+
+## Examples
+
+```sql
+insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null)
+```
+
+### Example 1
+The NULL value is ignored by default.
+```sql
+select last_value(b) from test_data
+```
+
+```text
+┌─last_value_ignore_nulls(b)─┐
+│ 5 │
+└────────────────────────────┘
+```
+
+### Example 2
+The NULL value is ignored.
+```sql
+select last_value(b) ignore nulls from test_data
+```
+
+```text
+┌─last_value_ignore_nulls(b)─┐
+│ 5 │
+└────────────────────────────┘
+```
+
+### Example 3
+The NULL value is accepted.
+```sql
+select last_value(b) respect nulls from test_data
+```
+
+```text
+┌─last_value_respect_nulls(b)─┐
+│ ᴺᵁᴸᴸ │
+└─────────────────────────────┘
+```
+
+
diff --git a/docs/en/sql-reference/aggregate-functions/reference/least.md b/docs/en/sql-reference/aggregate-functions/reference/least.md
new file mode 100644
index 00000000000..ae4b1d43182
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/least.md
@@ -0,0 +1,48 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/least
+title: least
+---
+
+Aggregate function that returns the least across a list of values. All of the list members must be of comparable types.
+
+Examples:
+
+```sql
+SELECT
+    toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.)),
+    least(1, 2, toUInt8(3), 3.)
+```
+```response
+┌─toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.))─┬─least(1, 2, toUInt8(3), 3.)─┐
+│ Float64 │ 1 │
+└──────────────────────────────────────────────────┴─────────────────────────────┘
+```
+
+:::note
+The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
+:::
+
+```sql
+SELECT least(['hello'], ['there'], ['world'])
+```
+```response
+┌─least(['hello'], ['there'], ['world'])─┐
+│ ['hello'] │
+└────────────────────────────────────────┘
+```
+
+```sql
+SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
+```
+```response
+┌─least(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
+│ 2023-05-12 01:16:59.000 │
+└────────────────────────────────────────────────────────────────────────────┘
+```
+
+:::note
+The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
+:::
+
+Also see [greatest](/docs/en/sql-reference/aggregate-functions/reference/greatest.md).
+
diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md
index 707acbda760..20ce7d2ed52 100644
--- a/docs/en/sql-reference/data-types/array.md
+++ b/docs/en/sql-reference/data-types/array.md
@@ -46,8 +46,6 @@ SELECT [1, 2] AS x, toTypeName(x)

## Working with Data Types

-The maximum size of an array is limited to one million elements.
-
When creating an array on the fly, ClickHouse automatically defines the argument type as the narrowest data type that can store all the listed arguments. If there are any [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) or literal [NULL](../../sql-reference/syntax.md#null-literal) values, the type of an array element also becomes [Nullable](../../sql-reference/data-types/nullable.md).

If ClickHouse couldn’t determine the data type, it generates an exception. For instance, this happens when trying to create an array with strings and numbers simultaneously (`SELECT array(1, 'a')`).
diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md
index c89c2e78752..bedbcf0bd28 100644
--- a/docs/en/sql-reference/data-types/special-data-types/interval.md
+++ b/docs/en/sql-reference/data-types/special-data-types/interval.md
@@ -8,10 +8,6 @@ sidebar_label: Interval

The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator.

-:::note
-`Interval` data type values can’t be stored in tables.
-:::
-
Structure:

- Time interval as an unsigned integer value.
@@ -19,6 +15,9 @@ Structure:

Supported interval types:

+- `NANOSECOND`
+- `MICROSECOND`
+- `MILLISECOND`
- `SECOND`
- `MINUTE`
- `HOUR`
diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md
index 74ab7e3c948..5801b7866cb 100644
--- a/docs/en/sql-reference/dictionaries/index.md
+++ b/docs/en/sql-reference/dictionaries/index.md
@@ -2218,8 +2218,6 @@ LAYOUT(regexp_tree)
...
```

-We only allow `YAMLRegExpTree` to work with regexp_tree dicitionary layout. If you want to use other sources, please set variable `regexp_dict_allow_other_sources` true.
-
**Source**

We introduce a type of source called `YAMLRegExpTree` representing the structure of Regexp Tree dictionary. An Example of a valid yaml config is like:
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 599b64ef43f..4710a5617c3 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -1218,12 +1218,16 @@ Rounds the time to the half hour.
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. -### example -```sql +**Example** + +``` sql SELECT toYYYYMM(now(), 'US/Eastern') ``` -```response + +Result: + +``` text ┌─toYYYYMM(now(), 'US/Eastern')─┐ │ 202303 │ └───────────────────────────────┘ @@ -1233,11 +1237,15 @@ SELECT Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. -### example +**Example** + ```sql SELECT toYYYYMMDD(now(), 'US/Eastern') ``` + +Result: + ```response ┌─toYYYYMMDD(now(), 'US/Eastern')─┐ │ 20230302 │ @@ -1248,11 +1256,15 @@ SELECT Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. -### example +**Example** + ```sql SELECT toYYYYMMDDhhmmss(now(), 'US/Eastern') ``` + +Result: + ```response ┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐ │ 20230302112209 │ diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index fe842732b89..635c8f4e0ec 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -279,6 +279,8 @@ cityHash64(par1,...) This is a fast non-cryptographic hash function. It uses the CityHash algorithm for string parameters and implementation-specific fast non-cryptographic hash function for parameters with other data types. The function uses the CityHash combinator to get the final results. +Note that Google changed the algorithm of CityHash after it has been added to ClickHouse. In other words, ClickHouse's cityHash64 and Google's upstream CityHash now produce different results. ClickHouse cityHash64 corresponds to CityHash v1.0.2. + **Arguments** The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index 1577c01eec9..42d402e9d44 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -59,244 +59,6 @@ A lambda function that accepts multiple arguments can also be passed to a higher For some functions the first argument (the lambda function) can be omitted. In this case, identical mapping is assumed. -## SQL User Defined Functions +## User Defined Functions (UDFs) -Custom functions from lambda expressions can be created using the [CREATE FUNCTION](../statements/create/function.md) statement. To delete these functions use the [DROP FUNCTION](../statements/drop.md#drop-function) statement. - -## Executable User Defined Functions -ClickHouse can call any external executable program or script to process data. - -The configuration of executable user defined functions can be located in one or more xml-files. 
The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter. - -A function configuration contains the following settings: - -- `name` - a function name. -- `command` - script name to execute or command if `execute_direct` is false. -- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number. -- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command. -- `return_type` - the type of a returned value. -- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`. -- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created. -- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`. -- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`. -- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. -- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter. -- `pool_size` - the size of a command pool. Optional. Default value is `16`. -- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. -- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter. -- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter. - -The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is after processing a chunk of arguments it must wait for the next chunk. - -**Example** - -Creating `test_function` using XML configuration. -File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings). 
-```xml - - - executable - test_function_python - String - - UInt64 - value - - TabSeparated - test_function.py - - -``` - -Script file inside `user_scripts` folder `test_function.py` (`/var/lib/clickhouse/user_scripts/test_function.py` with default path settings). - -```python -#!/usr/bin/python3 - -import sys - -if __name__ == '__main__': - for line in sys.stdin: - print("Value " + line, end='') - sys.stdout.flush() -``` - -Query: - -``` sql -SELECT test_function_python(toUInt64(2)); -``` - -Result: - -``` text -┌─test_function_python(2)─┐ -│ Value 2 │ -└─────────────────────────┘ -``` - -Creating `test_function_sum` manually specifying `execute_direct` to `0` using XML configuration. -File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings). -```xml - - - executable - test_function_sum - UInt64 - - UInt64 - lhs - - - UInt64 - rhs - - TabSeparated - cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table" - 0 - - -``` - -Query: - -``` sql -SELECT test_function_sum(2, 2); -``` - -Result: - -``` text -┌─test_function_sum(2, 2)─┐ -│ 4 │ -└─────────────────────────┘ -``` - -Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration. -File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings). -```xml - - - executable - test_function_sum_json - UInt64 - result_name - - UInt64 - argument_1 - - - UInt64 - argument_2 - - JSONEachRow - test_function_sum_json.py - - -``` - -Script file inside `user_scripts` folder `test_function_sum_json.py` (`/var/lib/clickhouse/user_scripts/test_function_sum_json.py` with default path settings). - -```python -#!/usr/bin/python3 - -import sys -import json - -if __name__ == '__main__': - for line in sys.stdin: - value = json.loads(line) - first_arg = int(value['argument_1']) - second_arg = int(value['argument_2']) - result = {'result_name': first_arg + second_arg} - print(json.dumps(result), end='\n') - sys.stdout.flush() -``` - -Query: - -``` sql -SELECT test_function_sum_json(2, 2); -``` - -Result: - -``` text -┌─test_function_sum_json(2, 2)─┐ -│ 4 │ -└──────────────────────────────┘ -``` - -Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type). -File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings). -```xml - - - executable - test_function_parameter_python - String - - UInt64 - - TabSeparated - test_function_parameter_python.py {test_parameter:UInt64} - - -``` - -Script file inside `user_scripts` folder `test_function_parameter_python.py` (`/var/lib/clickhouse/user_scripts/test_function_parameter_python.py` with default path settings). - -```python -#!/usr/bin/python3 - -import sys - -if __name__ == "__main__": - for line in sys.stdin: - print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="") - sys.stdout.flush() -``` - -Query: - -``` sql -SELECT test_function_parameter_python(1)(2); -``` - -Result: - -``` text -┌─test_function_parameter_python(1)(2)─┐ -│ Parameter 1 value 2 │ -└──────────────────────────────────────┘ -``` - -## Error Handling - -Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. 
For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query. - -## Evaluation of Argument Expressions - -In almost all programming languages, one of the arguments might not be evaluated for certain operators. This is usually the operators `&&`, `||`, and `?:`. -But in ClickHouse, arguments of functions (operators) are always evaluated. This is because entire parts of columns are evaluated at once, instead of calculating each row separately. - -## Performing Functions for Distributed Query Processing - -For distributed query processing, as many stages of query processing as possible are performed on remote servers, and the rest of the stages (merging intermediate results and everything after that) are performed on the requestor server. - -This means that functions can be performed on different servers. -For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y),` - -- if a `distributed_table` has at least two shards, the functions ‘g’ and ‘h’ are performed on remote servers, and the function ‘f’ is performed on the requestor server. -- if a `distributed_table` has only one shard, all the ‘f’, ‘g’, and ‘h’ functions are performed on this shard’s server. - -The result of a function usually does not depend on which server it is performed on. However, sometimes this is important. -For example, functions that work with dictionaries use the dictionary that exists on the server they are running on. -Another example is the `hostName` function, which returns the name of the server it is running on in order to make `GROUP BY` by servers in a `SELECT` query. - -If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in an ‘any’ aggregate function or add it to a key in `GROUP BY`. - - -## Related Content - -- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) +ClickHouse supports user-defined functions. See [UDFs](/docs/en/sql-reference/functions/udf.md). diff --git a/docs/en/sql-reference/functions/udf.md b/docs/en/sql-reference/functions/udf.md new file mode 100644 index 00000000000..a58c1364780 --- /dev/null +++ b/docs/en/sql-reference/functions/udf.md @@ -0,0 +1,249 @@ +--- +slug: /en/sql-reference/functions/udf +sidebar_position: 15 +sidebar_label: UDF +--- + +# UDFs User Defined Functions + + +## Executable User Defined Functions +ClickHouse can call any external executable program or script to process data. + +The configuration of executable user defined functions can be located in one or more xml-files. The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter. + +A function configuration contains the following settings: + +- `name` - a function name. +- `command` - script name to execute or command if `execute_direct` is false. +- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number. 
+- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command. +- `return_type` - the type of a returned value. +- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`. +- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created. +- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`. +- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`. +- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. +- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter. +- `pool_size` - the size of a command pool. Optional. Default value is `16`. +- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. +- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter. +- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter. + +The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is after processing a chunk of arguments it must wait for the next chunk. + +**Example** + +Creating `test_function` using XML configuration. +File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings). +```xml + + + executable + test_function_python + String + + UInt64 + value + + TabSeparated + test_function.py + + +``` + +Script file inside `user_scripts` folder `test_function.py` (`/var/lib/clickhouse/user_scripts/test_function.py` with default path settings). + +```python +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + for line in sys.stdin: + print("Value " + line, end='') + sys.stdout.flush() +``` + +Query: + +``` sql +SELECT test_function_python(toUInt64(2)); +``` + +Result: + +``` text +┌─test_function_python(2)─┐ +│ Value 2 │ +└─────────────────────────┘ +``` + +Creating `test_function_sum` manually specifying `execute_direct` to `0` using XML configuration. +File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings). 
+```xml + + + executable + test_function_sum + UInt64 + + UInt64 + lhs + + + UInt64 + rhs + + TabSeparated + cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table" + 0 + + +``` + +Query: + +``` sql +SELECT test_function_sum(2, 2); +``` + +Result: + +``` text +┌─test_function_sum(2, 2)─┐ +│ 4 │ +└─────────────────────────┘ +``` + +Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration. +File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings). +```xml + + + executable + test_function_sum_json + UInt64 + result_name + + UInt64 + argument_1 + + + UInt64 + argument_2 + + JSONEachRow + test_function_sum_json.py + + +``` + +Script file inside `user_scripts` folder `test_function_sum_json.py` (`/var/lib/clickhouse/user_scripts/test_function_sum_json.py` with default path settings). + +```python +#!/usr/bin/python3 + +import sys +import json + +if __name__ == '__main__': + for line in sys.stdin: + value = json.loads(line) + first_arg = int(value['argument_1']) + second_arg = int(value['argument_2']) + result = {'result_name': first_arg + second_arg} + print(json.dumps(result), end='\n') + sys.stdout.flush() +``` + +Query: + +``` sql +SELECT test_function_sum_json(2, 2); +``` + +Result: + +``` text +┌─test_function_sum_json(2, 2)─┐ +│ 4 │ +└──────────────────────────────┘ +``` + +Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type). +File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings). +```xml + + + executable + test_function_parameter_python + String + + UInt64 + + TabSeparated + test_function_parameter_python.py {test_parameter:UInt64} + + +``` + +Script file inside `user_scripts` folder `test_function_parameter_python.py` (`/var/lib/clickhouse/user_scripts/test_function_parameter_python.py` with default path settings). + +```python +#!/usr/bin/python3 + +import sys + +if __name__ == "__main__": + for line in sys.stdin: + print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="") + sys.stdout.flush() +``` + +Query: + +``` sql +SELECT test_function_parameter_python(1)(2); +``` + +Result: + +``` text +┌─test_function_parameter_python(1)(2)─┐ +│ Parameter 1 value 2 │ +└──────────────────────────────────────┘ +``` + +## Error Handling + +Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query. + +## Evaluation of Argument Expressions + +In almost all programming languages, one of the arguments might not be evaluated for certain operators. This is usually the operators `&&`, `||`, and `?:`. +But in ClickHouse, arguments of functions (operators) are always evaluated. This is because entire parts of columns are evaluated at once, instead of calculating each row separately. + +## Performing Functions for Distributed Query Processing + +For distributed query processing, as many stages of query processing as possible are performed on remote servers, and the rest of the stages (merging intermediate results and everything after that) are performed on the requestor server. 
+ +This means that functions can be performed on different servers. +For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y),` + +- if a `distributed_table` has at least two shards, the functions ‘g’ and ‘h’ are performed on remote servers, and the function ‘f’ is performed on the requestor server. +- if a `distributed_table` has only one shard, all the ‘f’, ‘g’, and ‘h’ functions are performed on this shard’s server. + +The result of a function usually does not depend on which server it is performed on. However, sometimes this is important. +For example, functions that work with dictionaries use the dictionary that exists on the server they are running on. +Another example is the `hostName` function, which returns the name of the server it is running on in order to make `GROUP BY` by servers in a `SELECT` query. + +If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in an ‘any’ aggregate function or add it to a key in `GROUP BY`. + +## SQL User Defined Functions + +Custom functions from lambda expressions can be created using the [CREATE FUNCTION](../statements/create/function.md) statement. To delete these functions use the [DROP FUNCTION](../statements/drop.md#drop-function) statement. + +## Related Content + +### [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index ff55f700023..378f41c1199 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -132,7 +132,7 @@ Comments are stored in the `comment_expression` column returned by the [DESCRIBE Example: ``` sql -ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for accessing the site.' +ALTER TABLE visits COMMENT COLUMN browser 'This column shows the browser used for accessing the site.' ``` ## MODIFY COLUMN diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 52e99d93109..ce0bc1ea528 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -103,7 +103,11 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab ``` This query copies the data partition from `table1` to `table2`. -Note that data will be deleted neither from `table1` nor from `table2`. + +Note that: + +- Data will be deleted neither from `table1` nor from `table2`. +- `table1` may be a temporary table. For the query to run successfully, the following conditions must be met: @@ -117,7 +121,12 @@ For the query to run successfully, the following conditions must be met: ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1 ``` -This query copies the data partition from the `table1` to `table2` and replaces existing partition in the `table2`. Note that data won’t be deleted from `table1`. +This query copies the data partition from the `table1` to `table2` and replaces existing partition in the `table2`. + +Note that: + +- Data won’t be deleted from `table1`. +- `table1` may be a temporary table. 
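As a rough illustration of the notes above (the table names and partition value here are made up, and the structural requirements listed next still apply), replacing a partition from a staging table could look like:

```sql
-- hypothetical tables partitioned by toYYYYMM(date); per the note above, visits_staging may also be a temporary table
ALTER TABLE visits_dst REPLACE PARTITION 202305 FROM visits_staging;
```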
For the query to run successfully, the following conditions must be met: diff --git a/docs/en/sql-reference/statements/create/function.md b/docs/en/sql-reference/statements/create/function.md index 08946934f0c..15c2356445b 100644 --- a/docs/en/sql-reference/statements/create/function.md +++ b/docs/en/sql-reference/statements/create/function.md @@ -61,4 +61,6 @@ Result: ## Related Content -- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) +### [Executable UDFs](/docs/en/sql-reference/functions/udf.md). + +### [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md index aa0a07747f2..83bb2e6bb9a 100644 --- a/docs/en/sql-reference/statements/create/row-policy.md +++ b/docs/en/sql-reference/statements/create/row-policy.md @@ -14,8 +14,8 @@ Row policies makes sense only for users with readonly access. If user can modify Syntax: ``` sql -CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1 - [, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2 ...] +CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1|db1.* + [, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2|db2.* ...] [FOR SELECT] USING condition [AS {PERMISSIVE | RESTRICTIVE}] [TO {role1 [, role2 ...] | ALL | ALL EXCEPT role1 [, role2 ...]}] @@ -76,6 +76,20 @@ CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 AS RESTRICTIVE TO peter, antonio enables the user `peter` to see rows only if both `b=1` AND `c=2`. +Database policies are combined with table policies. + +For example, the following policies + +``` sql +CREATE ROW POLICY pol1 ON mydb.* USING b=1 TO mira, peter +CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 AS RESTRICTIVE TO peter, antonio +``` + +enables the user `peter` to see table1 rows only if both `b=1` AND `c=2`, although +any other table in mydb would have only `b=1` policy applied for the user. + + + ## ON CLUSTER Clause Allows creating row policies on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). @@ -88,3 +102,5 @@ Allows creating row policies on a cluster, see [Distributed DDL](../../../sql-re `CREATE ROW POLICY filter2 ON mydb.mytable USING a<1000 AND b=5 TO ALL EXCEPT mira` `CREATE ROW POLICY filter3 ON mydb.mytable USING 1 TO admin` + +`CREATE ROW POLICY filter4 ON mydb.* USING 1 TO admin` diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index f8f031e1551..de44a001472 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -381,9 +381,9 @@ High compression levels are useful for asymmetric scenarios, like compress once, `DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply: - DEFLATE_QPL is experimental and can only be used after setting configuration parameter `allow_experimental_codecs=1`. -- DEFLATE_QPL only works if ClickHouse was compiled with support for AVX2 or AVX512 instructions. Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details. 
+- DEFLATE_QPL requires a ClickHouse build compiled with SSE 4.2 instructions (by default, this is the case). Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details. - DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details. -- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with support for AVX2/AVX512 +- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with SSE 4.2 enabled. ### Specialized Codecs diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md index bd6db9e7d55..352af16042a 100644 --- a/docs/en/sql-reference/statements/select/into-outfile.md +++ b/docs/en/sql-reference/statements/select/into-outfile.md @@ -12,7 +12,7 @@ Compressed files are supported. Compression type is detected by the extension of **Syntax** ```sql -SELECT INTO OUTFILE file_name [AND STDOUT] [COMPRESSION type [LEVEL level]] +SELECT INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION type [LEVEL level]] ``` `file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`. @@ -25,6 +25,7 @@ SELECT INTO OUTFILE file_name [AND STDOUT] [COMPRESSION type [LEVEL - The query will fail if a file with the same file name already exists. - The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it. - If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output. +- If `APPEND` is mentioned in the query then the output is appended to an existing file. If compression is used, append cannot be used. **Example** diff --git a/docs/en/sql-reference/table-functions/dictionary.md b/docs/en/sql-reference/table-functions/dictionary.md index c4bdde4dce2..73d5039a64b 100644 --- a/docs/en/sql-reference/table-functions/dictionary.md +++ b/docs/en/sql-reference/table-functions/dictionary.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/table-functions/dictionary sidebar_position: 54 -sidebar_label: dictionary function +sidebar_label: dictionary title: dictionary --- diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 6182ab20203..812b0c0a2d4 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -391,7 +391,7 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT ## Проекции {#projections} Проекции похожи на [материализованные представления](../../../sql-reference/statements/create/view.md#materialized), но определяются на уровне кусков данных. Это обеспечивает гарантии согласованности данных наряду с автоматическим использованием в запросах. -Проекции — это экспериментальная возможность. 
Чтобы включить поддержку проекций, установите настройку [allow_experimental_projection_optimization](../../../operations/settings/settings.md#allow-experimental-projection-optimization) в значение `1`. См. также настройку [force_optimize_projection ](../../../operations/settings/settings.md#force-optimize-projection). +Проекции — это экспериментальная возможность. Чтобы включить поддержку проекций, установите настройку [optimize_use_projections](../../../operations/settings/settings.md#allow-experimental-projection-optimization) в значение `1`. См. также настройку [force_optimize_projection ](../../../operations/settings/settings.md#optimize_use_projections). Проекции не поддерживаются для запросов `SELECT` с модификатором [FINAL](../../../sql-reference/statements/select/from.md#select-from-final). diff --git a/docs/ru/operations/settings/settings-users.md b/docs/ru/operations/settings/settings-users.md index a8d5f0ec453..50f4eb5ae6b 100644 --- a/docs/ru/operations/settings/settings-users.md +++ b/docs/ru/operations/settings/settings-users.md @@ -37,6 +37,10 @@ sidebar_label: "Настройки пользователей" + + + GRANT SELECT ON system.* + @@ -89,6 +93,27 @@ sidebar_label: "Настройки пользователей" Значение по умолчанию: 0. +### grants {#grants-user-setting} + +Настройка позволяет указать набор прав для заданного пользователя. +Каждый элемент списка должен представлять собой `GRANT` запрос без указания пользователей в самом запросе. + +Пример: + +```xml + + + GRANT SHOW ON *.* + GRANT CREATE ON *.* WITH GRANT OPTION + GRANT SELECT ON system.* + + +``` + +Настройка не может быть выставлена одновременно с +`dictionaries`, `access_management`, `named_collection_control`, `show_named_collections_secrets` +или `allow_databases`. + ### user_name/networks {#user-namenetworks} Список сетей, из которых пользователь может подключиться к серверу ClickHouse. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 7ecc8b7cb42..066e71c25a5 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -3588,7 +3588,7 @@ SETTINGS index_granularity = 8192 │ Строка с идентификатором снэпшота, из которого будет выполняться [исходный дамп таблиц PostgreSQL](../../engines/database-engines/materialized-postgresql.md). Эта настройка должна использоваться совместно с [materialized_postgresql_replication_slot](#materialized-postgresql-replication-slot). -## allow_experimental_projection_optimization {#allow-experimental-projection-optimization} +## optimize_use_projections {#optimize_use_projections} Включает или отключает поддержку [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) при обработке запросов `SELECT`. @@ -3601,7 +3601,7 @@ SETTINGS index_granularity = 8192 │ ## force_optimize_projection {#force-optimize-projection} -Включает или отключает обязательное использование [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) в запросах `SELECT`, если поддержка проекций включена (см. настройку [allow_experimental_projection_optimization](#allow-experimental-projection-optimization)). +Включает или отключает обязательное использование [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) в запросах `SELECT`, если поддержка проекций включена (см. настройку [optimize_use_projections](#optimize_use_projections)). 
Возможные значения: diff --git a/docs/ru/sql-reference/statements/alter/partition.md b/docs/ru/sql-reference/statements/alter/partition.md index 95d02c062bd..90688c9ece2 100644 --- a/docs/ru/sql-reference/statements/alter/partition.md +++ b/docs/ru/sql-reference/statements/alter/partition.md @@ -102,7 +102,11 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab ``` Копирует партицию из таблицы `table1` в таблицу `table2`. -Обратите внимание, что данные не удаляются ни из `table1`, ни из `table2`. + +Обратите внимание, что: + +- Данные не удаляются ни из `table1`, ни из `table2`. +- `table1` может быть временной таблицей. Следует иметь в виду: @@ -118,7 +122,12 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1 ``` -Копирует партицию из таблицы `table1` в таблицу `table2` с заменой существующих данных в `table2`. Данные из `table1` не удаляются. +Копирует партицию из таблицы `table1` в таблицу `table2` с заменой существующих данных в `table2`. + +Обратите внимание, что: + +- Данные из `table1` не удаляются. +- `table1` может быть временной таблицей. Следует иметь в виду: diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index 51167521018..eedc913cf82 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -1074,7 +1074,7 @@ ClickHouse服务器日志文件中相应的跟踪日志确认了ClickHouse正在 Projections目前是一个实验性的功能,因此我们需要告诉ClickHouse: ```sql -SET allow_experimental_projection_optimization = 1; +SET optimize_use_projections = 1; ``` diff --git a/docs/zh/sql-reference/statements/grant.md b/docs/zh/sql-reference/statements/grant.md index 12ad2e0fe25..7e7cdbff350 100644 --- a/docs/zh/sql-reference/statements/grant.md +++ b/docs/zh/sql-reference/statements/grant.md @@ -55,7 +55,7 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION 同样 `john` 有权执行 `GRANT OPTION`,因此他能给其它账号进行和自己账号权限范围相同的授权。 -可以使用`*` 号代替表或库名进行授权操作。例如, `GRANT SELECT ONdb.* TO john` 操作运行 `john`对 `db`库的所有表执行 `SELECT`查询。同样,你可以忽略库名。在这种情形下,权限将指向当前的数据库。例如, `GRANT SELECT ON* to john` 对当前数据库的所有表指定授权, `GARNT SELECT ON mytable to john`对当前数据库的 `mytable`表进行授权。 +可以使用`*` 号代替表或库名进行授权操作。例如, `GRANT SELECT ONdb.* TO john` 操作运行 `john`对 `db`库的所有表执行 `SELECT`查询。同样,你可以忽略库名。在这种情形下,权限将指向当前的数据库。例如, `GRANT SELECT ON* to john` 对当前数据库的所有表指定授权, `GRANT SELECT ON mytable to john`对当前数据库的 `mytable`表进行授权。 访问 `systen`数据库总是被允许的(因为这个数据库用来处理sql操作) 可以一次给多个账号进行多种授权操作。 `GRANT SELECT,INSERT ON *.* TO john,robin` 允许 `john`和`robin` 账号对任意数据库的任意表执行 `INSERT`和 `SELECT`操作。 diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index df0abceb8c6..fb30f60a0b8 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -862,7 +862,8 @@ bool Client::processWithFuzzing(const String & full_query) const auto * tmp_pos = text_2.c_str(); const auto ast_3 = parseQuery(tmp_pos, tmp_pos + text_2.size(), false /* allow_multi_statements */); - const auto text_3 = ast_3->formatForErrorMessage(); + const auto text_3 = ast_3 ? 
ast_3->formatForErrorMessage() : ""; + if (text_3 != text_2) { fmt::print(stderr, "Found error: The query formatting is broken.\n"); @@ -877,7 +878,7 @@ bool Client::processWithFuzzing(const String & full_query) fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute); fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", ast_2->dumpTree()); fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", text_2); - fmt::print(stderr, "AST-3 (Text-2 parsed):\n'{}'\n", ast_3->dumpTree()); + fmt::print(stderr, "AST-3 (Text-2 parsed):\n'{}'\n", ast_3 ? ast_3->dumpTree() : ""); fmt::print(stderr, "Text-3 (AST-3 formatted):\n'{}'\n", text_3); fmt::print(stderr, "Text-3 must be equal to Text-2, but it is not.\n"); diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h index 3d4a11186e3..36dcfa50842 100644 --- a/programs/copier/ZooKeeperStaff.h +++ b/programs/copier/ZooKeeperStaff.h @@ -175,7 +175,7 @@ public: Coordination::Stat stat{}; String _some_data; auto watch_callback = - [stale = stale] (const Coordination::WatchResponse & rsp) + [my_stale = stale] (const Coordination::WatchResponse & rsp) { auto logger = &Poco::Logger::get("ClusterCopier"); if (rsp.error == Coordination::Error::ZOK) @@ -184,11 +184,11 @@ public: { case Coordination::CREATED: LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path); - stale->store(true); + my_stale->store(true); break; case Coordination::CHANGED: LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at {}", rsp.path); - stale->store(true); + my_stale->store(true); } } }; diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 761335fb707..e5d56023f7b 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -114,7 +114,7 @@ if (BUILD_STANDALONE_KEEPER) clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES}) # Remove some redundant dependencies - target_compile_definitions (clickhouse-keeper PRIVATE -DKEEPER_STANDALONE_BUILD) + target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_PROGRAM_STANDALONE_BUILD) target_compile_definitions (clickhouse-keeper PUBLIC -DWITHOUT_TEXT_LOG) target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../src") # uses includes from src directory diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 3853c955171..3d1773260f5 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -57,7 +57,7 @@ int mainEntryClickHouseKeeper(int argc, char ** argv) } } -#ifdef KEEPER_STANDALONE_BUILD +#ifdef CLICKHOUSE_PROGRAM_STANDALONE_BUILD // Weak symbols don't work correctly on Darwin // so we have a stub implementation to avoid linker errors @@ -498,18 +498,18 @@ try /// Prometheus (if defined and not setup yet with http_port) port_name = "prometheus.port"; - createServer(listen_host, port_name, listen_try, [&, http_context = std::move(http_context)](UInt16 port) mutable + createServer(listen_host, port_name, listen_try, [&, my_http_context = std::move(http_context)](UInt16 port) mutable { Poco::Net::ServerSocket socket; auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(http_context->getReceiveTimeout()); - socket.setSendTimeout(http_context->getSendTimeout()); + socket.setReceiveTimeout(my_http_context->getReceiveTimeout()); + socket.setSendTimeout(my_http_context->getSendTimeout()); servers->emplace_back( listen_host, port_name, "Prometheus: http://" + address.toString(), std::make_unique( - 
std::move(http_context), createPrometheusMainHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + std::move(my_http_context), createPrometheusMainHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); }); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7307b16fcce..632f3f3a02d 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -130,6 +130,7 @@ namespace CurrentMetrics extern const Metric Revision; extern const Metric VersionInteger; extern const Metric MemoryTracking; + extern const Metric MergesMutationsMemoryTracking; extern const Metric MaxDDLEntryID; extern const Metric MaxPushedDDLEntryID; } @@ -1225,6 +1226,25 @@ try total_memory_tracker.setDescription("(total)"); total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); + size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit; + + size_t default_merges_mutations_server_memory_usage = static_cast(memory_amount * server_settings_.merges_mutations_memory_usage_to_ram_ratio); + if (merges_mutations_memory_usage_soft_limit == 0 || merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage) + { + merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage; + LOG_WARNING(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" + " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)", + formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit), + formatReadableSizeWithBinarySuffix(memory_amount), + server_settings_.merges_mutations_memory_usage_to_ram_ratio); + } + + LOG_INFO(log, "Merges and mutations memory limit is set to {}", + formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit)); + background_memory_tracker.setSoftLimit(merges_mutations_memory_usage_soft_limit); + background_memory_tracker.setDescription("(background)"); + background_memory_tracker.setMetric(CurrentMetrics::MergesMutationsMemoryTracking); + total_memory_tracker.setAllowUseJemallocMemory(server_settings_.allow_use_jemalloc_memory); auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker(); @@ -1375,8 +1395,8 @@ try { Poco::Net::ServerSocket socket; auto address = socketBindListen(config(), socket, listen_host, port); - socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); - socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); + socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); return ProtocolServerAdapter( listen_host, port_name, @@ -1398,8 +1418,8 @@ try #if USE_SSL Poco::Net::SecureServerSocket socket; auto address = socketBindListen(config(), socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); - socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); + 
socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); return ProtocolServerAdapter( listen_host, secure_port_name, @@ -1852,7 +1872,7 @@ try } if (current_connections) - LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + LOG_WARNING(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); else LOG_INFO(log, "Closed all listening sockets."); @@ -1864,7 +1884,7 @@ try current_connections = waitServersToFinish(servers, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) - LOG_INFO(log, "Closed connections. But {} remain." + LOG_WARNING(log, "Closed connections. But {} remain." " Tip: To increase wait time add to config: 60", current_connections); else LOG_INFO(log, "Closed connections."); @@ -1880,7 +1900,7 @@ try /// Dump coverage here, because std::atexit callback would not be called. dumpCoverageReportIfPossible(); - LOG_INFO(log, "Will shutdown forcefully."); + LOG_WARNING(log, "Will shutdown forcefully."); safeExit(0); } }); diff --git a/programs/server/config.xml b/programs/server/config.xml index 51aa04ba0e5..82dd697084c 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1294,10 +1294,10 @@ - *_dictionary.xml + *_dictionary.*ml - *_function.xml + *_function.*ml diff --git a/src/Access/AccessChangesNotifier.cpp b/src/Access/AccessChangesNotifier.cpp index 05516285efb..b27dda82142 100644 --- a/src/Access/AccessChangesNotifier.cpp +++ b/src/Access/AccessChangesNotifier.cpp @@ -47,10 +47,10 @@ scope_guard AccessChangesNotifier::subscribeForChanges(AccessEntityType type, co list.push_back(handler); auto handler_it = std::prev(list.end()); - return [handlers=handlers, type, handler_it] + return [my_handlers = handlers, type, handler_it] { - std::lock_guard lock2{handlers->mutex}; - auto & list2 = handlers->by_type[static_cast(type)]; + std::lock_guard lock2{my_handlers->mutex}; + auto & list2 = my_handlers->by_type[static_cast(type)]; list2.erase(handler_it); }; } @@ -63,13 +63,13 @@ scope_guard AccessChangesNotifier::subscribeForChanges(const UUID & id, const On list.push_back(handler); auto handler_it = std::prev(list.end()); - return [handlers=handlers, it, handler_it] + return [my_handlers = handlers, it, handler_it] { - std::lock_guard lock2{handlers->mutex}; + std::lock_guard lock2{my_handlers->mutex}; auto & list2 = it->second; list2.erase(handler_it); if (list2.empty()) - handlers->by_id.erase(it); + my_handlers->by_id.erase(it); }; } diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index 409338209cc..3bb0be160f4 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index ecc0a927591..6394c0279a7 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -185,6 +185,7 @@ enum class AccessType M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \ M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \ M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \ + M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM 
DISABLE FAILPOINT", GLOBAL, SYSTEM) \ M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \ \ M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\ diff --git a/src/Access/Common/RowPolicyDefs.cpp b/src/Access/Common/RowPolicyDefs.cpp index ba7856116f6..b1f882fe971 100644 --- a/src/Access/Common/RowPolicyDefs.cpp +++ b/src/Access/Common/RowPolicyDefs.cpp @@ -22,7 +22,7 @@ String RowPolicyName::toString() const name += backQuoteIfNeed(database); name += '.'; } - name += backQuoteIfNeed(table_name); + name += (table_name == RowPolicyName::ANY_TABLE_MARK ? "*" : backQuoteIfNeed(table_name)); return name; } diff --git a/src/Access/Common/RowPolicyDefs.h b/src/Access/Common/RowPolicyDefs.h index 792884c56df..7ffc99e1272 100644 --- a/src/Access/Common/RowPolicyDefs.h +++ b/src/Access/Common/RowPolicyDefs.h @@ -9,6 +9,8 @@ namespace DB /// Represents the full name of a row policy, e.g. "myfilter ON mydb.mytable". struct RowPolicyName { + static constexpr char ANY_TABLE_MARK[] = ""; + String short_name; String database; String table_name; diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 710cf257b95..938881fafff 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -742,9 +742,9 @@ void DiskAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); - restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] + restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists] { - for (const auto & [id, entity] : entities) + for (const auto & [id, entity] : my_entities) insertWithID(id, entity, replace_if_exists, throw_if_exists, /* write_on_disk= */ true); }); } diff --git a/src/Access/EnabledRoles.cpp b/src/Access/EnabledRoles.cpp index 456529da942..cebc075a6e3 100644 --- a/src/Access/EnabledRoles.cpp +++ b/src/Access/EnabledRoles.cpp @@ -26,10 +26,10 @@ scope_guard EnabledRoles::subscribeForChanges(const OnChangeHandler & handler) c handlers->list.push_back(handler); auto it = std::prev(handlers->list.end()); - return [handlers=handlers, it] + return [my_handlers = handlers, it] { - std::lock_guard lock2{handlers->mutex}; - handlers->list.erase(it); + std::lock_guard lock2{my_handlers->mutex}; + my_handlers->list.erase(it); }; } @@ -53,10 +53,10 @@ void EnabledRoles::setRolesInfo(const std::shared_ptr & } notifications->join(scope_guard( - [info = info, handlers_to_notify = std::move(handlers_to_notify)] + [my_info = info, my_handlers_to_notify = std::move(handlers_to_notify)] { - for (const auto & handler : handlers_to_notify) - handler(info); + for (const auto & handler : my_handlers_to_notify) + handler(my_info); })); } } diff --git a/src/Access/EnabledRowPolicies.cpp b/src/Access/EnabledRowPolicies.cpp index c00dcf9e3a7..8ab1bf5928b 100644 --- a/src/Access/EnabledRowPolicies.cpp +++ b/src/Access/EnabledRowPolicies.cpp @@ -35,7 +35,13 @@ RowPolicyFilterPtr EnabledRowPolicies::getFilter(const String & database, const auto loaded = mixed_filters.load(); auto it = loaded->find({database, table_name, filter_type}); if (it == loaded->end()) - return {}; + { /// Look for a policy for database if a table policy not found + it = loaded->find({database, 
RowPolicyName::ANY_TABLE_MARK, filter_type}); + if (it == loaded->end()) + { + return {}; + } + } return it->second; } diff --git a/src/Access/MemoryAccessStorage.cpp b/src/Access/MemoryAccessStorage.cpp index 8fcca235ee8..c4192ee4552 100644 --- a/src/Access/MemoryAccessStorage.cpp +++ b/src/Access/MemoryAccessStorage.cpp @@ -297,9 +297,9 @@ void MemoryAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); - restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] + restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists] { - for (const auto & [id, entity] : entities) + for (const auto & [id, entity] : my_entities) insertWithID(id, entity, replace_if_exists, throw_if_exists); }); } diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index 7a608c298b1..56d68be9268 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -525,9 +525,9 @@ void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zooke } const String zookeeper_uuids_path = zookeeper_path + "/uuid"; - auto watch_entities_list = [watched_queue = watched_queue](const Coordination::WatchResponse &) + auto watch_entities_list = [my_watched_queue = watched_queue](const Coordination::WatchResponse &) { - [[maybe_unused]] bool push_result = watched_queue->push(UUIDHelpers::Nil); + [[maybe_unused]] bool push_result = my_watched_queue->push(UUIDHelpers::Nil); }; Coordination::Stat stat; const auto entity_uuid_strs = zookeeper->getChildrenWatch(zookeeper_uuids_path, &stat, watch_entities_list); @@ -592,10 +592,10 @@ void ReplicatedAccessStorage::refreshEntityNoLock(const zkutil::ZooKeeperPtr & z AccessEntityPtr ReplicatedAccessStorage::tryReadEntityFromZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) const { - const auto watch_entity = [watched_queue = watched_queue, id](const Coordination::WatchResponse & response) + const auto watch_entity = [my_watched_queue = watched_queue, id](const Coordination::WatchResponse & response) { if (response.type == Coordination::Event::CHANGED) - [[maybe_unused]] bool push_result = watched_queue->push(id); + [[maybe_unused]] bool push_result = my_watched_queue->push(id); }; Coordination::Stat entity_stat; @@ -680,12 +680,12 @@ void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_col backup_entries_collector.addPostTask( [backup_entry = backup_entry_with_path.second, - zookeeper_path = zookeeper_path, + my_zookeeper_path = zookeeper_path, type, &backup_entries_collector, backup_coordination] { - for (const String & path : backup_coordination->getReplicatedAccessFilePaths(zookeeper_path, type)) + for (const String & path : backup_coordination->getReplicatedAccessFilePaths(my_zookeeper_path, type)) backup_entries_collector.addBackupEntry(path, backup_entry); }); } @@ -708,9 +708,9 @@ void ReplicatedAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); - restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] + restorer.addDataRestoreTask([this, my_entities = std::move(entities), 
replace_if_exists, throw_if_exists] { - for (const auto & [id, entity] : entities) + for (const auto & [id, entity] : my_entities) insertWithID(id, entity, replace_if_exists, throw_if_exists); }); } diff --git a/src/Access/RolesOrUsersSet.cpp b/src/Access/RolesOrUsersSet.cpp index 52374c3739d..c026ae42f76 100644 --- a/src/Access/RolesOrUsersSet.cpp +++ b/src/Access/RolesOrUsersSet.cpp @@ -228,25 +228,25 @@ void RolesOrUsersSet::add(const std::vector & ids_) bool RolesOrUsersSet::match(const UUID & id) const { - return (all || ids.count(id)) && !except_ids.count(id); + return (all || ids.contains(id)) && !except_ids.contains(id); } bool RolesOrUsersSet::match(const UUID & user_id, const boost::container::flat_set & enabled_roles) const { - if (!all && !ids.count(user_id)) + if (!all && !ids.contains(user_id)) { bool found_enabled_role = std::any_of( - enabled_roles.begin(), enabled_roles.end(), [this](const UUID & enabled_role) { return ids.count(enabled_role); }); + enabled_roles.begin(), enabled_roles.end(), [this](const UUID & enabled_role) { return ids.contains(enabled_role); }); if (!found_enabled_role) return false; } - if (except_ids.count(user_id)) + if (except_ids.contains(user_id)) return false; bool in_except_list = std::any_of( - enabled_roles.begin(), enabled_roles.end(), [this](const UUID & enabled_role) { return except_ids.count(enabled_role); }); + enabled_roles.begin(), enabled_roles.end(), [this](const UUID & enabled_role) { return except_ids.contains(enabled_role); }); return !in_except_list; } diff --git a/src/Access/RowPolicy.h b/src/Access/RowPolicy.h index 99e6f1992f5..9c190458620 100644 --- a/src/Access/RowPolicy.h +++ b/src/Access/RowPolicy.h @@ -35,6 +35,9 @@ struct RowPolicy : public IAccessEntity void setPermissive(bool permissive_ = true) { setRestrictive(!permissive_); } bool isPermissive() const { return !isRestrictive(); } + /// Applied for entire database + bool isForDatabase() const { return full_name.table_name == RowPolicyName::ANY_TABLE_MARK; } + /// Sets that the policy is restrictive. /// A row is only accessible if at least one of the permissive policies passes, /// in addition to all the restrictive policies. diff --git a/src/Access/RowPolicyCache.cpp b/src/Access/RowPolicyCache.cpp index 1036df92609..bb9da674477 100644 --- a/src/Access/RowPolicyCache.cpp +++ b/src/Access/RowPolicyCache.cpp @@ -16,7 +16,8 @@ namespace DB { namespace { - /// Accumulates filters from multiple row policies and joins them using the AND logical operation. + /// Helper to accumulate filters from multiple row policies and join them together + /// by AND or OR logical operations. 
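The capture renames running through these hunks (handlers -> my_handlers, entities -> my_entities, watched_queue -> my_watched_queue, and so on) do not change behaviour; giving the init-capture a name distinct from the member or local it copies avoids the capture shadowing the original, which stricter -Wshadow-style diagnostics in newer clang flag. A minimal sketch of the pattern, not taken from the diff:

    #include <functional>
    #include <memory>
    #include <vector>

    struct Notifier
    {
        std::shared_ptr<std::vector<int>> handlers = std::make_shared<std::vector<int>>();

        std::function<void()> makeUnsubscriber()
        {
            // [handlers = handlers] would introduce a lambda-local name that shadows
            // the member being copied; a distinct capture name keeps the same
            // shared_ptr copy without the shadowing.
            return [my_handlers = handlers] { my_handlers->clear(); };
        }
    };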
class FiltersMixer { public: @@ -148,9 +149,11 @@ void RowPolicyCache::ensureAllRowPoliciesRead() for (const UUID & id : access_control.findAll()) { - auto quota = access_control.tryRead(id); - if (quota) - all_policies.emplace(id, PolicyInfo(quota)); + auto policy = access_control.tryRead(id); + if (policy) + { + all_policies.emplace(id, PolicyInfo(policy)); + } } } @@ -215,40 +218,105 @@ void RowPolicyCache::mixFiltersFor(EnabledRowPolicies & enabled) std::vector policies; }; - std::unordered_map mixers; + std::unordered_map database_mixers; + /// populate database_mixers using database-level policies + /// to aggregate (mix) rules per database for (const auto & [policy_id, info] : all_policies) { - const auto & policy = *info.policy; - bool match = info.roles->match(enabled.params.user_id, enabled.params.enabled_roles); - MixedFiltersKey key; - key.database = info.database_and_table_name->first; - key.table_name = info.database_and_table_name->second; - for (auto filter_type : collections::range(0, RowPolicyFilterType::MAX)) + if (info.isForDatabase()) { - auto filter_type_i = static_cast(filter_type); - if (info.parsed_filters[filter_type_i]) + const auto & policy = *info.policy; + bool match = info.roles->match(enabled.params.user_id, enabled.params.enabled_roles); + for (auto filter_type : collections::range(0, RowPolicyFilterType::MAX)) { - key.filter_type = filter_type; - auto & mixer = mixers[key]; - mixer.database_and_table_name = info.database_and_table_name; - if (match) + auto filter_type_i = static_cast(filter_type); + if (info.parsed_filters[filter_type_i]) { - mixer.mixer.add(info.parsed_filters[filter_type_i], policy.isRestrictive()); - mixer.policies.push_back(info.policy); + MixedFiltersKey key{info.database_and_table_name->first, + info.database_and_table_name->second, + filter_type}; + + auto & mixer = database_mixers[key]; + mixer.database_and_table_name = info.database_and_table_name; + if (match) + { + mixer.mixer.add(info.parsed_filters[filter_type_i], policy.isRestrictive()); + mixer.policies.push_back(info.policy); + } + } + } + } + } + + std::unordered_map table_mixers; + + /// populate table_mixers using database_mixers and table-level policies + for (const auto & [policy_id, info] : all_policies) + { + if (!info.isForDatabase()) + { + const auto & policy = *info.policy; + bool match = info.roles->match(enabled.params.user_id, enabled.params.enabled_roles); + for (auto filter_type : collections::range(0, RowPolicyFilterType::MAX)) + { + auto filter_type_i = static_cast(filter_type); + if (info.parsed_filters[filter_type_i]) + { + MixedFiltersKey key{info.database_and_table_name->first, + info.database_and_table_name->second, + filter_type}; + auto table_it = table_mixers.find(key); + if (table_it == table_mixers.end()) + { /// no exact match - create new mixer + MixedFiltersKey database_key = key; + database_key.table_name = RowPolicyName::ANY_TABLE_MARK; + + auto database_it = database_mixers.find(database_key); + + if (database_it == database_mixers.end()) + { + table_it = table_mixers.try_emplace(key).first; + } + else + { + /// table policies are based on database ones + table_it = table_mixers.insert({key, database_it->second}).first; + } + } + + auto & mixer = table_it->second; /// getting table level mixer + mixer.database_and_table_name = info.database_and_table_name; + if (match) + { + mixer.mixer.add(info.parsed_filters[filter_type_i], policy.isRestrictive()); + mixer.policies.push_back(info.policy); + } } } } } auto mixed_filters = 
boost::make_shared(); - for (auto & [key, mixer] : mixers) + + /// Retrieve aggregated policies from mixers + /// if a table has a policy for this particular table, we have all needed information in table_mixers + /// (policies for the database are already applied) + /// otherwise we would look for a policy for database using RowPolicy::ANY_TABLE_MARK + /// Consider restrictive policies a=1 for db.t, b=2 for db.* and c=3 for db.* + /// We are going to have two items in mixed_filters: + /// 1. a=1 AND b=2 AND c=3 for db.t (comes from table_mixers, where it had been created with the help of database_mixers) + /// 2. b=2 AND c=3 for db.* (comes directly from database_mixers) + for (auto * mixer_map_ptr : {&table_mixers, &database_mixers}) { - auto mixed_filter = std::make_shared(); - mixed_filter->database_and_table_name = std::move(mixer.database_and_table_name); - mixed_filter->expression = std::move(mixer.mixer).getResult(access_control.isEnabledUsersWithoutRowPoliciesCanReadRows()); - mixed_filter->policies = std::move(mixer.policies); - mixed_filters->emplace(key, std::move(mixed_filter)); + for (auto & [key, mixer] : *mixer_map_ptr) + { + auto mixed_filter = std::make_shared(); + mixed_filter->database_and_table_name = std::move(mixer.database_and_table_name); + mixed_filter->expression = std::move(mixer.mixer).getResult(access_control.isEnabledUsersWithoutRowPoliciesCanReadRows()); + mixed_filter->policies = std::move(mixer.policies); + mixed_filters->emplace(key, std::move(mixed_filter)); + } } enabled.mixed_filters.store(mixed_filters); diff --git a/src/Access/RowPolicyCache.h b/src/Access/RowPolicyCache.h index 4fbf90d1a2d..df263416509 100644 --- a/src/Access/RowPolicyCache.h +++ b/src/Access/RowPolicyCache.h @@ -29,6 +29,7 @@ private: explicit PolicyInfo(const RowPolicyPtr & policy_) { setPolicy(policy_); } void setPolicy(const RowPolicyPtr & policy_); + bool isForDatabase() const { return policy->isForDatabase(); } RowPolicyPtr policy; const RolesOrUsersSet * roles = nullptr; std::shared_ptr> database_and_table_name; diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index e83ab264f4f..12f584cab83 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -105,21 +105,21 @@ void SettingsConstraints::check(const Settings & current_settings, const Setting if (SettingsProfileElements::isAllowBackupSetting(element.setting_name)) continue; - if (!element.value.isNull()) + if (element.value) { - SettingChange value(element.setting_name, element.value); + SettingChange value(element.setting_name, *element.value); check(current_settings, value); } - if (!element.min_value.isNull()) + if (element.min_value) { - SettingChange value(element.setting_name, element.min_value); + SettingChange value(element.setting_name, *element.min_value); check(current_settings, value); } - if (!element.max_value.isNull()) + if (element.max_value) { - SettingChange value(element.setting_name, element.max_value); + SettingChange value(element.setting_name, *element.max_value); check(current_settings, value); } diff --git a/src/Access/SettingsProfileElement.cpp b/src/Access/SettingsProfileElement.cpp index ce56782d887..9358391cb93 100644 --- a/src/Access/SettingsProfileElement.cpp +++ b/src/Access/SettingsProfileElement.cpp @@ -63,18 +63,18 @@ void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const A max_value = ast.max_value; writability = ast.writability; - if (!value.isNull()) - value = 
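Taken together, the row-policy hunks above make "ON db.*" policies work: getFilter() falls back from the exact table key to the database-wide key (ANY_TABLE_MARK is the empty string per RowPolicyDefs.h), and mixFiltersFor() first builds database_mixers and then seeds each table-level mixer from them, so a table entry already carries the db.* conditions. A rough sketch of the lookup order only, with a simplified key standing in for MixedFiltersKey:

    #include <map>
    #include <optional>
    #include <string>
    #include <tuple>

    using Key = std::tuple<std::string /*database*/, std::string /*table*/, int /*filter_type*/>;

    std::optional<std::string> findFilter(const std::map<Key, std::string> & filters,
                                          const std::string & database,
                                          const std::string & table,
                                          int filter_type)
    {
        // 1. Exact table match first.
        if (auto it = filters.find({database, table, filter_type}); it != filters.end())
            return it->second;
        // 2. Otherwise fall back to the database-wide policy
        //    ("" plays the role of RowPolicyName::ANY_TABLE_MARK).
        if (auto it = filters.find({database, "", filter_type}); it != filters.end())
            return it->second;
        return std::nullopt;
    }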
Settings::castValueUtil(setting_name, value); - if (!min_value.isNull()) - min_value = Settings::castValueUtil(setting_name, min_value); - if (!max_value.isNull()) - max_value = Settings::castValueUtil(setting_name, max_value); + if (value) + value = Settings::castValueUtil(setting_name, *value); + if (min_value) + min_value = Settings::castValueUtil(setting_name, *min_value); + if (max_value) + max_value = Settings::castValueUtil(setting_name, *max_value); } } bool SettingsProfileElement::isConstraint() const { - return this->writability || !this->min_value.isNull() || !this->max_value.isNull(); + return this->writability || this->min_value || this->max_value; } std::shared_ptr SettingsProfileElement::toAST() const @@ -187,8 +187,8 @@ Settings SettingsProfileElements::toSettings() const Settings res; for (const auto & elem : *this) { - if (!elem.setting_name.empty() && !isAllowBackupSetting(elem.setting_name) && !elem.value.isNull()) - res.set(elem.setting_name, elem.value); + if (!elem.setting_name.empty() && !isAllowBackupSetting(elem.setting_name) && elem.value) + res.set(elem.setting_name, *elem.value); } return res; } @@ -200,8 +200,8 @@ SettingsChanges SettingsProfileElements::toSettingsChanges() const { if (!elem.setting_name.empty() && !isAllowBackupSetting(elem.setting_name)) { - if (!elem.value.isNull()) - res.push_back({elem.setting_name, elem.value}); + if (elem.value) + res.push_back({elem.setting_name, *elem.value}); } } return res; @@ -214,8 +214,8 @@ SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessC if (!elem.setting_name.empty() && elem.isConstraint() && !isAllowBackupSetting(elem.setting_name)) res.set( elem.setting_name, - elem.min_value, - elem.max_value, + elem.min_value ? *elem.min_value : Field{}, + elem.max_value ? *elem.max_value : Field{}, elem.writability ? 
*elem.writability : SettingConstraintWritability::WRITABLE); return res; } @@ -240,8 +240,8 @@ bool SettingsProfileElements::isBackupAllowed() const { for (const auto & setting : *this) { - if (isAllowBackupSetting(setting.setting_name)) - return static_cast(SettingFieldBool{setting.value}); + if (isAllowBackupSetting(setting.setting_name) && setting.value) + return static_cast(SettingFieldBool{*setting.value}); } return true; } diff --git a/src/Access/SettingsProfileElement.h b/src/Access/SettingsProfileElement.h index 7f9379c1e47..7078f565295 100644 --- a/src/Access/SettingsProfileElement.h +++ b/src/Access/SettingsProfileElement.h @@ -23,9 +23,9 @@ struct SettingsProfileElement std::optional parent_profile; String setting_name; - Field value; - Field min_value; - Field max_value; + std::optional value; + std::optional min_value; + std::optional max_value; std::optional writability; auto toTuple() const { return std::tie(parent_profile, setting_name, value, min_value, max_value, writability); } diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 562df61e8aa..df0e4584709 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -11,6 +11,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -49,7 +53,12 @@ namespace UUID generateID(const IAccessEntity & entity) { return generateID(entity.getType(), entity.getName()); } - UserPtr parseUser(const Poco::Util::AbstractConfiguration & config, const String & user_name, const std::unordered_set & allowed_profile_ids, bool allow_no_password, bool allow_plaintext_password) + UserPtr parseUser( + const Poco::Util::AbstractConfiguration & config, + const String & user_name, + const std::unordered_set & allowed_profile_ids, + bool allow_no_password, + bool allow_plaintext_password) { auto user = std::make_shared(); user->setName(user_name); @@ -207,42 +216,99 @@ namespace } } - /// By default all databases are accessible - /// and the user can grant everything he has. - user->access.grantWithGrantOption(AccessType::ALL); - - if (databases) + const auto grants_config = user_config + ".grants"; + std::optional grant_queries; + if (config.has(grants_config)) { - user->access.revoke(AccessFlags::allFlags() - AccessFlags::allGlobalFlags()); - user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG); - for (const String & database : *databases) - user->access.grantWithGrantOption(AccessFlags::allFlags(), database); - } - - if (dictionaries) - { - user->access.revoke(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG); - for (const String & dictionary : *dictionaries) - user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG, dictionary); + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(grants_config, keys); + grant_queries.emplace(); + grant_queries->reserve(keys.size()); + for (const auto & key : keys) + { + const auto query = config.getString(grants_config + "." 
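Switching value/min_value/max_value from Field to std::optional<Field> lets "not specified in the profile" be told apart from a value that happens to be Null, so the constraint and settings code above tests the optional directly instead of calling isNull(). A small illustration of the distinction, with a stand-in type in place of DB::Field:

    #include <optional>

    // Stand-in for DB::Field; the real type is a variant that can itself hold Null.
    struct FieldLike { bool is_null = true; };

    struct ProfileElement
    {
        std::optional<FieldLike> value;      // nullopt  -> the profile does not set this at all
        std::optional<FieldLike> min_value;  // engaged  -> a bound was given, even if that bound is Null
        std::optional<FieldLike> max_value;

        bool isConstraint() const { return min_value.has_value() || max_value.has_value(); }
    };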
+ key); + grant_queries->push_back(query); + } } bool access_management = config.getBool(user_config + ".access_management", false); - if (!access_management) - { - user->access.revoke(AccessType::ACCESS_MANAGEMENT); - user->access.revokeGrantOption(AccessType::ALL); - } - bool named_collection_control = config.getBool(user_config + ".named_collection_control", false); - if (!named_collection_control) - { - user->access.revoke(AccessType::NAMED_COLLECTION_CONTROL); - } - bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false); - if (!show_named_collections_secrets) + + if (grant_queries) + if (databases || dictionaries || access_management || named_collection_control || show_named_collections_secrets) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Any other access control settings can't be specified with `grants`"); + + if (grant_queries) { - user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS_SECRETS); + ParserGrantQuery parser; + parser.parseWithoutGrantees(); + + for (const auto & string_query : *grant_queries) + { + String error_message; + const char * pos = string_query.data(); + auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, 0); + + if (!ast) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse grant query. Error: {}", error_message); + + auto & query = ast->as(); + + if (query.roles) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Roles can't be granted in config file"); + + if (!query.cluster.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Can't grant on cluster using config file"); + + if (query.grantees) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "You can't specify grantees in query using config file"); + + for (auto & element : query.access_rights_elements) + { + if (query.is_revoke) + user->access.revoke(element); + else + user->access.grant(element); + } + } + } + else + { + /// By default all databases are accessible + /// and the user can grant everything he has. 
+ user->access.grantWithGrantOption(AccessType::ALL); + + if (databases) + { + user->access.revoke(AccessFlags::allFlags() - AccessFlags::allGlobalFlags()); + user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG); + for (const String & database : *databases) + user->access.grantWithGrantOption(AccessFlags::allFlags(), database); + } + + if (dictionaries) + { + user->access.revoke(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG); + for (const String & dictionary : *dictionaries) + user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG, dictionary); + } + + if (!access_management) + { + user->access.revoke(AccessType::ACCESS_MANAGEMENT); + user->access.revokeGrantOption(AccessType::ALL); + } + + if (!named_collection_control) + { + user->access.revoke(AccessType::NAMED_COLLECTION_CONTROL); + } + + if (!show_named_collections_secrets) + { + user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS_SECRETS); + } } String default_database = config.getString(user_config + ".default_database", ""); @@ -252,7 +318,11 @@ namespace } - std::vector parseUsers(const Poco::Util::AbstractConfiguration & config, const std::unordered_set & allowed_profile_ids, bool allow_no_password, bool allow_plaintext_password) + std::vector parseUsers( + const Poco::Util::AbstractConfiguration & config, + const std::unordered_set & allowed_profile_ids, + bool allow_no_password, + bool allow_plaintext_password) { Poco::Util::AbstractConfiguration::Keys user_names; config.keys("users", user_names); diff --git a/src/AggregateFunctions/AggregateFunctionAny.cpp b/src/AggregateFunctions/AggregateFunctionAny.cpp index 9bc6e6af14f..7f57062126b 100644 --- a/src/AggregateFunctions/AggregateFunctionAny.cpp +++ b/src/AggregateFunctions/AggregateFunctionAny.cpp @@ -14,11 +14,29 @@ AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); } +template +AggregateFunctionPtr createAggregateFunctionNullableAny( + const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) +{ + return AggregateFunctionPtr( + createAggregateFunctionSingleNullableValue( + name, argument_types, parameters, settings)); +} + AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) { return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); } +template +AggregateFunctionPtr createAggregateFunctionNullableAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) +{ + return AggregateFunctionPtr(createAggregateFunctionSingleNullableValue< + AggregateFunctionsSingleValue, + AggregateFunctionAnyLastData, + RespectNulls>(name, argument_types, parameters, settings)); +} + AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) { return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); @@ -38,9 +56,15 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory) factory.registerFunction("first_value", { createAggregateFunctionAny, properties }, AggregateFunctionFactory::CaseInsensitive); + 
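The UsersConfigAccessStorage change above lets a users.xml entry describe access with explicit GRANT statements parsed by ParserGrantQuery, instead of the legacy allow_databases/access_management style flags; combining both now throws BAD_ARGUMENTS, and role grants, ON CLUSTER and explicit grantees are rejected. The hunk only iterates over the children of <grants>, so the child element name below (<query>) is assumed from the documented format rather than visible in the diff; a configuration along these lines is the intended usage:

    <users>
        <alice>
            <password>...</password>
            <grants>
                <query>GRANT SELECT ON db1.*</query>
                <query>GRANT INSERT, ALTER ON db2.events</query>
            </grants>
        </alice>
    </users>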
factory.registerFunction("first_value_respect_nulls", + { createAggregateFunctionNullableAny, properties }, + AggregateFunctionFactory::CaseInsensitive); factory.registerFunction("last_value", { createAggregateFunctionAnyLast, properties }, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("last_value_respect_nulls", + { createAggregateFunctionNullableAnyLast, properties }, + AggregateFunctionFactory::CaseInsensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h index 81ba298bb8a..480b4cc690e 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.h +++ b/src/AggregateFunctions/AggregateFunctionForEach.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 67cfa3f7356..15f500b8bb6 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -121,7 +121,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample( void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory) { - AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true }; + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; factory.registerFunction("groupArray", { createAggregateFunctionGroupArray, properties }); factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties }); diff --git a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h index 33a9966ee2c..5629de31c88 100644 --- a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h +++ b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h @@ -43,6 +43,7 @@ struct KolmogorovSmirnov : public StatisticalSample Float64 now_s = 0; UInt64 pos_x = 0; UInt64 pos_y = 0; + UInt64 pos_tmp; UInt64 n1 = x.size(); UInt64 n2 = y.size(); @@ -65,14 +66,22 @@ struct KolmogorovSmirnov : public StatisticalSample now_s -= n2_d; ++pos_y; } - max_s = std::max(max_s, now_s); - min_s = std::min(min_s, now_s); } else { - now_s += n1_d; - ++pos_x; + pos_tmp = pos_x + 1; + while (pos_tmp < x.size() && unlikely(fabs(x[pos_tmp] - x[pos_x]) <= tol)) + pos_tmp++; + now_s += n1_d * (pos_tmp - pos_x); + pos_x = pos_tmp; + pos_tmp = pos_y + 1; + while (pos_tmp < y.size() && unlikely(fabs(y[pos_tmp] - y[pos_y]) <= tol)) + pos_tmp++; + now_s -= n2_d * (pos_tmp - pos_y); + pos_y = pos_tmp; } + max_s = std::max(max_s, now_s); + min_s = std::min(min_s, now_s); } now_s += n1_d * (x.size() - pos_x) - n2_d * (y.size() - pos_y); min_s = std::min(min_s, now_s); diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 9c809352fd3..94c0d60be81 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -768,19 +769,23 @@ static_assert( /// For any other value types. 
+template struct SingleValueDataGeneric { private: using Self = SingleValueDataGeneric; Field value; + bool has_value = false; public: - static constexpr bool is_nullable = false; + static constexpr bool is_nullable = IS_NULLABLE; static constexpr bool is_any = false; bool has() const { + if constexpr (is_nullable) + return has_value; return !value.isNull(); } @@ -815,11 +820,15 @@ public: void change(const IColumn & column, size_t row_num, Arena *) { column.get(row_num, value); + if constexpr (is_nullable) + has_value = true; } void change(const Self & to, Arena *) { value = to.value; + if constexpr (is_nullable) + has_value = true; } bool changeFirstTime(const IColumn & column, size_t row_num, Arena * arena) @@ -835,7 +844,7 @@ public: bool changeFirstTime(const Self & to, Arena * arena) { - if (!has() && to.has()) + if (!has() && (is_nullable || to.has())) { change(to, arena); return true; @@ -870,27 +879,61 @@ public: } else { - Field new_value; - column.get(row_num, new_value); - if (new_value < value) + if constexpr (is_nullable) { - value = new_value; - return true; + Field new_value; + column.get(row_num, new_value); + if (!value.isNull() && (new_value.isNull() || new_value < value)) + { + value = new_value; + return true; + } + else + return false; } else - return false; + { + Field new_value; + column.get(row_num, new_value); + if (new_value < value) + { + value = new_value; + return true; + } + else + return false; + } } } bool changeIfLess(const Self & to, Arena * arena) { - if (to.has() && (!has() || to.value < value)) + if (!to.has()) + return false; + if constexpr (is_nullable) { - change(to, arena); - return true; + if (!has()) + { + change(to, arena); + return true; + } + if (to.value.isNull() || (!value.isNull() && to.value < value)) + { + value = to.value; + return true; + } + return false; } else - return false; + { + if (!has() || to.value < value) + { + change(to, arena); + return true; + } + else + return false; + } } bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena) @@ -902,27 +945,55 @@ public: } else { - Field new_value; - column.get(row_num, new_value); - if (new_value > value) + if constexpr (is_nullable) { - value = new_value; - return true; + Field new_value; + column.get(row_num, new_value); + if (!value.isNull() && (new_value.isNull() || value < new_value)) + { + value = new_value; + return true; + } + return false; } else - return false; + { + Field new_value; + column.get(row_num, new_value); + if (new_value > value) + { + value = new_value; + return true; + } + else + return false; + } } } bool changeIfGreater(const Self & to, Arena * arena) { - if (to.has() && (!has() || to.value > value)) + if (!to.has()) + return false; + if constexpr (is_nullable) { - change(to, arena); - return true; + if (!value.isNull() && (to.value.isNull() || value < to.value)) + { + value = to.value; + return true; + } + return false; } else - return false; + { + if (!has() || to.value > value) + { + change(to, arena); + return true; + } + else + return false; + } } bool isEqualTo(const IColumn & column, size_t row_num) const @@ -1359,6 +1430,17 @@ public: this->data(place).insertResultInto(to); } + AggregateFunctionPtr getOwnNullAdapter( + const AggregateFunctionPtr & nested_function, + const DataTypes & /*arguments*/, + const Array & /*params*/, + const AggregateFunctionProperties & /*properties*/) const override + { + if (Data::is_nullable) + return nested_function; + return nullptr; + } + #if USE_EMBEDDED_COMPILER bool isCompilable() const override 
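The nullable specialisation of SingleValueDataGeneric keeps an explicit has_value flag because, once NULLs are respected, a stored Null is a legitimate result rather than the "nothing seen yet" marker, and getOwnNullAdapter() returns the nested function unchanged so first_value_respect_nulls / last_value_respect_nulls are not wrapped by the usual Null combinator and can return that Null. In SQL terms the intended difference is roughly: over the values (NULL, 1, 2), first_value(x) yields 1 while first_value_respect_nulls(x) yields NULL. A toy sketch of the "separate flag instead of a sentinel value" idea, not the template from the diff:

    #include <optional>

    struct NullableAnyState
    {
        std::optional<int> value;   // nullopt models SQL NULL as a stored value
        bool seen = false;          // so "saw a NULL row" and "saw nothing" stay distinct

        void add(std::optional<int> v)
        {
            if (!seen) { value = v; seen = true; }   // keep the first value, NULL included
        }
    };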
diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp index 19c66db98cd..3d3d7af3026 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -72,7 +72,7 @@ public: { /// Currently the only functions that returns not-NULL on all NULL arguments are count and uniq, and they returns UInt64. if (properties.returns_default_when_only_null) - return std::make_shared(arguments, params, nested_function->getResultType()); + return std::make_shared(arguments, params, std::make_shared()); else return std::make_shared(arguments, params, std::make_shared(std::make_shared())); } diff --git a/src/AggregateFunctions/HelpersMinMaxAny.h b/src/AggregateFunctions/HelpersMinMaxAny.h index 026a206b109..31ae5fdd59a 100644 --- a/src/AggregateFunctions/HelpersMinMaxAny.h +++ b/src/AggregateFunctions/HelpersMinMaxAny.h @@ -9,7 +9,6 @@ #include #include - namespace DB { struct Settings; @@ -22,7 +21,6 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na assertUnary(name, argument_types); const DataTypePtr & argument_type = argument_types[0]; - WhichDataType which(argument_type); #define DISPATCH(TYPE) \ if (which.idx == TypeIndex::TYPE) return new AggregateFunctionTemplate>>(argument_type); /// NOLINT @@ -46,7 +44,28 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na if (which.idx == TypeIndex::String) return new AggregateFunctionTemplate>(argument_type); - return new AggregateFunctionTemplate>(argument_type); + return new AggregateFunctionTemplate>>(argument_type); +} + +template