Merge branch 'master' into async-loader-integration

This commit is contained in:
serxa 2023-10-24 10:09:49 +00:00
commit 749cdb9954
445 changed files with 11828 additions and 4380 deletions

View File

@ -7,6 +7,8 @@ assignees: ''
---
> Please make sure that the version you're using is still supported (you can find the list [here](https://github.com/ClickHouse/ClickHouse/blob/master/SECURITY.md#scope-and-supported-versions)).
> You have to provide the following information whenever possible.
**Describe what's wrong**

View File

@ -77,6 +77,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -185,6 +186,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -227,6 +229,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -399,6 +402,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -448,6 +452,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -487,6 +492,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -18,6 +18,7 @@ on: # yamllint disable-line rule:truthy
- 'docs/**'
- 'utils/check-style/aspell-ignore/**'
- 'tests/ci/docs_check.py'
- '.github/workflows/docs_check.yml'
jobs:
CheckLabels:
runs-on: [self-hosted, style-checker]
@ -73,6 +74,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:

View File

@ -24,6 +24,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0
filter: tree:0
- name: Jepsen Test
run: |
sudo rm -fr "$TEMP_PATH"
@ -53,6 +54,7 @@ jobs:
# with:
# clear-repository: true
# fetch-depth: 0
# filter: tree:0
# - name: Jepsen Test
# run: |
# sudo rm -fr "$TEMP_PATH"

View File

@ -61,6 +61,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -200,6 +201,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -242,6 +244,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -283,6 +286,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -581,6 +585,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -630,6 +635,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -672,6 +678,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -714,6 +721,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -763,6 +771,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -805,6 +814,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -847,6 +857,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -889,6 +900,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -931,6 +943,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -963,6 +976,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -54,6 +54,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -90,6 +91,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
filter: tree:0
submodules: true
- name: Set up JDK 11
uses: actions/setup-java@v1

View File

@ -18,6 +18,7 @@ on: # yamllint disable-line rule:truthy
- 'docs/**'
- 'utils/check-style/aspell-ignore/**'
- 'tests/ci/docs_check.py'
- '.github/workflows/docs_check.yml'
##########################################################################################
##################################### SMALL CHECKS #######################################
##########################################################################################
@ -94,6 +95,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -266,6 +268,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # for performance artifact
filter: tree:0
submodules: true
- name: Build
run: |
@ -350,6 +353,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # for performance artifact
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -1021,6 +1025,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -49,6 +49,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # otherwise we will have no version info
filter: tree:0
ref: ${{ env.GITHUB_TAG }}
- name: Check docker clickhouse/clickhouse-server building
run: |

View File

@ -53,6 +53,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -161,6 +162,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -203,6 +205,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -456,6 +459,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -505,6 +509,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -544,6 +549,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -38,6 +38,7 @@ jobs:
with:
ref: master
fetch-depth: 0
filter: tree:0
- name: Update versions, docker version, changelog, security
env:
GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}

2
.gitmodules vendored
View File

@ -184,7 +184,7 @@
url = https://github.com/ClickHouse/nanodbc
[submodule "contrib/datasketches-cpp"]
path = contrib/datasketches-cpp
url = https://github.com/ClickHouse/datasketches-cpp
url = https://github.com/apache/datasketches-cpp
[submodule "contrib/yaml-cpp"]
path = contrib/yaml-cpp
url = https://github.com/ClickHouse/yaml-cpp

View File

@ -88,7 +88,6 @@ if (ENABLE_FUZZING)
set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF)
set (ENABLE_LIBRARIES 0)
set (ENABLE_SSL 1)
set (ENABLE_EMBEDDED_COMPILER 0)
set (ENABLE_EXAMPLES 0)
set (ENABLE_UTILS 0)
set (ENABLE_THINLTO 0)

View File

@ -131,29 +131,3 @@ void sort(RandomIt first, RandomIt last)
using comparator = std::less<value_type>;
::sort(first, last, comparator());
}
/** Try to fast sort elements for common sorting patterns:
* 1. If elements are already sorted.
* 2. If elements are already almost sorted.
* 3. If elements are already sorted in reverse order.
*
* Returns true if fast sort was performed or elements were already sorted, false otherwise.
*/
template <typename RandomIt, typename Compare>
bool trySort(RandomIt first, RandomIt last, Compare compare)
{
#ifndef NDEBUG
::shuffle(first, last);
#endif
ComparatorWrapper<Compare> compare_wrapper = compare;
return ::pdqsort_try_sort(first, last, compare_wrapper);
}
template <typename RandomIt>
bool trySort(RandomIt first, RandomIt last)
{
using value_type = typename std::iterator_traits<RandomIt>::value_type;
using comparator = std::less<value_type>;
return ::trySort(first, last, comparator());
}

View File

@ -54,9 +54,6 @@ if (SANITIZE)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
# llvm-tblgen, that is used during LLVM build, doesn't work with UBSan.
set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "")
else ()
message (FATAL_ERROR "Unknown sanitizer type: ${SANITIZE}")
endif ()

View File

@ -68,6 +68,7 @@ if (CMAKE_CROSSCOMPILING)
set (ENABLE_ORC OFF CACHE INTERNAL "")
set (ENABLE_GRPC OFF CACHE INTERNAL "")
set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "")
set (ENABLE_DWARF_PARSER OFF CACHE INTERNAL "")
else ()
message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
endif ()

2
contrib/avro vendored

@ -1 +1 @@
Subproject commit 7832659ec986075d560f930c288e973c64679552
Subproject commit 2fb8a8a6ec0eab9109b68abf3b4857e8c476b918

@ -1 +1 @@
Subproject commit 7abd49bb2e72bf9a5029993d31dcb1872da88292
Subproject commit c3abaaefe5fa400eed99e082af07c1b61a7144db

2
contrib/grpc vendored

@ -1 +1 @@
Subproject commit 3f975ecab377cd5f739af780566596128f17bb74
Subproject commit 80e8fd63fef4a8d35c1abe612854f238eadadf0a

View File

@ -1,12 +1,17 @@
if (APPLE OR SANITIZE STREQUAL "undefined" OR SANITIZE STREQUAL "memory")
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
# llvm-tblgen, that is used during LLVM build, doesn't work with UBSan.
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
set (ENABLE_DWARF_PARSER_DEFAULT OFF)
else()
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ${ENABLE_LIBRARIES})
set (ENABLE_DWARF_PARSER_DEFAULT ${ENABLE_LIBRARIES})
endif()
option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during query execution (uses LLVM library)" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
if (NOT ENABLE_EMBEDDED_COMPILER)
option (ENABLE_DWARF_PARSER "Enable support for DWARF input format (uses LLVM library)" ${ENABLE_DWARF_PARSER_DEFAULT})
if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER)
message(STATUS "Not using LLVM")
return()
endif()

View File

@ -54,10 +54,8 @@ namespace pdqsort_detail {
block_size = 64,
// Cacheline size, assumes power of two.
cacheline_size = 64,
cacheline_size = 64
/// Try sort allowed iterations
try_sort_iterations = 3,
};
#if __cplusplus >= 201103L
@ -503,167 +501,6 @@ namespace pdqsort_detail {
leftmost = false;
}
}
template<class Iter, class Compare, bool Branchless>
inline bool pdqsort_try_sort_loop(Iter begin,
Iter end,
Compare comp,
size_t bad_allowed,
size_t iterations_allowed,
bool force_sort = false,
bool leftmost = true) {
typedef typename std::iterator_traits<Iter>::difference_type diff_t;
// Use a while loop for tail recursion elimination.
while (true) {
if (!force_sort && iterations_allowed == 0) {
return false;
}
diff_t size = end - begin;
// Insertion sort is faster for small arrays.
if (size < insertion_sort_threshold) {
if (leftmost) insertion_sort(begin, end, comp);
else unguarded_insertion_sort(begin, end, comp);
return true;
}
// Choose pivot as median of 3 or pseudomedian of 9.
diff_t s2 = size / 2;
if (size > ninther_threshold) {
sort3(begin, begin + s2, end - 1, comp);
sort3(begin + 1, begin + (s2 - 1), end - 2, comp);
sort3(begin + 2, begin + (s2 + 1), end - 3, comp);
sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp);
std::iter_swap(begin, begin + s2);
} else sort3(begin + s2, begin, end - 1, comp);
// If *(begin - 1) is the end of the right partition of a previous partition operation
// there is no element in [begin, end) that is smaller than *(begin - 1). Then if our
// pivot compares equal to *(begin - 1) we change strategy, putting equal elements in
// the left partition, greater elements in the right partition. We do not have to
// recurse on the left partition, since it's sorted (all equal).
if (!leftmost && !comp(*(begin - 1), *begin)) {
begin = partition_left(begin, end, comp) + 1;
continue;
}
// Partition and get results.
std::pair<Iter, bool> part_result =
Branchless ? partition_right_branchless(begin, end, comp)
: partition_right(begin, end, comp);
Iter pivot_pos = part_result.first;
bool already_partitioned = part_result.second;
// Check for a highly unbalanced partition.
diff_t l_size = pivot_pos - begin;
diff_t r_size = end - (pivot_pos + 1);
bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;
// If we got a highly unbalanced partition we shuffle elements to break many patterns.
if (highly_unbalanced) {
if (!force_sort) {
return false;
}
// If we had too many bad partitions, switch to heapsort to guarantee O(n log n).
if (--bad_allowed == 0) {
std::make_heap(begin, end, comp);
std::sort_heap(begin, end, comp);
return true;
}
if (l_size >= insertion_sort_threshold) {
std::iter_swap(begin, begin + l_size / 4);
std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);
if (l_size > ninther_threshold) {
std::iter_swap(begin + 1, begin + (l_size / 4 + 1));
std::iter_swap(begin + 2, begin + (l_size / 4 + 2));
std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1));
std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
}
}
if (r_size >= insertion_sort_threshold) {
std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4));
std::iter_swap(end - 1, end - r_size / 4);
if (r_size > ninther_threshold) {
std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4));
std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4));
std::iter_swap(end - 2, end - (1 + r_size / 4));
std::iter_swap(end - 3, end - (2 + r_size / 4));
}
}
} else {
// If we were decently balanced and we tried to sort an already partitioned
// sequence try to use insertion sort.
if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
&& partial_insertion_sort(pivot_pos + 1, end, comp)) {
return true;
}
}
// Sort the left partition first using recursion and do tail recursion elimination for
// the right-hand partition.
if (pdqsort_try_sort_loop<Iter, Compare, Branchless>(begin,
pivot_pos,
comp,
bad_allowed,
iterations_allowed - 1,
force_sort,
leftmost)) {
force_sort = true;
} else {
return false;
}
--iterations_allowed;
begin = pivot_pos + 1;
leftmost = false;
}
return false;
}
template<class Iter, class Compare, bool Branchless>
inline bool pdqsort_try_sort_impl(Iter begin, Iter end, Compare comp, size_t bad_allowed)
{
typedef typename std::iterator_traits<Iter>::difference_type diff_t;
static constexpr size_t iterations_allowed = pdqsort_detail::try_sort_iterations;
static constexpr size_t num_to_try = 16;
diff_t size = end - begin;
if (size > num_to_try * 10)
{
size_t out_of_order_elements = 0;
for (size_t i = 1; i < num_to_try; ++i)
{
diff_t offset = size / num_to_try;
diff_t prev_position = offset * (i - 1);
diff_t curr_position = offset * i;
diff_t next_position = offset * (i + 1) - 1;
bool prev_less_than_curr = comp(*(begin + prev_position), *(begin + curr_position));
bool curr_less_than_next = comp(*(begin + curr_position), *(begin + next_position));
if ((prev_less_than_curr && curr_less_than_next) || (!prev_less_than_curr && !curr_less_than_next))
continue;
++out_of_order_elements;
if (out_of_order_elements > iterations_allowed)
return false;
}
}
return pdqsort_try_sort_loop<Iter, Compare, Branchless>(begin, end, comp, bad_allowed, iterations_allowed);
}
}
@ -701,41 +538,6 @@ inline void pdqsort_branchless(Iter begin, Iter end) {
pdqsort_branchless(begin, end, std::less<T>());
}
template<class Iter, class Compare>
inline bool pdqsort_try_sort(Iter begin, Iter end, Compare comp) {
if (begin == end) return true;
#if __cplusplus >= 201103L
return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare,
pdqsort_detail::is_default_compare<typename std::decay<Compare>::type>::value &&
std::is_arithmetic<typename std::iterator_traits<Iter>::value_type>::value>(
begin, end, comp, pdqsort_detail::log2(end - begin));
#else
return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare, false>(
begin, end, comp, pdqsort_detail::log2(end - begin));
#endif
}
template<class Iter>
inline bool pdqsort_try_sort(Iter begin, Iter end) {
typedef typename std::iterator_traits<Iter>::value_type T;
return pdqsort_try_sort(begin, end, std::less<T>());
}
template<class Iter, class Compare>
inline bool pdqsort_try_sort_branchless(Iter begin, Iter end, Compare comp) {
if (begin == end) return true;
return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare, true>(
begin, end, comp, pdqsort_detail::log2(end - begin));
}
template<class Iter>
inline bool pdqsort_try_sort_branchless(Iter begin, Iter end) {
typedef typename std::iterator_traits<Iter>::value_type T;
return pdqsort_try_sort_branchless(begin, end, std::less<T>());
}
#undef PDQSORT_PREFER_MOVE

View File

@ -24,6 +24,12 @@ else ()
set (SNAPPY_HAVE_SSSE3 0)
endif ()
if (ARCH_AMD64 AND ENABLE_SSE42)
set (SNAPPY_HAVE_X86_CRC32 1)
else ()
set (SNAPPY_HAVE_X86_CRC32 0)
endif ()
configure_file(
"${SOURCE_DIR}/cmake/config.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/config.h")

View File

@ -2,8 +2,8 @@
# If the image is built from Dockerfile.alpine, then the `-alpine` suffix is added automatically,
# so the only purpose of Dockerfile.ubuntu is to push `latest`, `head` and so on w/o suffixes
FROM ubuntu:20.04 AS glibc-donor
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
@ -31,8 +31,10 @@ RUN arch=${TARGETARCH:-amd64} \
arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
ARG VERSION="23.9.1.1854"
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.9.2.56"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.
@ -46,16 +48,14 @@ ARG PACKAGES="clickhouse-keeper"
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& for package in ${PACKAGES}; do \
{ \
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
} || \
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
} ; \
} || exit 1 \
( \
cd /tmp \
&& echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz.sha512" \
&& sed 's:/output/:/tmp/:' < "${package}-${VERSION}-${arch}.tgz.sha512" | sha512sum -c \
&& tar xvzf "${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / \
) \
; done \
&& rm /tmp/*.tgz /install -r \
&& addgroup -S -g 101 clickhouse \

View File

@ -172,10 +172,15 @@ then
# This is why we add this repository snapshot from CI to the performance test
# package.
mkdir "$PERF_OUTPUT"/ch
git -C "$PERF_OUTPUT"/ch init --bare
git -C "$PERF_OUTPUT"/ch remote add origin /build
git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin HEAD:pr
git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin master:master
# Copy .git only, but skip modules, using tar
tar c -C /build/ --exclude='.git/modules/**' .git | tar x -C "$PERF_OUTPUT"/ch
# Create branch pr and origin/master to have them for the following performance comparison
git -C "$PERF_OUTPUT"/ch branch pr
git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin master:origin/master
# Clean remote, to not have it stale
git -C "$PERF_OUTPUT"/ch remote | xargs -n1 git -C "$PERF_OUTPUT"/ch remote remove
# And clean all tags
git -C "$PERF_OUTPUT"/ch tag | xargs git -C "$PERF_OUTPUT"/ch tag -d
git -C "$PERF_OUTPUT"/ch reset --soft pr
git -C "$PERF_OUTPUT"/ch log -5
(

View File

@ -23,7 +23,6 @@ COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
@ -33,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.9.1.1854"
ARG VERSION="23.9.2.56"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.
@ -45,16 +44,14 @@ ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
RUN arch=${TARGETARCH:-amd64} \
&& for package in ${PACKAGES}; do \
{ \
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
} || \
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
} ; \
} || exit 1 \
( \
cd /tmp \
&& echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz.sha512" \
&& sed 's:/output/:/tmp/:' < "${package}-${VERSION}-${arch}.tgz.sha512" | sha512sum -c \
&& tar xvzf "${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / \
) \
; done \
&& rm /tmp/*.tgz /install -r \
&& addgroup -S -g 101 clickhouse \

View File

@ -5,6 +5,13 @@ ARG DEBIAN_FRONTEND=noninteractive
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the begining of Dockerfile before any packages will be
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list \
&& groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
@ -23,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.9.1.1854"
ARG VERSION="23.9.2.56"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image
@ -35,13 +42,6 @@ ARG deb_location_url=""
# from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
ARG single_binary_location_url=""
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the begining of Dockerfile before any packages will be
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
ARG TARGETARCH
# install from a web location with deb packages

View File

@ -104,66 +104,76 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL
EOT
fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port --try)"
HTTPS_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=https_port --try)"
# checking $DATA_DIR for initialization
if [ -d "${DATA_DIR%/}/data" ]; then
DATABASE_ALREADY_EXISTS='true'
fi
if [ -n "$HTTP_PORT" ]; then
URL="http://127.0.0.1:$HTTP_PORT/ping"
else
URL="https://127.0.0.1:$HTTPS_PORT/ping"
fi
# only run initialization on an empty data directory
if [ -z "${DATABASE_ALREADY_EXISTS}" ]; then
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port --try)"
HTTPS_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=https_port --try)"
# Listen only on localhost until the initialization is done
/usr/bin/clickhouse su "${USER}:${GROUP}" /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
pid="$!"
if [ -n "$HTTP_PORT" ]; then
URL="http://127.0.0.1:$HTTP_PORT/ping"
else
URL="https://127.0.0.1:$HTTPS_PORT/ping"
fi
# check if clickhouse is ready to accept connections
# will try to send ping clickhouse via http_port (max 1000 retries by default, with 1 sec timeout and 1 sec delay between retries)
tries=${CLICKHOUSE_INIT_TIMEOUT:-1000}
while ! wget --spider --no-check-certificate -T 1 -q "$URL" 2>/dev/null; do
if [ "$tries" -le "0" ]; then
echo >&2 'ClickHouse init process failed.'
# Listen only on localhost until the initialization is done
/usr/bin/clickhouse su "${USER}:${GROUP}" /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
pid="$!"
# check if clickhouse is ready to accept connections
# will try to send ping clickhouse via http_port (max 1000 retries by default, with 1 sec timeout and 1 sec delay between retries)
tries=${CLICKHOUSE_INIT_TIMEOUT:-1000}
while ! wget --spider --no-check-certificate -T 1 -q "$URL" 2>/dev/null; do
if [ "$tries" -le "0" ]; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
tries=$(( tries-1 ))
sleep 1
done
clickhouseclient=( clickhouse-client --multiquery --host "127.0.0.1" -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )
echo
# create default database, if defined
if [ -n "$CLICKHOUSE_DB" ]; then
echo "$0: create database '$CLICKHOUSE_DB'"
"${clickhouseclient[@]}" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
fi
for f in /docker-entrypoint-initdb.d/*; do
case "$f" in
*.sh)
if [ -x "$f" ]; then
echo "$0: running $f"
"$f"
else
echo "$0: sourcing $f"
# shellcheck source=/dev/null
. "$f"
fi
;;
*.sql) echo "$0: running $f"; "${clickhouseclient[@]}" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
echo
done
if ! kill -s TERM "$pid" || ! wait "$pid"; then
echo >&2 'Finishing of ClickHouse init process failed.'
exit 1
fi
tries=$(( tries-1 ))
sleep 1
done
clickhouseclient=( clickhouse-client --multiquery --host "127.0.0.1" -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )
echo
# create default database, if defined
if [ -n "$CLICKHOUSE_DB" ]; then
echo "$0: create database '$CLICKHOUSE_DB'"
"${clickhouseclient[@]}" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
fi
for f in /docker-entrypoint-initdb.d/*; do
case "$f" in
*.sh)
if [ -x "$f" ]; then
echo "$0: running $f"
"$f"
else
echo "$0: sourcing $f"
# shellcheck source=/dev/null
. "$f"
fi
;;
*.sql) echo "$0: running $f"; "${clickhouseclient[@]}" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
echo
done
if ! kill -s TERM "$pid" || ! wait "$pid"; then
echo >&2 'Finishing of ClickHouse init process failed.'
exit 1
fi
else
echo "ClickHouse Database directory appears to contain a database; Skipping initialization"
fi
# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments

View File

@ -175,7 +175,6 @@ function run_cmake
"-DENABLE_LIBRARIES=0"
"-DENABLE_TESTS=0"
"-DENABLE_UTILS=0"
"-DENABLE_EMBEDDED_COMPILER=0"
"-DENABLE_THINLTO=0"
"-DENABLE_NURAFT=1"
"-DENABLE_SIMDJSON=1"

View File

@ -337,8 +337,8 @@ quit
# which is confusing.
task_exit_code=$fuzzer_exit_code
echo "failure" > status.txt
{ rg --text -o "Found error:.*" fuzzer.log \
|| rg --text -ao "Exception:.*" fuzzer.log \
{ rg -ao "Found error:.*" fuzzer.log \
|| rg -ao "Exception:.*" fuzzer.log \
|| echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \
| tail -1 > description.txt
fi

View File

@ -61,11 +61,13 @@ function configure()
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
}
# Randomize all Keeper feature flags
randomize_config_boolean_value filtered_list
randomize_config_boolean_value multi_read
randomize_config_boolean_value check_not_exists
randomize_config_boolean_value create_if_not_exists
if [[ -n "$RANDOMIZE_KEEPER_FEATURE_FLAGS" ]] && [[ "$RANDOMIZE_KEEPER_FEATURE_FLAGS" -eq 1 ]]; then
# Randomize all Keeper feature flags
randomize_config_boolean_value filtered_list
randomize_config_boolean_value multi_read
randomize_config_boolean_value check_not_exists
randomize_config_boolean_value create_if_not_exists
fi
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml

View File

@ -0,0 +1,52 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.3.14.78-lts (c8f4ba52c65) FIXME as compared to v23.3.13.6-lts (25635e27551)
#### Build/Testing/Packaging Improvement
* Backported in [#54312](https://github.com/ClickHouse/ClickHouse/issues/54312): Fix strange additional QEMU logging after [#47151](https://github.com/ClickHouse/ClickHouse/issues/47151), see https://s3.amazonaws.com/clickhouse-test-reports/50078/a4743996ee4f3583884d07bcd6501df0cfdaa346/stateless_tests__release__databasereplicated__[3_4].html. [#50442](https://github.com/ClickHouse/ClickHouse/pull/50442) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#55391](https://github.com/ClickHouse/ClickHouse/issues/55391): Resource with source code including submodules is built in Darwin special build task. It may be used to build ClickHouse without checkouting submodules. [#51435](https://github.com/ClickHouse/ClickHouse/pull/51435) ([Ilya Yatsishin](https://github.com/qoega)).
* Backported in [#54703](https://github.com/ClickHouse/ClickHouse/issues/54703): Enrich `changed_images.json` with the latest tag from master for images that are not changed in the pull request. [#54369](https://github.com/ClickHouse/ClickHouse/pull/54369) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#54679](https://github.com/ClickHouse/ClickHouse/issues/54679): We build and upload them for every push, which isn't worth it. [#54675](https://github.com/ClickHouse/ClickHouse/pull/54675) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)).
* Fix sorting of sparse columns with large limit [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)).
* Fix adding sub-second intervals to DateTime [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix: allow IPv6 for bloom filter [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Check for overflow before addition in `analysisOfVariance` function [#54385](https://github.com/ClickHouse/ClickHouse/pull/54385) ([Antonio Andelic](https://github.com/antonio2368)).
* reproduce and fix the bug in removeSharedRecursive [#54430](https://github.com/ClickHouse/ClickHouse/pull/54430) ([Sema Checherinda](https://github.com/CheSema)).
* Fix aggregate projections with normalized states [#54480](https://github.com/ClickHouse/ClickHouse/pull/54480) ([Amos Bird](https://github.com/amosbird)).
* Fix possible parsing error in WithNames formats with disabled input_format_with_names_use_header [#54513](https://github.com/ClickHouse/ClickHouse/pull/54513) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix "Invalid number of rows in Chunk" in MaterializedPostgreSQL [#54844](https://github.com/ClickHouse/ClickHouse/pull/54844) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Prevent attaching parts from tables with different projections or indices [#55062](https://github.com/ClickHouse/ClickHouse/pull/55062) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix trash optimization (up to a certain extent) [#55353](https://github.com/ClickHouse/ClickHouse/pull/55353) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix parsing of arrays in cast operator [#55417](https://github.com/ClickHouse/ClickHouse/pull/55417) ([Anton Popov](https://github.com/CurtizJ)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Backport [#54430](https://github.com/ClickHouse/ClickHouse/issues/54430) to 23.3: reproduce and fix the bug in removeSharedRecursive"'. [#54731](https://github.com/ClickHouse/ClickHouse/pull/54731) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Test libunwind changes. [#51436](https://github.com/ClickHouse/ClickHouse/pull/51436) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Refactor CI_CONFIG [#52948](https://github.com/ClickHouse/ClickHouse/pull/52948) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use pathlib.Path in S3Helper, rewrite build reports, improve small things [#54010](https://github.com/ClickHouse/ClickHouse/pull/54010) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fixed parameterized_view test after backporting a fix 23.3 [#54401](https://github.com/ClickHouse/ClickHouse/pull/54401) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Update automated commit status comment [#54441](https://github.com/ClickHouse/ClickHouse/pull/54441) ([vdimir](https://github.com/vdimir)).
* S3 artifacts [#54504](https://github.com/ClickHouse/ClickHouse/pull/54504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix CI skip build and skip tests checks [#54532](https://github.com/ClickHouse/ClickHouse/pull/54532) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Update WebObjectStorage.cpp [#54695](https://github.com/ClickHouse/ClickHouse/pull/54695) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Do not set PR status label [#54799](https://github.com/ClickHouse/ClickHouse/pull/54799) ([vdimir](https://github.com/vdimir)).
* Get rid of the most of `os.path` stuff [#55028](https://github.com/ClickHouse/ClickHouse/pull/55028) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* check if block is empty after async insert retries [#55143](https://github.com/ClickHouse/ClickHouse/pull/55143) ([Han Fei](https://github.com/hanfei1991)).
* MaterializedPostgreSQL: remove back check [#55297](https://github.com/ClickHouse/ClickHouse/pull/55297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Remove existing moving/ dir if allow_remove_stale_moving_parts is off [#55480](https://github.com/ClickHouse/ClickHouse/pull/55480) ([Mike Kot](https://github.com/myrrc)).

View File

@ -0,0 +1,51 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.8.4.69-lts (d4d1e7b9ded) FIXME as compared to v23.8.3.48-lts (ebe4eb3d23e)
#### Build/Testing/Packaging Improvement
* Backported in [#55673](https://github.com/ClickHouse/ClickHouse/issues/55673): If the database is already initialized, it doesn't need to be initialized again upon subsequent launches. This can potentially fix the issue of infinite container restarts when the database fails to load within 1000 attempts (relevant for very large databases and multi-node setups). [#50724](https://github.com/ClickHouse/ClickHouse/pull/50724) ([Alexander Nikolaev](https://github.com/AlexNik)).
* Backported in [#55293](https://github.com/ClickHouse/ClickHouse/issues/55293): Resource with source code including submodules is built in Darwin special build task. It may be used to build ClickHouse without checkouting submodules. [#51435](https://github.com/ClickHouse/ClickHouse/pull/51435) ([Ilya Yatsishin](https://github.com/qoega)).
* Backported in [#55366](https://github.com/ClickHouse/ClickHouse/issues/55366): Solve issue with launching standalone clickhouse-keeper from clickhouse-server package. [#55226](https://github.com/ClickHouse/ClickHouse/pull/55226) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#55725](https://github.com/ClickHouse/ClickHouse/issues/55725): Fix integration check python script to use gh api url - Add Readme for CI tests. [#55716](https://github.com/ClickHouse/ClickHouse/pull/55716) ([Max K.](https://github.com/mkaynov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix "Invalid number of rows in Chunk" in MaterializedPostgreSQL [#54844](https://github.com/ClickHouse/ClickHouse/pull/54844) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Move obsolete format settings to separate section [#54855](https://github.com/ClickHouse/ClickHouse/pull/54855) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix: insert quorum w/o keeper retries [#55026](https://github.com/ClickHouse/ClickHouse/pull/55026) ([Igor Nikonov](https://github.com/devcrafter)).
* Prevent attaching parts from tables with different projections or indices [#55062](https://github.com/ClickHouse/ClickHouse/pull/55062) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Proper cleanup in case of exception in ctor of ShellCommandSource [#55103](https://github.com/ClickHouse/ClickHouse/pull/55103) ([Alexander Gololobov](https://github.com/davenger)).
* Fix deadlock in LDAP assigned role update [#55119](https://github.com/ClickHouse/ClickHouse/pull/55119) ([Julian Maicher](https://github.com/jmaicher)).
* Fix for background download in fs cache [#55252](https://github.com/ClickHouse/ClickHouse/pull/55252) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix functions execution over sparse columns [#55275](https://github.com/ClickHouse/ClickHouse/pull/55275) ([Azat Khuzhin](https://github.com/azat)).
* Fix bug with inability to drop detached partition in replicated merge tree on top of S3 without zero copy [#55309](https://github.com/ClickHouse/ClickHouse/pull/55309) ([alesapin](https://github.com/alesapin)).
* Fix trash optimization (up to a certain extent) [#55353](https://github.com/ClickHouse/ClickHouse/pull/55353) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix parsing of arrays in cast operator [#55417](https://github.com/ClickHouse/ClickHouse/pull/55417) ([Anton Popov](https://github.com/CurtizJ)).
* Fix filtering by virtual columns with OR filter in query [#55418](https://github.com/ClickHouse/ClickHouse/pull/55418) ([Azat Khuzhin](https://github.com/azat)).
* Fix MongoDB connection issues [#55419](https://github.com/ClickHouse/ClickHouse/pull/55419) ([Nikolay Degterinsky](https://github.com/evillique)).
* Destroy fiber in case of exception in cancelBefore in AsyncTaskExecutor [#55516](https://github.com/ClickHouse/ClickHouse/pull/55516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash in QueryNormalizer with cyclic aliases [#55602](https://github.com/ClickHouse/ClickHouse/pull/55602) ([vdimir](https://github.com/vdimir)).
* Fix filtering by virtual columns with OR filter in query (resubmit) [#55678](https://github.com/ClickHouse/ClickHouse/pull/55678) ([Azat Khuzhin](https://github.com/azat)).
#### NO CL CATEGORY
* Backported in [#55706](https://github.com/ClickHouse/ClickHouse/issues/55706):. [#55657](https://github.com/ClickHouse/ClickHouse/pull/55657) ([Antonio Andelic](https://github.com/antonio2368)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* S3 artifacts [#54504](https://github.com/ClickHouse/ClickHouse/pull/54504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix CI skip build and skip tests checks [#54532](https://github.com/ClickHouse/ClickHouse/pull/54532) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Update WebObjectStorage.cpp [#54695](https://github.com/ClickHouse/ClickHouse/pull/54695) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Do not set PR status label [#54799](https://github.com/ClickHouse/ClickHouse/pull/54799) ([vdimir](https://github.com/vdimir)).
* Get rid of the most of `os.path` stuff [#55028](https://github.com/ClickHouse/ClickHouse/pull/55028) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Clean data dir and always start an old server version in aggregate functions compatibility test. [#55105](https://github.com/ClickHouse/ClickHouse/pull/55105) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* check if block is empty after async insert retries [#55143](https://github.com/ClickHouse/ClickHouse/pull/55143) ([Han Fei](https://github.com/hanfei1991)).
* MaterializedPostgreSQL: remove back check [#55297](https://github.com/ClickHouse/ClickHouse/pull/55297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Remove existing moving/ dir if allow_remove_stale_moving_parts is off [#55480](https://github.com/ClickHouse/ClickHouse/pull/55480) ([Mike Kot](https://github.com/myrrc)).
* Bump curl to 8.4 [#55492](https://github.com/ClickHouse/ClickHouse/pull/55492) ([Robert Schulze](https://github.com/rschu1ze)).

View File

@ -0,0 +1,46 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.9.2.56-stable (a1bf3f1de55) FIXME as compared to v23.9.1.1854-stable (8f9a227de1f)
#### Build/Testing/Packaging Improvement
* Backported in [#55295](https://github.com/ClickHouse/ClickHouse/issues/55295): Resource with source code including submodules is built in Darwin special build task. It may be used to build ClickHouse without checkouting submodules. [#51435](https://github.com/ClickHouse/ClickHouse/pull/51435) ([Ilya Yatsishin](https://github.com/qoega)).
* Backported in [#55368](https://github.com/ClickHouse/ClickHouse/issues/55368): Solve issue with launching standalone clickhouse-keeper from clickhouse-server package. [#55226](https://github.com/ClickHouse/ClickHouse/pull/55226) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#55727](https://github.com/ClickHouse/ClickHouse/issues/55727): Fix integration check python script to use gh api url - Add Readme for CI tests. [#55716](https://github.com/ClickHouse/ClickHouse/pull/55716) ([Max K.](https://github.com/mkaynov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix deadlock in LDAP assigned role update [#55119](https://github.com/ClickHouse/ClickHouse/pull/55119) ([Julian Maicher](https://github.com/jmaicher)).
* Fix for background download in fs cache [#55252](https://github.com/ClickHouse/ClickHouse/pull/55252) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix functions execution over sparse columns [#55275](https://github.com/ClickHouse/ClickHouse/pull/55275) ([Azat Khuzhin](https://github.com/azat)).
* Fix incorrect merging of Nested for SELECT FINAL FROM SummingMergeTree [#55276](https://github.com/ClickHouse/ClickHouse/pull/55276) ([Azat Khuzhin](https://github.com/azat)).
* Fix bug with inability to drop detached partition in replicated merge tree on top of S3 without zero copy [#55309](https://github.com/ClickHouse/ClickHouse/pull/55309) ([alesapin](https://github.com/alesapin)).
* Fix SIGSEGV in MergeSortingPartialResultTransform (due to zero chunks after remerge()) [#55335](https://github.com/ClickHouse/ClickHouse/pull/55335) ([Azat Khuzhin](https://github.com/azat)).
* Fix data-race in CreatingSetsTransform (on errors) due to throwing shared exception [#55338](https://github.com/ClickHouse/ClickHouse/pull/55338) ([Azat Khuzhin](https://github.com/azat)).
* Fix trash optimization (up to a certain extent) [#55353](https://github.com/ClickHouse/ClickHouse/pull/55353) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix parsing of arrays in cast operator [#55417](https://github.com/ClickHouse/ClickHouse/pull/55417) ([Anton Popov](https://github.com/CurtizJ)).
* Fix filtering by virtual columns with OR filter in query [#55418](https://github.com/ClickHouse/ClickHouse/pull/55418) ([Azat Khuzhin](https://github.com/azat)).
* Fix MongoDB connection issues [#55419](https://github.com/ClickHouse/ClickHouse/pull/55419) ([Nikolay Degterinsky](https://github.com/evillique)).
* Destroy fiber in case of exception in cancelBefore in AsyncTaskExecutor [#55516](https://github.com/ClickHouse/ClickHouse/pull/55516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash in QueryNormalizer with cyclic aliases [#55602](https://github.com/ClickHouse/ClickHouse/pull/55602) ([vdimir](https://github.com/vdimir)).
* Fix filtering by virtual columns with OR filter in query (resubmit) [#55678](https://github.com/ClickHouse/ClickHouse/pull/55678) ([Azat Khuzhin](https://github.com/azat)).
#### NO CL CATEGORY
* Backported in [#55708](https://github.com/ClickHouse/ClickHouse/issues/55708):. [#55657](https://github.com/ClickHouse/ClickHouse/pull/55657) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#55691](https://github.com/ClickHouse/ClickHouse/issues/55691):. [#55682](https://github.com/ClickHouse/ClickHouse/pull/55682) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Add setting allow_experimental_partial_result [#54514](https://github.com/ClickHouse/ClickHouse/pull/54514) ([vdimir](https://github.com/vdimir)).
* Fix CI skip build and skip tests checks [#54532](https://github.com/ClickHouse/ClickHouse/pull/54532) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* check if block is empty after async insert retries [#55143](https://github.com/ClickHouse/ClickHouse/pull/55143) ([Han Fei](https://github.com/hanfei1991)).
* MaterializedPostgreSQL: remove back check [#55297](https://github.com/ClickHouse/ClickHouse/pull/55297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Review [#51946](https://github.com/ClickHouse/ClickHouse/issues/51946) and partially revert it [#55336](https://github.com/ClickHouse/ClickHouse/pull/55336) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove existing moving/ dir if allow_remove_stale_moving_parts is off [#55480](https://github.com/ClickHouse/ClickHouse/pull/55480) ([Mike Kot](https://github.com/myrrc)).
* Bump curl to 8.4 [#55492](https://github.com/ClickHouse/ClickHouse/pull/55492) ([Robert Schulze](https://github.com/rschu1ze)).

View File

@ -67,6 +67,48 @@ This check means that the CI system started to process the pull request. When it
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md).
#### Running style check locally:
```sh
mkdir -p /tmp/test_output
# running all checks
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE clickhouse/style-test
# run specified check script (e.g.: ./check-mypy)
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE --entrypoint= -w/ClickHouse/utils/check-style clickhouse/style-test ./check-mypy
# find all style check scripts under the directory:
cd ./utils/check-style
# Check duplicate includes
./check-duplicate-includes.sh
# Check c++ formatiing
./check-style
# Check python formatting with black
./check-black
# Check python type hinting with mypy
./check-mypy
# Check code with codespell
./check-typos
# Check docs spelling
./check-doc-aspell
# Check whitespaces
./check-whitespaces
# Check github actions workflows
./check-workflows
# Check submodules
./check-submodules
# Check shell scripts with shellcheck
./shellcheck-run.sh
```
## Fast Test
Normally this is the first check that is ran for a PR. It builds ClickHouse and
@ -75,6 +117,15 @@ some. If it fails, further checks are not started until it is fixed. Look at
the report to see which tests fail, then reproduce the failure locally as
described [here](tests.md#functional-test-locally).
#### Running Fast Test locally:
```sh
mkdir -p /tmp/test_output
mkdir -p /tmp/fasttest-workspace
cd ClickHouse
# this docker command performs minimal ClickHouse build and run FastTests against it
docker run --rm --cap-add=SYS_PTRACE -u $(id -u ${USER}):$(id -g ${USER}) --network=host -e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output -e FASTTEST_SOURCE=/ClickHouse --cap-add=SYS_PTRACE -e stage=clone_submodules --volume=/tmp/fasttest-workspace:/fasttest-workspace --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output clickhouse/fasttest
```
#### Status Page Files
- `runlog.out.log` is the general log that includes all other logs.
@ -122,6 +173,13 @@ Builds ClickHouse in various configurations for use in further steps. You have t
## Special Build Check
Performs static analysis and code style checks using `clang-tidy`. The report is similar to the [build check](#build-check). Fix the errors found in the build log.
#### Running clang-tidy locally:
There is a convenience `packager` script that runs the clang-tidy build in docker
```sh
mkdir build_tidy
./docker/packager/packager --output-dir=./build_tidy --package-type=binary --compiler=clang-17 --debug-build --clang-tidy
```
## Functional Stateless Tests
Runs [stateless functional tests](tests.md#functional-tests) for ClickHouse

View File

@ -282,7 +282,7 @@ Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHous
If you are not interested in functionality provided by third-party libraries, you can further speed up the build using `cmake` options
```
-DENABLE_LIBRARIES=0 -DENABLE_EMBEDDED_COMPILER=0
-DENABLE_LIBRARIES=0
```
In case of problems with any of the development options, you are on your own!

View File

@ -197,6 +197,11 @@ Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.htm
ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = <new_size>;
```
### `materialized_postgresql_use_unique_replication_consumer_identifier` {#materialized_postgresql_use_unique_replication_consumer_identifier}
Use a unique replication consumer identifier for replication. Default: `0`.
If set to `1`, allows to setup several `MaterializedPostgreSQL` tables pointing to the same `PostgreSQL` table.
## Notes {#notes}
### Failover of the logical replication slot {#logical-replication-slot-failover}

View File

@ -24,12 +24,15 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32)
[after_processing = 'keep',]
[keeper_path = '',]
[s3queue_loading_retries = 0,]
[s3queue_processing_threads_num = 1,]
[s3queue_enable_logging_to_s3queue_log = 0,]
[s3queue_polling_min_timeout_ms = 1000,]
[s3queue_polling_max_timeout_ms = 10000,]
[s3queue_polling_backoff_ms = 0,]
[s3queue_tracked_files_limit = 1000,]
[s3queue_tracked_file_ttl_sec = 0,]
[s3queue_polling_size = 50,]
[s3queue_tracked_files_limit = 1000,]
[s3queue_cleanup_interval_min_ms = 10000,]
[s3queue_cleanup_interval_max_ms = 30000,]
```
**Engine parameters**
@ -46,7 +49,7 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32)
CREATE TABLE s3queue_engine_table (name String, value UInt32)
ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
SETTINGS
mode = 'ordered';
mode = 'unordered';
```
Using named collections:
@ -109,6 +112,18 @@ Possible values:
Default value: `0`.
### s3queue_processing_threads_num {#processing_threads_num}
Number of threads to perform processing. Applies only for `Unordered` mode.
Default value: `1`.
### s3queue_enable_logging_to_s3queue_log {#enable_logging_to_s3queue_log}
Enable logging to `system.s3queue_log`.
Default value: `0`.
### s3queue_polling_min_timeout_ms {#polling_min_timeout_ms}
Minimal timeout before next polling (in milliseconds).
@ -161,18 +176,17 @@ Possible values:
Default value: `0`.
### s3queue_polling_size {#polling_size}
### s3queue_cleanup_interval_min_ms {#cleanup_interval_min_ms}
Maximum files to fetch from S3 with SELECT or in background task.
Engine takes files for processing from S3 in batches.
We limit the batch size to increase concurrency if multiple table engines with the same `keeper_path` consume files from the same path.
For 'Ordered' mode. Defines a minimum boundary for reschedule interval for a background task, which is responsible for maintaining tracked file TTL and maximum tracked files set.
Possible values:
Default value: `10000`.
- Positive integer.
### s3queue_cleanup_interval_max_ms {#cleanup_interval_max_ms}
Default value: `50`.
For 'Ordered' mode. Defines a maximum boundary for reschedule interval for a background task, which is responsible for maintaining tracked file TTL and maximum tracked files set.
Default value: `30000`.
## S3-related Settings {#s3-settings}
@ -227,6 +241,118 @@ For more information about virtual columns see [here](../../../engines/table-eng
Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.
:::note
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::
## Limitations {#limitations}
1. Duplicated rows can be as a result of:
- an exception happens during parsing in the middle of file processing and retries are enabled via `s3queue_loading_retries`;
- `S3Queue` is configured on multiple servers pointing to the same path in zookeeper and keeper session expires before one server managed to commit processed file, which could lead to another server taking processing of the file, which could be partially or fully processed by the first server;
- abnormal server termination.
2. `S3Queue` is configured on multiple servers pointing to the same path in zookeeper and `Ordered` mode is used, then `s3queue_loading_retries` will not work. This will be fixed soon.
## Introspection {#introspection}
For introspection use `system.s3queue` stateless table and `system.s3queue_log` persistent table.
1. `system.s3queue`. This table is not persistent and shows in-memory state of `S3Queue`: which files are currently being processed, which files are processed or failed.
``` sql
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE system.s3queue
(
`database` String,
`table` String,
`file_name` String,
`rows_processed` UInt64,
`status` String,
`processing_start_time` Nullable(DateTime),
`processing_end_time` Nullable(DateTime),
`ProfileEvents` Map(String, UInt64)
`exception` String
)
ENGINE = SystemS3Queue
COMMENT 'SYSTEM TABLE is built on the fly.' │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
Example:
``` sql
SELECT *
FROM system.s3queue
Row 1:
──────
zookeeper_path: /clickhouse/s3queue/25ea5621-ae8c-40c7-96d0-cec959c5ab88/3b3f66a1-9866-4c2e-ba78-b6bfa154207e
file_name: wikistat/original/pageviews-20150501-030000.gz
rows_processed: 5068534
status: Processed
processing_start_time: 2023-10-13 13:09:48
processing_end_time: 2023-10-13 13:10:31
ProfileEvents: {'ZooKeeperTransactions':3,'ZooKeeperGet':2,'ZooKeeperMulti':1,'SelectedRows':5068534,'SelectedBytes':198132283,'ContextLock':1,'S3QueueSetFileProcessingMicroseconds':2480,'S3QueueSetFileProcessedMicroseconds':9985,'S3QueuePullMicroseconds':273776,'LogTest':17}
exception:
```
2. `system.s3queue_log`. Persistent table. Has the same information as `system.s3queue`, but for `processed` and `failed` files.
The table has the following structure:
``` sql
SHOW CREATE TABLE system.s3queue_log
Query id: 0ad619c3-0f2a-4ee4-8b40-c73d86e04314
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE system.s3queue_log
(
`event_date` Date,
`event_time` DateTime,
`table_uuid` String,
`file_name` String,
`rows_processed` UInt64,
`status` Enum8('Processed' = 0, 'Failed' = 1),
`processing_start_time` Nullable(DateTime),
`processing_end_time` Nullable(DateTime),
`ProfileEvents` Map(String, UInt64),
`exception` String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(event_date)
ORDER BY (event_date, event_time)
SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
In order to use `system.s3queue_log` define its configuration in server config file:
``` xml
<s3queue_log>
<database>system</database>
<table>s3queue_log</table>
</s3queue_log>
```
Example:
``` sql
SELECT *
FROM system.s3queue_log
Row 1:
──────
event_date: 2023-10-13
event_time: 2023-10-13 13:10:12
table_uuid:
file_name: wikistat/original/pageviews-20150501-020000.gz
rows_processed: 5112621
status: Processed
processing_start_time: 2023-10-13 13:09:48
processing_end_time: 2023-10-13 13:10:12
ProfileEvents: {'ZooKeeperTransactions':3,'ZooKeeperGet':2,'ZooKeeperMulti':1,'SelectedRows':5112621,'SelectedBytes':198577687,'ContextLock':1,'S3QueueSetFileProcessingMicroseconds':1934,'S3QueueSetFileProcessedMicroseconds':17063,'S3QueuePullMicroseconds':5841972,'LogTest':17}
exception:
```

View File

@ -58,6 +58,12 @@ where `N` specifies the tokenizer:
- `inverted(0)` (or shorter: `inverted()`) set the tokenizer to "tokens", i.e. split strings along spaces,
- `inverted(N)` with `N` between 2 and 8 sets the tokenizer to "ngrams(N)"
The maximum rows per postings list can be specified as the second parameter. This parameter can be used to control postings list sizes to avoid generating huge postings list files. The following variants exist:
- `inverted(ngrams, max_rows_per_postings_list)`: Use given max_rows_per_postings_list (assuming it is not 0)
- `inverted(ngrams, 0)`: No limitation of maximum rows per postings list
- `inverted(ngrams)`: Use a default maximum rows which is 64K.
Being a type of skipping index, inverted indexes can be dropped or added to a column after table creation:
``` sql

View File

@ -380,7 +380,7 @@ build.
### macOS-only: Install with Homebrew
To install ClickHouse using the popular `brew` package manager, follow the instructions listed in the [ClickHouse Homebrew tap](https://github.com/ClickHouse/homebrew-clickhouse).
To install ClickHouse using [homebrew](https://brew.sh/), see [here](https://formulae.brew.sh/cask/clickhouse).
## Launch {#launch}

View File

@ -87,6 +87,7 @@ The supported formats are:
| [RawBLOB](#rawblob) | ✔ | ✔ |
| [MsgPack](#msgpack) | ✔ | ✔ |
| [MySQLDump](#mysqldump) | ✔ | ✗ |
| [DWARF](#dwarf) | ✔ | ✗ |
| [Markdown](#markdown) | ✗ | ✔ |
@ -2719,6 +2720,53 @@ FROM file(dump.sql, MySQLDump)
└───┘
```
## DWARF {#dwarf}
Parses DWARF debug symbols from an ELF file (executable, library, or object file). Similar to `dwarfdump`, but much faster (hundreds of MB/s) and with SQL. Produces one row for each Debug Information Entry (DIE) in the `.debug_info` section. Includes "null" entries that the DWARF encoding uses to terminate lists of children in the tree.
Quick background: `.debug_info` consists of *units*, corresponding to compilation units. Each unit is a tree of *DIE*s, with a `compile_unit` DIE as its root. Each DIE has a *tag* and a list of *attributes*. Each attribute has a *name* and a *value* (and also a *form*, which specifies how the value is encoded). The DIEs represent things from the source code, and their *tag* tells what kind of thing it is. E.g. there are functions (tag = `subprogram`), classes/structs/enums (`class_type`/`structure_type`/`enumeration_type`), variables (`variable`), function arguments (`formal_parameter`). The tree structure mirrors the corresponding source code. E.g. a `class_type` DIE can contain `subprogram` DIEs representing methods of the class.
Outputs the following columns:
- `offset` - position of the DIE in the `.debug_info` section
- `size` - number of bytes in the encoded DIE (including attributes)
- `tag` - type of the DIE; the conventional "DW_TAG_" prefix is omitted
- `unit_name` - name of the compilation unit containing this DIE
- `unit_offset` - position of the compilation unit containing this DIE in the `.debug_info` section
- `ancestor_tags` - array of tags of the ancestors of the current DIE in the tree, in order from innermost to outermost
- `ancestor_offsets` - offsets of ancestors, parallel to `ancestor_tags`
- a few common attributes duplicated from the attributes array for convenience:
- `name`
- `linkage_name` - mangled fully-qualified name; typically only functions have it (but not all functions)
- `decl_file` - name of the source code file where this entity was declared
- `decl_line` - line number in the source code where this entity was declared
- parallel arrays describing attributes:
- `attr_name` - name of the attribute; the conventional "DW_AT_" prefix is omitted
- `attr_form` - how the attribute is encoded and interpreted; the conventional DW_FORM_ prefix is omitted
- `attr_int` - integer value of the attribute; 0 if the attribute doesn't have a numeric value
- `attr_str` - string value of the attribute; empty if the attribute doesn't have a string value
Example: find compilation units that have the most function definitions (including template instantiations and functions from included header files):
```sql
SELECT
unit_name,
count() AS c
FROM file('programs/clickhouse', DWARF)
WHERE tag = 'subprogram' AND NOT has(attr_name, 'declaration')
GROUP BY unit_name
ORDER BY c DESC
LIMIT 3
```
```text
┌─unit_name──────────────────────────────────────────────────┬─────c─┐
│ ./src/Core/Settings.cpp │ 28939 │
│ ./src/AggregateFunctions/AggregateFunctionSumMap.cpp │ 23327 │
│ ./src/AggregateFunctions/AggregateFunctionUniqCombined.cpp │ 22649 │
└────────────────────────────────────────────────────────────┴───────┘
3 rows in set. Elapsed: 1.487 sec. Processed 139.76 million rows, 1.12 GB (93.97 million rows/s., 752.77 MB/s.)
Peak memory usage: 271.92 MiB.
```
## Markdown {#markdown}
You can export results using [Markdown](https://en.wikipedia.org/wiki/Markdown) format to generate output ready to be pasted into your `.md` files:

View File

@ -0,0 +1,91 @@
---
slug: /en/operations/external-authenticators/http
title: "HTTP"
---
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
<SelfManaged />
HTTP server can be used to authenticate ClickHouse users. HTTP authentication can only be used as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths. Currently, [Basic](https://datatracker.ietf.org/doc/html/rfc7617) authentication scheme using GET method is supported.
## HTTP authentication server definition {#http-auth-server-definition}
To define HTTP authentication server you must add `http_authentication_servers` section to the `config.xml`.
**Example**
```xml
<clickhouse>
<!- ... -->
<http_authentication_servers>
<basic_auth_server>
<uri>http://localhost:8000/auth</uri>
<connection_timeout_ms>1000</connection_timeout_ms>
<receive_timeout_ms>1000</receive_timeout_ms>
<send_timeout_ms>1000</send_timeout_ms>
<max_tries>3</max_tries>
<retry_initial_backoff_ms>50</retry_initial_backoff_ms>
<retry_max_backoff_ms>1000</retry_max_backoff_ms>
</basic_auth_server>
</http_authentication_servers>
</clickhouse>
```
Note, that you can define multiple HTTP servers inside the `http_authentication_servers` section using distinct names.
**Parameters**
- `uri` - URI for making authentication request
Timeouts in milliseconds on the socket used for communicating with the server:
- `connection_timeout_ms` - Default: 1000 ms.
- `receive_timeout_ms` - Default: 1000 ms.
- `send_timeout_ms` - Default: 1000 ms.
Retry parameters:
- `max_tries` - The maximum number of attempts to make an authentication request. Default: 3
- `retry_initial_backoff_ms` - The backoff initial interval on retry. Default: 50 ms
- `retry_max_backoff_ms` - The maximum backoff interval. Default: 1000 ms
### Enabling HTTP authentication in `users.xml` {#enabling-http-auth-in-users-xml}
In order to enable HTTP authentication for the user, specify `http_authentication` section instead of `password` or similar sections in the user definition.
Parameters:
- `server` - Name of the HTTP authentication server configured in the main `config.xml` file as described previously.
- `scheme` - HTTP authentication scheme. `Basic` is only supported now. Default: Basic
Example (goes into `users.xml`):
```xml
<clickhouse>
<!- ... -->
<my_user>
<!- ... -->
<http_authentication>
<server>basic_server</server>
<scheme>basic</scheme>
</http_authentication>
</test_user_2>
</clickhouse>
```
:::note
Note that HTTP authentication cannot be used alongside with any other authentication mechanism. The presence of any other sections like `password` alongside `http_authentication` will force ClickHouse to shutdown.
:::
### Enabling HTTP authentication using SQL {#enabling-http-auth-using-sql}
When [SQL-driven Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled in ClickHouse, users identified by HTTP authentication can also be created using SQL statements.
```sql
CREATE USER my_user IDENTIFIED WITH HTTP SERVER 'basic_server' SCHEME 'Basic'
```
...or, `Basic` is default without explicit scheme definition
```sql
CREATE USER my_user IDENTIFIED WITH HTTP SERVER 'basic_server'
```
### Passing session settings {#passing-session-settings}
If a response body from HTTP authentication server has JSON format and contains `settings` sub-object, ClickHouse will try parse its key: value pairs as string values and set them as session settings for authenticated user's current session. If parsing is failed, a response body from server will be ignored.

View File

@ -16,3 +16,4 @@ The following external authenticators and directories are supported:
- [LDAP](./ldap.md#external-authenticators-ldap) [Authenticator](./ldap.md#ldap-external-authenticator) and [Directory](./ldap.md#ldap-external-user-directory)
- Kerberos [Authenticator](./kerberos.md#external-authenticators-kerberos)
- [SSL X.509 authentication](./ssl-x509.md#ssl-external-authentication)
- HTTP [Authenticator](./http.md)

View File

@ -569,7 +569,6 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc
<max_size_rows>10M</max_size_rows>
<max_file_segment_size>1M</max_file_segment_size>
<cache_on_write_operations>1</cache_on_write_operations>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
</tiny_local_cache>
<!-- highlight-end -->
</disks>
@ -2670,3 +2669,114 @@ Possible values:
- 1 — Enabled.
Default value: 0.
## proxy {#proxy}
Define proxy servers for HTTP and HTTPS requests, currently supported by S3 storage, S3 table functions, and URL functions.
There are three ways to define proxy servers: environment variables, proxy lists, and remote proxy resolvers.
### Environment variables
The `http_proxy` and `https_proxy` environment variables allow you to specify a
proxy server for a given protocol. If you have it set on your system, it should work seamlessly.
This is the simplest approach if a given protocol has
only one proxy server and that proxy server doesn't change.
### Proxy lists
This approach allows you to specify one or more
proxy servers for a protocol. If more than one proxy server is defined,
ClickHouse uses the different proxies on a round-robin basis, balancing the
load across the servers. This is the simplest approach if there is more than
one proxy server for a protocol and the list of proxy servers doesn't change.
### Configuration template
``` xml
<proxy>
<http>
<uri>http://proxy1</uri>
<uri>http://proxy2:3128</uri>
</http>
<https>
<uri>http://proxy1:3128</uri>
</https>
</proxy>
```
`<proxy>` fields
* `<http>` - A list of one or more HTTP proxies
* `<https>` - A list of one or more HTTPS proxies
`<http>` and `<https>` fields
* `<uri>` - The URI of the proxy
### Remote proxy resolvers
It's possible that the proxy servers change dynamically. In that
case, you can define the endpoint of a resolver. ClickHouse sends
an empty GET request to that endpoint, the remote resolver should return the proxy host.
ClickHouse will use it to form the proxy URI using the following template: `{proxy_scheme}://{proxy_host}:{proxy_port}`
### Configuration template
``` xml
<proxy>
<http>
<resolver>
<endpoint>http://resolver:8080/hostname</endpoint>
<proxy_scheme>http</proxy_scheme>
<proxy_port>80</proxy_port>
<proxy_cache_time>10</proxy_cache_time>
</resolver>
</http>
<https>
<resolver>
<endpoint>http://resolver:8080/hostname</endpoint>
<proxy_scheme>http</proxy_scheme>
<proxy_port>3128</proxy_port>
<proxy_cache_time>10</proxy_cache_time>
</resolver>
</https>
</proxy>
```
`<proxy>` fields
* `<http>` - A list of one or more resolvers*
* `<https>` - A list of one or more resolvers*
`<http>` and `<https>` fields
* `<resolver>` - The endpoint and other details for a resolver.
You can have multiple `<resolver>` elements, but only the first
`<resolver>` for a given protocol is used. Any other `<resolver>`
elements for that protocol are ignored. That means load balancing
(if needed) should be implemented by the remote resolver.
`<resolver>` fields
* `<endpoint>` - The URI of the proxy resolver
* `<proxy_scheme>` - The protocol of the final proxy URI. This can be either `http` or `https`.
* `<proxy_port>` - The port number of the proxy resolver
* `<proxy_cache_time>` - The time in seconds that values from the resolver
should be cached by ClickHouse. Setting this value to `0` causes ClickHouse
to contact the resolver for every HTTP or HTTPS request.
### Precedence
Proxy settings are determined in the following order:
1. Remote proxy resolvers
2. Proxy lists
3. Environment variables
ClickHouse will check the highest priority resolver type for the request protocol. If it is not defined,
it will check the next highest priority resolver type, until it reaches the environment resolver.
This also allows a mix of resolver types can be used.

View File

@ -3310,6 +3310,28 @@ Possible values:
Default value: `0`.
## mysql_map_string_to_text_in_show_columns {#mysql_map_string_to_text_in_show_columns}
When enabled, [String](../../sql-reference/data-types/string.md) ClickHouse data type will be displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns).
Has effect only when [use_mysql_types_in_show_columns](#use_mysql_types_in_show_columns) is enabled.
- 0 - Use `BLOB`.
- 1 - Use `TEXT`.
Default value: `0`.
## mysql_map_fixed_string_to_text_in_show_columns {#mysql_map_fixed_string_to_text_in_show_columns}
When enabled, [FixedString](../../sql-reference/data-types/fixedstring.md) ClickHouse data type will be displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns).
Has effect only when [use_mysql_types_in_show_columns](#use_mysql_types_in_show_columns) is enabled.
- 0 - Use `BLOB`.
- 1 - Use `TEXT`.
Default value: `0`.
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
Enables special logic to perform merges on replicas.
@ -4746,3 +4768,18 @@ a Tuple(
l Nullable(String)
)
```
## dictionary_use_async_executor {#dictionary_use_async_executor}
Execute a pipeline for reading dictionary source in several threads. It's supported only by dictionaries with local CLICKHOUSE source.
You may specify it in `SETTINGS` section of dictionary definition:
```sql
CREATE DICTIONARY t1_dict ( key String, attr UInt64 )
PRIMARY KEY key
SOURCE(CLICKHOUSE(QUERY `SELECT key, attr FROM t1 GROUP BY key`))
LIFETIME(MIN 0 MAX 3600)
LAYOUT(COMPLEX_KEY_HASHED_ARRAY())
SETTINGS(dictionary_use_async_executor=1, max_threads=8);
```

View File

@ -114,7 +114,7 @@ Example of disk configuration:
## Using local cache {#using-local-cache}
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS.
Cache uses `LRU` cache policy.
@ -192,12 +192,12 @@ These settings should be defined in the disk configuration section.
- `enable_bypass_cache_with_threshold` - allows to skip cache completely in case the requested read range exceeds the threshold. Default: `false`. This threshold can be defined by `bypass_cache_threashold`. Default: `268435456` (`256Mi`).
- `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading.
- `max_file_segment_size` - a maximum size of a single cache file in bytes or in readable format (`ki, Mi, Gi, etc`, example `10Gi`). Default: `8388608` (`8Mi`).
- `max_elements` - a limit for a number of cache files. Default: `10000000`.
- `load_metadata_threads` - number of threads being used to load cache metadata on starting time. Default: `1`.
File Cache **query/profile settings**:
Some of these settings will disable cache features per query/profile that are enabled by default or in disk configuration settings. For example, you can enable cache in disk configuration and disable it per query/profile setting `enable_filesystem_cache` to `false`. Also setting `cache_on_write_operations` to `true` in disk configuration means that "write-though" cache is enabled. But if you need to disable this general setting per specific queries then setting `enable_filesystem_cache_on_write_operations` to `false` means that write operations cache will be disabled for a specific query/profile.
@ -248,9 +248,9 @@ DESCRIBE FILESYSTEM CACHE 's3_cache'
```
``` text
┌────max_size─┬─max_elements─┬─max_file_segment_size─┬─cache_on_write_operations─┬─enable_cache_hits_threshold─┬─current_size─┬─current_elements─┬─path────────┬─do_not_evict_index_and_mark_files─┐
│ 10000000000 │ 1048576 │ 104857600 │ 1 │ 0 │ 3276 │ 54 │ /s3_cache/ │ 1
└─────────────┴──────────────┴───────────────────────┴────────────────────────────────────────────────────────┴──────────────┴──────────────────┴─────────────┴───────────────────────────────────┘
┌────max_size─┬─max_elements─┬─max_file_segment_size─┬─boundary_alignment─┬─cache_on_write_operations─┬─cache_hits_threshold─┬─current_size─┬─current_elements─┬─path───────┬─background_download_threads─┬─enable_bypass_cache_with_threshold─┐
│ 10000000000 │ 1048576 │ 104857600 │ 4194304 │ 1 │ 0 │ 3276 │ 54 │ /s3_cache/ │ 2 │ 0
└─────────────┴──────────────┴───────────────────────┴────────────────────┴───────────────────────────┴──────────────────────┴──────────────┴──────────────────┴────────────┴─────────────────────────────┴───────────────────────────────────┘
```
Cache current metrics:

View File

@ -16,6 +16,7 @@ Columns:
- `is_expired` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the current connection expired.
- `keeper_api_version` ([String](../../sql-reference/data-types/string.md)) — Keeper API version.
- `client_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Session id of the connection.
- `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — Xid of the current session.
Example:

View File

@ -103,4 +103,5 @@ ClickHouse-specific aggregate functions:
- [quantileInterpolatedWeighted](./quantileinterpolatedweighted.md)
- [sparkBar](./sparkbar.md)
- [sumCount](./sumcount.md)
- [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md)

View File

@ -0,0 +1,67 @@
---
slug: /en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets
sidebar_position: 312
sidebar_label: largestTriangleThreeBuckets
---
# largestTriangleThreeBuckets
Applies the [Largest-Triangle-Three-Buckets](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm to the input data.
The algorithm is used for downsampling time series data for visualization. It is designed to operate on series sorted by x coordinate.
It works by dividing the sorted series into buckets and then finding the largest triangle in each bucket. The number of buckets is equal to the number of points in the resulting series.
the function will sort data by `x` and then apply the downsampling algorithm to the sorted data.
**Syntax**
``` sql
largestTriangleThreeBuckets(n)(x, y)
```
Alias: `lttb`.
**Arguments**
- `x` — x coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md).
- `y` — y coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md).
**Parameters**
- `n` — number of points in the resulting series. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Array](../../../sql-reference/data-types/array.md) of [Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
**Example**
Input table:
``` text
┌─────x───────┬───────y──────┐
│ 1.000000000 │ 10.000000000 │
│ 2.000000000 │ 20.000000000 │
│ 3.000000000 │ 15.000000000 │
│ 8.000000000 │ 60.000000000 │
│ 9.000000000 │ 55.000000000 │
│ 10.00000000 │ 70.000000000 │
│ 4.000000000 │ 30.000000000 │
│ 5.000000000 │ 40.000000000 │
│ 6.000000000 │ 35.000000000 │
│ 7.000000000 │ 50.000000000 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT largestTriangleThreeBuckets(4)(x, y) FROM largestTriangleThreeBuckets_test;
```
Result:
``` text
┌────────largestTriangleThreeBuckets(3)(x, y)───────────┐
│ [(1,10),(3,15),(5,40),(10,70)] │
└───────────────────────────────────────────────────────┘
```

View File

@ -1144,11 +1144,32 @@ SELECT arrayFold( x,acc -> acc + x*2, [1, 2, 3, 4], toInt64(3)) AS res;
Result:
``` text
┌─arrayFold(lambda(tuple(x, acc), plus(acc, multiply(x, 2))), [1, 2, 3, 4], toInt64(3))─┐
3 │
└───────────────────────────────────────────────────────────────────────────────────────
┌─res─┐
23 │
└─────┘
```
**Example with the Fibonacci sequence**
```sql
SELECT arrayFold( x, acc -> (acc.2, acc.2 + acc.1), range(number), (1::Int64, 0::Int64)).1 AS fibonacci
FROM numbers(1,10);
┌─fibonacci─┐
│ 0 │
│ 1 │
│ 1 │
│ 2 │
│ 3 │
│ 5 │
│ 8 │
│ 13 │
│ 21 │
│ 34 │
└───────────┘
```
## arrayReverse(arr)
Returns an array of the same size as the original array containing the elements in reverse order.

View File

@ -1910,6 +1910,7 @@ Result:
```
**See Also**
- [subDate](#subDate)
## timestamp\_add
@ -2053,6 +2054,7 @@ Result:
Alias: `ADDDATE`
**See Also**
- [date_add](#date_add)
## subDate
@ -2095,6 +2097,7 @@ Result:
Alias: `SUBDATE`
**See Also**
- [date_sub](#date_sub)
## now {#now}
@ -2388,42 +2391,50 @@ Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../../sql-ref
Accepts an additional, optional `precision` parameter after the `timezone` parameter.
## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters
## addYears, addQuarters, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addMilliseconds, addMicroseconds, addNanoseconds
Function adds a Date/DateTime interval to a Date/DateTime and then return the Date/DateTime. For example:
These functions add units of the interval specified by the function name to a date, a date with time or a string-encoded date / date with time. A date or date with time is returned.
Example:
``` sql
WITH
toDate('2018-01-01') AS date,
toDateTime('2018-01-01 00:00:00') AS date_time
toDate('2024-01-01') AS date,
toDateTime('2024-01-01 00:00:00') AS date_time,
'2024-01-01 00:00:00' AS date_time_string
SELECT
addYears(date, 1) AS add_years_with_date,
addYears(date_time, 1) AS add_years_with_date_time
addYears(date_time, 1) AS add_years_with_date_time,
addYears(date_time_string, 1) AS add_years_with_date_time_string
```
``` text
┌─add_years_with_date─┬─add_years_with_date_time─┐
│ 2019-01-01 │ 2019-01-01 00:00:00 │
└─────────────────────┴──────────────────────────┘
┌─add_years_with_date─┬─add_years_with_date_time─┬─add_years_with_date_time_string─
│ 2025-01-01 │ 2025-01-01 00:00:00 │ 2025-01-01 00:00:00.000 │
└─────────────────────┴──────────────────────────┴─────────────────────────────────
```
## subtractYears, subtractMonths, subtractWeeks, subtractDays, subtractHours, subtractMinutes, subtractSeconds, subtractQuarters
## subtractYears, subtractQuarters, subtractMonths, subtractWeeks, subtractDays, subtractHours, subtractMinutes, subtractSeconds, subtractMilliseconds, subtractMicroseconds, subtractNanoseconds
Function subtract a Date/DateTime interval to a Date/DateTime and then return the Date/DateTime. For example:
These functions subtract units of the interval specified by the function name from a date, a date with time or a string-encoded date / date with time. A date or date with time is returned.
Example:
``` sql
WITH
toDate('2019-01-01') AS date,
toDateTime('2019-01-01 00:00:00') AS date_time
toDate('2024-01-01') AS date,
toDateTime('2024-01-01 00:00:00') AS date_time,
'2024-01-01 00:00:00' AS date_time_string
SELECT
subtractYears(date, 1) AS subtract_years_with_date,
subtractYears(date_time, 1) AS subtract_years_with_date_time
subtractYears(date_time, 1) AS subtract_years_with_date_time,
subtractYears(date_time_string, 1) AS subtract_years_with_date_time_string
```
``` text
┌─subtract_years_with_date─┬─subtract_years_with_date_time─┐
│ 2018-01-01 │ 2018-01-01 00:00:00 │
└──────────────────────────┴───────────────────────────────┘
┌─subtract_years_with_date─┬─subtract_years_with_date_time─┬─subtract_years_with_date_time_string─
│ 2023-01-01 │ 2023-01-01 00:00:00 │ 2023-01-01 00:00:00.000 │
└──────────────────────────┴───────────────────────────────┴──────────────────────────────────────
```
## timeSlots(StartTime, Duration,\[, Size\])

View File

@ -12,7 +12,7 @@ Syntax:
``` sql
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}]
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[VALID UNTIL datetime]
[IN access_storage_type]
@ -40,6 +40,7 @@ There are multiple ways of user identification:
- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'`
- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'`
- `IDENTIFIED WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa', KEY 'another_public_key' TYPE 'ssh-ed25519'`
- `IDENTIFIED WITH http SERVER 'http_server'` or `IDENTIFIED WITH http SERVER 'http_server' SCHEME 'basic'`
- `IDENTIFIED BY 'qwerty'`
Password complexity requirements can be edited in [config.xml](/docs/en/operations/configuration-files). Below is an example configuration that requires passwords to be at least 12 characters long and contain 1 number. Each password complexity rule requires a regex to match against passwords and a description of the rule.

View File

@ -62,7 +62,7 @@ Materialized views store data transformed by the corresponding [SELECT](../../..
When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` the table engine for storing data.
When creating a materialized view with `TO [db].[table]`, you must not use `POPULATE`.
When creating a materialized view with `TO [db].[table]`, you can't also use `POPULATE`.
A materialized view is implemented as follows: when inserting data to the table specified in `SELECT`, part of the inserted data is converted by this `SELECT` query, and the result is inserted in the view.

View File

@ -16,7 +16,7 @@ The `RENAME` query is supported by the [Atomic](../../engines/database-engines/a
**Syntax**
```sql
RENAME DATABASE|TABLE|DICTIONARY name TO new_name [,...] [ON CLUSTER cluster]
RENAME [DATABASE|TABLE|DICTIONARY] name TO new_name [,...] [ON CLUSTER cluster]
```
## RENAME DATABASE
@ -48,6 +48,11 @@ RENAME TABLE [db1.]name1 TO [db2.]name2 [,...] [ON CLUSTER cluster]
RENAME TABLE table_A TO table_A_bak, table_B TO table_B_bak;
```
And you can use a simpler sql:
```sql
RENAME table_A TO table_A_bak, table_B TO table_B_bak;
```
## RENAME DICTIONARY
Renames one or several dictionaries. This query can be used to move dictionaries between databases.

View File

@ -112,7 +112,8 @@ EOF
tar -czf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR"
fi
sha512sum "$TARBALL" > "$TARBALL".sha512
# Cut the $OUTPUT_DIR/ from the sha512sum output to make it universal
sha512sum "$TARBALL" | sed "s|$OUTPUT_DIR/||" > "$TARBALL".sha512
rm -r "$PKG_PATH"
}

View File

@ -320,6 +320,7 @@ try
registerAggregateFunctions();
processConfig();
adjustSettings();
initTtyBuffer(toProgressOption(config().getString("progress", "default")));
{
@ -1238,6 +1239,8 @@ void Client::processConfig()
if (config().has("multiquery"))
is_multiquery = true;
pager = config().getString("pager", "");
is_default_format = !config().has("vertical") && !config().has("format");
if (config().has("vertical"))
format = config().getString("format", "Vertical");
@ -1264,15 +1267,6 @@ void Client::processConfig()
global_context->setQueryKindInitial();
global_context->setQuotaClientKey(config().getString("quota_key", ""));
global_context->setQueryKind(query_kind);
if (is_multiquery && !global_context->getSettingsRef().input_format_values_allow_data_after_semicolon.changed)
{
Settings settings = global_context->getSettings();
settings.input_format_values_allow_data_after_semicolon = true;
/// Do not send it to the server
settings.input_format_values_allow_data_after_semicolon.changed = false;
global_context->setSettings(settings);
}
}

View File

@ -391,7 +391,7 @@ zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNee
auto code = zookeeper->tryMulti(ops, responses);
if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
return std::make_shared<zkutil::EphemeralNodeHolder>(current_worker_path, *zookeeper, false, false, description);
return zkutil::EphemeralNodeHolder::existing(current_worker_path, *zookeeper);
if (code == Coordination::Error::ZBADVERSION)
{

View File

@ -495,6 +495,7 @@ try
registerFormats();
processConfig();
adjustSettings();
initTtyBuffer(toProgressOption(config().getString("progress", "default")));
applyCmdSettings(global_context);
@ -578,6 +579,8 @@ void LocalServer::processConfig()
if (config().has("multiquery"))
is_multiquery = true;
pager = config().getString("pager", "");
delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
if (!is_interactive || delayed_interactive)
{
@ -784,15 +787,6 @@ void LocalServer::processConfig()
global_context->setQueryKindInitial();
global_context->setQueryKind(query_kind);
if (is_multiquery && !global_context->getSettingsRef().input_format_values_allow_data_after_semicolon.changed)
{
Settings settings = global_context->getSettings();
settings.input_format_values_allow_data_after_semicolon = true;
/// Do not send it to the server
settings.input_format_values_allow_data_after_semicolon.changed = false;
global_context->setSettings(settings);
}
}

View File

@ -145,6 +145,10 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
if (tables.next())
{
catalog_name = tables.table_catalog();
/// `tables.next()` call is mandatory to drain the iterator before next operation and avoid "Invalid cursor state"
if (tables.next())
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Driver returned more than one table for '{}': '{}' and '{}'",
table_name, catalog_name, tables.table_schema());
LOG_TRACE(log, "Will fetch info for table '{}.{}'", catalog_name, table_name);
return catalog.find_columns(/* column = */ "", table_name, /* schema = */ "", catalog_name);
}
@ -153,6 +157,10 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
if (tables.next())
{
catalog_name = tables.table_catalog();
/// `tables.next()` call is mandatory to drain the iterator before next operation and avoid "Invalid cursor state"
if (tables.next())
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Driver returned more than one table for '{}': '{}' and '{}'",
table_name, catalog_name, tables.table_schema());
LOG_TRACE(log, "Will fetch info for table '{}.{}.{}'", catalog_name, schema_name, table_name);
return catalog.find_columns(/* column = */ "", table_name, schema_name, catalog_name);
}

View File

@ -91,16 +91,17 @@ T execute(nanodbc::ConnectionHolderPtr connection_holder, std::function<T(nanodb
}
catch (const nanodbc::database_error & e)
{
LOG_ERROR(
&Poco::Logger::get("ODBCConnection"),
"ODBC query failed with error: {}, state: {}, native code: {}",
e.what(), e.state(), e.native());
/// SQLState, connection related errors start with 08 (main: 08S01), cursor invalid state is 24000.
/// Invalid cursor state is a retriable error.
/// Invalid transaction state 25000. Truncate to 2 letters on purpose.
/// https://docs.microsoft.com/ru-ru/sql/odbc/reference/appendixes/appendix-a-odbc-error-codes?view=sql-server-ver15
if (e.state().starts_with("08") || e.state().starts_with("24") || e.state().starts_with("25"))
bool is_retriable = e.state().starts_with("08") || e.state().starts_with("24") || e.state().starts_with("25");
LOG_ERROR(
&Poco::Logger::get("ODBCConnection"),
"ODBC query failed with error: {}, state: {}, native code: {}{}",
e.what(), e.state(), e.native(), is_retriable ? ", will retry" : "");
if (is_retriable)
{
connection_holder->updateConnection();
return query_func(connection_holder->get());

View File

@ -821,77 +821,85 @@ function insertChart(i) {
let move_text = document.createTextNode('✥');
move.appendChild(move_text);
let is_dragging = false;
move.addEventListener('mousedown', e => {
const idx = getCurrentIndex();
is_dragging = true;
let drag_state = {
is_dragging: false,
idx: null,
offset_x: null,
offset_y: null,
displace_idx: null,
displace_chart: null
};
function dragStop(e) {
drag_state.is_dragging = false;
chart.className = 'chart';
chart.style.left = null;
chart.style.top = null;
if (drag_state.displace_idx !== null) {
const elem = queries[drag_state.idx];
queries.splice(drag_state.idx, 1);
queries.splice(drag_state.displace_idx, 0, elem);
drag_state.displace_chart.className = 'chart';
drawAll();
}
}
function dragMove(e) {
if (!drag_state.is_dragging) return;
let x = e.clientX - drag_state.offset_x;
let y = e.clientY - drag_state.offset_y;
chart.style.left = `${x}px`;
chart.style.top = `${y}px`;
drag_state.displace_idx = null;
drag_state.displace_chart = null;
let current_idx = -1;
for (const elem of charts.querySelectorAll('.chart')) {
++current_idx;
if (current_idx == drag_state.idx) {
continue;
}
const this_rect = chart.getBoundingClientRect();
const this_center_x = this_rect.left + this_rect.width / 2;
const this_center_y = this_rect.top + this_rect.height / 2;
const elem_rect = elem.getBoundingClientRect();
if (this_center_x >= elem_rect.left && this_center_x <= elem_rect.right
&& this_center_y >= elem_rect.top && this_center_y <= elem_rect.bottom) {
elem.className = 'chart chart-displaced';
drag_state.displace_idx = current_idx;
drag_state.displace_chart = elem;
} else {
elem.className = 'chart';
}
}
}
function dragStart(e) {
if (e.button !== 0) return; /// left button only
move.setPointerCapture(e.pointerId);
drag_state.is_dragging = true;
drag_state.idx = getCurrentIndex();
chart.className = 'chart chart-moving';
let offset_x = e.clientX;
let offset_y = e.clientY;
drag_state.offset_x = e.clientX;
drag_state.offset_y = e.clientY;
}
let displace_idx = null;
let displace_chart = null;
function mouseup(e) {
is_dragging = false;
chart.className = 'chart';
chart.style.left = null;
chart.style.top = null;
if (displace_idx !== null) {
const elem = queries[idx];
queries.splice(idx, 1);
queries.splice(displace_idx, 0, elem);
displace_chart.className = 'chart';
drawAll();
}
}
function mousemove(e) {
if (!is_dragging) {
document.body.removeEventListener('mousemove', mousemove);
document.body.removeEventListener('mouseup', mouseup);
return;
}
let x = e.clientX - offset_x;
let y = e.clientY - offset_y;
chart.style.left = `${x}px`;
chart.style.top = `${y}px`;
displace_idx = null;
displace_chart = null;
let current_idx = -1;
for (const elem of charts.querySelectorAll('.chart')) {
++current_idx;
if (current_idx == idx) {
continue;
}
const this_rect = chart.getBoundingClientRect();
const this_center_x = this_rect.left + this_rect.width / 2;
const this_center_y = this_rect.top + this_rect.height / 2;
const elem_rect = elem.getBoundingClientRect();
if (this_center_x >= elem_rect.left && this_center_x <= elem_rect.right
&& this_center_y >= elem_rect.top && this_center_y <= elem_rect.bottom) {
elem.className = 'chart chart-displaced';
displace_idx = current_idx;
displace_chart = elem;
} else {
elem.className = 'chart';
}
}
}
document.body.addEventListener('mouseup', mouseup);
document.body.addEventListener('mousemove', mousemove);
});
/// Read https://www.redblobgames.com/making-of/draggable/
move.addEventListener('pointerdown', dragStart);
move.addEventListener('pointermove', dragMove);
move.addEventListener('pointerup', dragStop);
move.addEventListener('pointerancel', dragStop);
move.addEventListener('touchstart', (e) => e.preventDefault());
let maximize = document.createElement('a');
let maximize_text = document.createTextNode('🗖');

View File

@ -567,7 +567,7 @@ AccessChangesNotifier & AccessControl::getChangesNotifier()
}
UUID AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const
AuthResult AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const
{
try
{

View File

@ -118,7 +118,7 @@ public:
scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const;
scope_guard subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const;
UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
AuthResult authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
/// Makes a backup of access entities.
void restoreFromBackup(RestorerFromBackup & restorer) override;

View File

@ -85,7 +85,11 @@ namespace
}
bool Authentication::areCredentialsValid(const Credentials & credentials, const AuthenticationData & auth_data, const ExternalAuthenticators & external_authenticators)
bool Authentication::areCredentialsValid(
const Credentials & credentials,
const AuthenticationData & auth_data,
const ExternalAuthenticators & external_authenticators,
SettingsChanges & settings)
{
if (!credentials.isReady())
return false;
@ -100,6 +104,7 @@ bool Authentication::areCredentialsValid(const Credentials & credentials, const
case AuthenticationType::DOUBLE_SHA1_PASSWORD:
case AuthenticationType::BCRYPT_PASSWORD:
case AuthenticationType::LDAP:
case AuthenticationType::HTTP:
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
case AuthenticationType::KERBEROS:
@ -133,6 +138,7 @@ bool Authentication::areCredentialsValid(const Credentials & credentials, const
case AuthenticationType::BCRYPT_PASSWORD:
case AuthenticationType::LDAP:
case AuthenticationType::KERBEROS:
case AuthenticationType::HTTP:
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
case AuthenticationType::SSL_CERTIFICATE:
@ -177,6 +183,14 @@ bool Authentication::areCredentialsValid(const Credentials & credentials, const
case AuthenticationType::BCRYPT_PASSWORD:
return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary());
case AuthenticationType::HTTP:
switch (auth_data.getHTTPAuthenticationScheme())
{
case HTTPAuthenticationScheme::BASIC:
return external_authenticators.checkHTTPBasicCredentials(
auth_data.getHTTPAuthenticationServerName(), *basic_credentials, settings);
}
case AuthenticationType::MAX:
break;
}
@ -192,6 +206,7 @@ bool Authentication::areCredentialsValid(const Credentials & credentials, const
case AuthenticationType::DOUBLE_SHA1_PASSWORD:
case AuthenticationType::BCRYPT_PASSWORD:
case AuthenticationType::LDAP:
case AuthenticationType::HTTP:
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
case AuthenticationType::KERBEROS:
@ -218,6 +233,7 @@ bool Authentication::areCredentialsValid(const Credentials & credentials, const
case AuthenticationType::DOUBLE_SHA1_PASSWORD:
case AuthenticationType::BCRYPT_PASSWORD:
case AuthenticationType::LDAP:
case AuthenticationType::HTTP:
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
case AuthenticationType::KERBEROS:

View File

@ -14,12 +14,19 @@ namespace ErrorCodes
class Credentials;
class ExternalAuthenticators;
class SettingsChanges;
/// TODO: Try to move this checking to Credentials.
struct Authentication
{
/// Checks the credentials (passwords, readiness, etc.)
static bool areCredentialsValid(const Credentials & credentials, const AuthenticationData & auth_data, const ExternalAuthenticators & external_authenticators);
/// If necessary, makes a request to external authenticators and fills in the session settings if they were
/// returned by the authentication server
static bool areCredentialsValid(
const Credentials & credentials,
const AuthenticationData & auth_data,
const ExternalAuthenticators & external_authenticators,
SettingsChanges & settings);
// A signaling class used to communicate requirements for credentials.
template <typename CredentialsType>

View File

@ -105,7 +105,8 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash)
&& (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm)
&& (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names)
&& (lhs.ssh_keys == rhs.ssh_keys);
&& (lhs.ssh_keys == rhs.ssh_keys) && (lhs.http_auth_scheme == rhs.http_auth_scheme)
&& (lhs.http_auth_server_name == rhs.http_auth_server_name);
}
@ -128,6 +129,7 @@ void AuthenticationData::setPassword(const String & password_)
case AuthenticationType::KERBEROS:
case AuthenticationType::SSL_CERTIFICATE:
case AuthenticationType::SSH_KEY:
case AuthenticationType::HTTP:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot specify password for authentication type {}", toString(type));
case AuthenticationType::MAX:
@ -232,6 +234,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
case AuthenticationType::KERBEROS:
case AuthenticationType::SSL_CERTIFICATE:
case AuthenticationType::SSH_KEY:
case AuthenticationType::HTTP:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot specify password binary hash for authentication type {}", toString(type));
case AuthenticationType::MAX:
@ -326,6 +329,12 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
#endif
}
case AuthenticationType::HTTP:
{
node->children.push_back(std::make_shared<ASTLiteral>(getHTTPAuthenticationServerName()));
node->children.push_back(std::make_shared<ASTLiteral>(toString(getHTTPAuthenticationScheme())));
break;
}
case AuthenticationType::NO_PASSWORD: [[fallthrough]];
case AuthenticationType::MAX:
@ -484,6 +493,17 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
auth_data.setSSLCertificateCommonNames(std::move(common_names));
}
else if (query.type == AuthenticationType::HTTP)
{
String server = checkAndGetLiteralArgument<String>(args[0], "http_auth_server_name");
auto scheme = HTTPAuthenticationScheme::BASIC; // Default scheme
if (args_size > 1)
scheme = parseHTTPAuthenticationScheme(checkAndGetLiteralArgument<String>(args[1], "scheme"));
auth_data.setHTTPAuthenticationServerName(server);
auth_data.setHTTPAuthenticationScheme(scheme);
}
else
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected ASTAuthenticationData structure");

View File

@ -1,13 +1,14 @@
#pragma once
#include <Access/Common/AuthenticationType.h>
#include <Common/SSH/Wrappers.h>
#include <Parsers/Access/ASTAuthenticationData.h>
#include <Access/Common/HTTPAuthenticationScheme.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/Access/ASTAuthenticationData.h>
#include <Common/SSH/Wrappers.h>
#include <vector>
#include <base/types.h>
#include <boost/container/flat_set.hpp>
#include <vector>
namespace DB
{
@ -61,6 +62,12 @@ public:
const std::vector<ssh::SSHKey> & getSSHKeys() const { return ssh_keys; }
void setSSHKeys(std::vector<ssh::SSHKey> && ssh_keys_) { ssh_keys = std::forward<std::vector<ssh::SSHKey>>(ssh_keys_); }
HTTPAuthenticationScheme getHTTPAuthenticationScheme() const { return http_auth_scheme; }
void setHTTPAuthenticationScheme(HTTPAuthenticationScheme scheme) { http_auth_scheme = scheme; }
const String & getHTTPAuthenticationServerName() const { return http_auth_server_name; }
void setHTTPAuthenticationServerName(const String & name) { http_auth_server_name = name; }
friend bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs);
friend bool operator !=(const AuthenticationData & lhs, const AuthenticationData & rhs) { return !(lhs == rhs); }
@ -88,6 +95,9 @@ private:
boost::container::flat_set<String> ssl_certificate_common_names;
String salt;
std::vector<ssh::SSHKey> ssh_keys;
/// HTTP authentication properties
String http_auth_server_name;
HTTPAuthenticationScheme http_auth_scheme = HTTPAuthenticationScheme::BASIC;
};
}

View File

@ -67,6 +67,11 @@ const AuthenticationTypeInfo & AuthenticationTypeInfo::get(AuthenticationType ty
static const auto info = make_info("SSH_KEY");
return info;
}
case AuthenticationType::HTTP:
{
static const auto info = make_info("HTTP");
return info;
}
case AuthenticationType::MAX:
break;
}

View File

@ -37,6 +37,9 @@ enum class AuthenticationType
/// The check is performed on server side by decrypting the data and comparing with the original string.
SSH_KEY,
/// Authentication through HTTP protocol
HTTP,
MAX,
};

View File

@ -0,0 +1,31 @@
#include "HTTPAuthenticationScheme.h"
#include <base/types.h>
#include <Poco/String.h>
#include <Common/Exception.h>
#include <magic_enum.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
String toString(HTTPAuthenticationScheme scheme)
{
return String(magic_enum::enum_name(scheme));
}
HTTPAuthenticationScheme parseHTTPAuthenticationScheme(const String & scheme_str)
{
auto scheme = magic_enum::enum_cast<HTTPAuthenticationScheme>(Poco::toUpper(scheme_str));
if (!scheme)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown HTTP authentication scheme: {}. Possible value is 'BASIC'", scheme_str);
return *scheme;
}
}

View File

@ -0,0 +1,16 @@
#pragma once
#include <base/types.h>
namespace DB
{
enum class HTTPAuthenticationScheme
{
BASIC,
};
String toString(HTTPAuthenticationScheme scheme);
HTTPAuthenticationScheme parseHTTPAuthenticationScheme(const String & scheme_str);
}

View File

@ -262,9 +262,16 @@ void ContextAccess::initialize()
UserPtr changed_user = entity ? typeid_cast<UserPtr>(entity) : nullptr;
std::lock_guard lock2{ptr->mutex};
ptr->setUser(changed_user);
if (!ptr->user && !ptr->user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041, a)");
});
setUser(access_control->read<User>(*params.user_id));
if (!user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041, b)");
initialized = true;
}
@ -378,12 +385,16 @@ UserPtr ContextAccess::tryGetUser() const
String ContextAccess::getUserName() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
return user_name;
}
std::shared_ptr<const EnabledRolesInfo> ContextAccess::getRolesInfo() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (roles_info)
return roles_info;
static const auto no_roles = std::make_shared<EnabledRolesInfo>();
@ -394,6 +405,9 @@ RowPolicyFilterPtr ContextAccess::getRowPolicyFilter(const String & database, co
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
RowPolicyFilterPtr filter;
if (enabled_row_policies)
filter = enabled_row_policies->getFilter(database, table_name, filter_type);
@ -414,6 +428,9 @@ std::shared_ptr<const EnabledQuota> ContextAccess::getQuota() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (!enabled_quota)
{
if (roles_info)
@ -445,6 +462,8 @@ std::optional<QuotaUsage> ContextAccess::getQuotaUsage() const
SettingsChanges ContextAccess::getDefaultSettings() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (enabled_settings)
{
if (auto info = enabled_settings->getInfo())
@ -457,6 +476,8 @@ SettingsChanges ContextAccess::getDefaultSettings() const
std::shared_ptr<const SettingsProfilesInfo> ContextAccess::getDefaultProfileInfo() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (enabled_settings)
return enabled_settings->getInfo();
static const auto everything_by_default = std::make_shared<SettingsProfilesInfo>(*access_control);
@ -467,6 +488,8 @@ std::shared_ptr<const SettingsProfilesInfo> ContextAccess::getDefaultProfileInfo
std::shared_ptr<const AccessRights> ContextAccess::getAccessRights() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (access)
return access;
static const auto nothing_granted = std::make_shared<AccessRights>();
@ -477,6 +500,8 @@ std::shared_ptr<const AccessRights> ContextAccess::getAccessRights() const
std::shared_ptr<const AccessRights> ContextAccess::getAccessRightsWithImplicit() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (access_with_implicit)
return access_with_implicit;
static const auto nothing_granted = std::make_shared<AccessRights>();

View File

@ -183,9 +183,11 @@ private:
const AccessControl * access_control = nullptr;
const Params params;
mutable std::atomic<bool> initialized = false; // can be removed after Bug 5504 is resolved
mutable std::atomic<bool> user_was_dropped = false;
mutable std::atomic<Poco::Logger *> trace_log = nullptr;
mutable std::mutex mutex;
mutable UserPtr user TSA_GUARDED_BY(mutex);
mutable String user_name TSA_GUARDED_BY(mutex);
mutable scope_guard subscription_for_user_change TSA_GUARDED_BY(mutex);
@ -198,8 +200,6 @@ private:
mutable std::shared_ptr<const EnabledRowPolicies> row_policies_of_initial_user TSA_GUARDED_BY(mutex);
mutable std::shared_ptr<const EnabledQuota> enabled_quota TSA_GUARDED_BY(mutex);
mutable std::shared_ptr<const EnabledSettings> enabled_settings TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
};
}

View File

@ -1,11 +1,14 @@
#include <Access/ExternalAuthenticators.h>
#include <Access/LDAPClient.h>
#include <Access/SettingsAuthResponseParser.h>
#include <Access/resolveSetting.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <Common/SettingsChanges.h>
#include <Common/SipHash.h>
#include <Common/quoteString.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <boost/algorithm/string/case_conv.hpp>
#include <Poco/Util/AbstractConfiguration.h>
#include <optional>
#include <utility>
@ -230,6 +233,24 @@ void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util::
params.keytab = config.getString("kerberos.keytab", "");
}
HTTPAuthClientParams parseHTTPAuthParams(const Poco::Util::AbstractConfiguration & config, const String & prefix)
{
HTTPAuthClientParams http_auth_params;
http_auth_params.uri = config.getString(prefix + ".uri");
size_t connection_timeout_ms = config.getInt(prefix + ".connection_timeout_ms", 1000);
size_t receive_timeout_ms = config.getInt(prefix + ".receive_timeout_ms", 1000);
size_t send_timeout_ms = config.getInt(prefix + ".send_timeout_ms", 1000);
http_auth_params.timeouts = ConnectionTimeouts{connection_timeout_ms, receive_timeout_ms, send_timeout_ms};
http_auth_params.max_tries = config.getInt(prefix + ".max_tries", 3);
http_auth_params.retry_initial_backoff_ms = config.getInt(prefix + ".retry_initial_backoff_ms", 50);
http_auth_params.retry_max_backoff_ms = config.getInt(prefix + ".retry_max_backoff_ms", 1000);
return http_auth_params;
}
}
void parseLDAPRoleSearchParams(LDAPClient::RoleSearchParams & params, const Poco::Util::AbstractConfiguration & config, const String & prefix)
@ -265,6 +286,9 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur
std::size_t ldap_servers_key_count = 0;
std::size_t kerberos_keys_count = 0;
std::size_t http_auth_server_keys_count = 0;
const String http_auth_servers_config = "http_authentication_servers";
for (auto key : all_keys)
{
@ -276,6 +300,7 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur
ldap_servers_key_count += (key == "ldap_servers");
kerberos_keys_count += (key == "kerberos");
http_auth_server_keys_count += (key == http_auth_servers_config);
}
if (ldap_servers_key_count > 1)
@ -284,6 +309,25 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur
if (kerberos_keys_count > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple kerberos sections are not allowed");
if (http_auth_server_keys_count > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple http_authentication_servers sections are not allowed");
Poco::Util::AbstractConfiguration::Keys http_auth_server_names;
config.keys(http_auth_servers_config, http_auth_server_names);
http_auth_servers.clear();
for (const auto & http_auth_server_name : http_auth_server_names)
{
String prefix = fmt::format("{}.{}", http_auth_servers_config, http_auth_server_name);
try
{
http_auth_servers[http_auth_server_name] = parseHTTPAuthParams(config, prefix);
}
catch (...)
{
tryLogCurrentException(log, "Could not parse HTTP auth server" + backQuote(http_auth_server_name));
}
}
Poco::Util::AbstractConfiguration::Keys ldap_server_names;
config.keys("ldap_servers", ldap_server_names);
ldap_client_params_blueprint.clear();
@ -490,4 +534,27 @@ GSSAcceptorContext::Params ExternalAuthenticators::getKerberosParams() const
return kerberos_params.value();
}
HTTPAuthClientParams ExternalAuthenticators::getHTTPAuthenticationParams(const String& server) const
{
std::scoped_lock lock{mutex};
const auto it = http_auth_servers.find(server);
if (it == http_auth_servers.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP server '{}' is not configured", server);
return it->second;
}
bool ExternalAuthenticators::checkHTTPBasicCredentials(
const String & server, const BasicCredentials & credentials, SettingsChanges & settings) const
{
auto params = getHTTPAuthenticationParams(server);
HTTPBasicAuthClient<SettingsAuthResponseParser> client(params);
auto [is_ok, settings_from_auth_server] = client.authenticate(credentials.getUserName(), credentials.getPassword());
if (is_ok)
std::ranges::move(settings_from_auth_server, std::back_inserter(settings));
return is_ok;
}
}

View File

@ -1,11 +1,14 @@
#pragma once
#include <Access/LDAPClient.h>
#include <Access/Credentials.h>
#include <Access/GSSAcceptor.h>
#include <Access/HTTPAuthClient.h>
#include <Access/LDAPClient.h>
#include <base/defines.h>
#include <base/types.h>
#include <base/extended_types.h>
#include <base/types.h>
#include <Poco/URI.h>
#include <chrono>
#include <map>
@ -27,6 +30,8 @@ namespace Poco
namespace DB
{
class SettingsChanges;
class ExternalAuthenticators
{
public:
@ -37,10 +42,13 @@ public:
bool checkLDAPCredentials(const String & server, const BasicCredentials & credentials,
const LDAPClient::RoleSearchParamsList * role_search_params = nullptr, LDAPClient::SearchResultsList * role_search_results = nullptr) const;
bool checkKerberosCredentials(const String & realm, const GSSAcceptorContext & credentials) const;
bool checkHTTPBasicCredentials(const String & server, const BasicCredentials & credentials, SettingsChanges & settings) const;
GSSAcceptorContext::Params getKerberosParams() const;
private:
HTTPAuthClientParams getHTTPAuthenticationParams(const String& server) const;
struct LDAPCacheEntry
{
UInt128 last_successful_params_hash = 0;
@ -56,6 +64,7 @@ private:
LDAPParams ldap_client_params_blueprint TSA_GUARDED_BY(mutex) ;
mutable LDAPCaches ldap_caches TSA_GUARDED_BY(mutex) ;
std::optional<GSSAcceptorContext::Params> kerberos_params TSA_GUARDED_BY(mutex) ;
std::unordered_map<String, HTTPAuthClientParams> http_auth_servers TSA_GUARDED_BY(mutex) ;
void resetImpl() TSA_REQUIRES(mutex);
};

View File

@ -0,0 +1,93 @@
#pragma once
#include <IO/ConnectionTimeouts.h>
#include <IO/HTTPCommon.h>
#include <base/sleep.h>
#include <Poco/Net/HTTPBasicCredentials.h>
namespace DB
{
struct HTTPAuthClientParams
{
Poco::URI uri;
ConnectionTimeouts timeouts;
size_t max_tries;
size_t retry_initial_backoff_ms;
size_t retry_max_backoff_ms;
};
template <typename TResponseParser>
class HTTPAuthClient
{
public:
using Result = TResponseParser::Result;
HTTPAuthClient(const HTTPAuthClientParams & params, const TResponseParser & parser_ = TResponseParser{})
: timeouts{params.timeouts}
, max_tries{params.max_tries}
, retry_initial_backoff_ms{params.retry_initial_backoff_ms}
, retry_max_backoff_ms{params.retry_max_backoff_ms}
, uri{params.uri}
, parser{parser_}
{
}
Result authenticateRequest(Poco::Net::HTTPRequest & request) const
{
auto session = makeHTTPSession(uri, timeouts);
Poco::Net::HTTPResponse response;
auto milliseconds_to_wait = retry_initial_backoff_ms;
for (size_t attempt = 0; attempt < max_tries; ++attempt)
{
bool last_attempt = attempt + 1 >= max_tries;
try
{
session->sendRequest(request);
auto & body_stream = session->receiveResponse(response);
return parser.parse(response, &body_stream);
}
catch (const Poco::Exception &) // TODO: make retries smarter
{
if (last_attempt)
throw;
sleepForMilliseconds(milliseconds_to_wait);
milliseconds_to_wait = std::min(milliseconds_to_wait * 2, retry_max_backoff_ms);
}
}
UNREACHABLE();
}
const Poco::URI & getURI() const { return uri; }
private:
const ConnectionTimeouts timeouts;
const size_t max_tries;
const size_t retry_initial_backoff_ms;
const size_t retry_max_backoff_ms;
const Poco::URI uri;
TResponseParser parser;
};
template <typename TResponseParser>
class HTTPBasicAuthClient : private HTTPAuthClient<TResponseParser>
{
public:
using HTTPAuthClient<TResponseParser>::HTTPAuthClient;
using Result = HTTPAuthClient<TResponseParser>::Result;
Result authenticate(const String & user_name, const String & password) const
{
Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, this->getURI().getPathAndQuery()};
Poco::Net::HTTPBasicCredentials basic_credentials{user_name, password};
basic_credentials.authenticate(request);
return this->authenticateRequest(request);
}
};
}

View File

@ -488,7 +488,7 @@ bool IAccessStorage::updateImpl(const UUID & id, const UpdateFunc &, bool throw_
}
UUID IAccessStorage::authenticate(
AuthResult IAccessStorage::authenticate(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
@ -499,7 +499,7 @@ UUID IAccessStorage::authenticate(
}
std::optional<UUID> IAccessStorage::authenticate(
std::optional<AuthResult> IAccessStorage::authenticate(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
@ -511,7 +511,7 @@ std::optional<UUID> IAccessStorage::authenticate(
}
std::optional<UUID> IAccessStorage::authenticateImpl(
std::optional<AuthResult> IAccessStorage::authenticateImpl(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
@ -523,6 +523,7 @@ std::optional<UUID> IAccessStorage::authenticateImpl(
{
if (auto user = tryRead<User>(*id))
{
AuthResult auth_result { .user_id = *id };
if (!isAddressAllowed(*user, address))
throwAddressNotAllowed(address);
@ -531,10 +532,10 @@ std::optional<UUID> IAccessStorage::authenticateImpl(
((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password))
throwAuthenticationTypeNotAllowed(auth_type);
if (!areCredentialsValid(*user, credentials, external_authenticators))
if (!areCredentialsValid(*user, credentials, external_authenticators, auth_result.settings))
throwInvalidCredentials();
return id;
return auth_result;
}
}
@ -548,7 +549,8 @@ std::optional<UUID> IAccessStorage::authenticateImpl(
bool IAccessStorage::areCredentialsValid(
const User & user,
const Credentials & credentials,
const ExternalAuthenticators & external_authenticators) const
const ExternalAuthenticators & external_authenticators,
SettingsChanges & settings) const
{
if (!credentials.isReady())
return false;
@ -564,7 +566,7 @@ bool IAccessStorage::areCredentialsValid(
return false;
}
return Authentication::areCredentialsValid(credentials, user.auth_data, external_authenticators);
return Authentication::areCredentialsValid(credentials, user.auth_data, external_authenticators, settings);
}

View File

@ -5,10 +5,12 @@
#include <Core/UUID.h>
#include <Parsers/IParser.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>
#include <Common/SettingsChanges.h>
#include <atomic>
#include <functional>
#include <optional>
#include <vector>
#include <atomic>
namespace Poco { class Logger; }
@ -23,6 +25,14 @@ enum class AuthenticationType;
class BackupEntriesCollector;
class RestorerFromBackup;
/// Result of authentication
struct AuthResult
{
UUID user_id;
/// Session settings received from authentication server (if any)
SettingsChanges settings;
};
/// Contains entities, i.e. instances of classes derived from IAccessEntity.
/// The implementations of this class MUST be thread-safe.
class IAccessStorage : public boost::noncopyable
@ -171,8 +181,19 @@ public:
/// Finds a user, check the provided credentials and returns the ID of the user if they are valid.
/// Throws an exception if no such user or credentials are invalid.
UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool allow_no_password, bool allow_plaintext_password) const;
std::optional<UUID> authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const;
AuthResult authenticate(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
bool allow_no_password,
bool allow_plaintext_password) const;
std::optional<AuthResult> authenticate(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
bool throw_if_user_not_exists,
bool allow_no_password,
bool allow_plaintext_password) const;
/// Returns true if this storage can be stored to or restored from a backup.
virtual bool isBackupAllowed() const { return false; }
@ -190,8 +211,18 @@ protected:
virtual bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
virtual bool removeImpl(const UUID & id, bool throw_if_not_exists);
virtual bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
virtual std::optional<UUID> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const;
virtual bool areCredentialsValid(const User & user, const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const;
virtual std::optional<AuthResult> authenticateImpl(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
bool throw_if_user_not_exists,
bool allow_no_password,
bool allow_plaintext_password) const;
virtual bool areCredentialsValid(
const User & user,
const Credentials & credentials,
const ExternalAuthenticators & external_authenticators,
SettingsChanges & settings) const;
virtual bool isAddressAllowed(const User & user, const Poco::Net::IPAddress & address) const;
static UUID generateRandomID();
Poco::Logger * getLogger() const;

View File

@ -450,7 +450,7 @@ std::optional<std::pair<String, AccessEntityType>> LDAPAccessStorage::readNameWi
}
std::optional<UUID> LDAPAccessStorage::authenticateImpl(
std::optional<AuthResult> LDAPAccessStorage::authenticateImpl(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
@ -503,7 +503,9 @@ std::optional<UUID> LDAPAccessStorage::authenticateImpl(
updateAssignedRolesNoLock(*id, user->getName(), external_roles);
}
return id;
if (id)
return AuthResult{ .user_id = *id };
return std::nullopt;
}
}

View File

@ -48,7 +48,7 @@ private: // IAccessStorage implementations.
virtual std::vector<UUID> findAllImpl(AccessEntityType type) const override;
virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
virtual std::optional<UUID> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
virtual std::optional<AuthResult> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
void setConfiguration(const Poco::Util::AbstractConfiguration & config, const String & prefix);
void processRoleChange(const UUID & id, const AccessEntityPtr & entity);

View File

@ -415,7 +415,7 @@ bool MultipleAccessStorage::updateImpl(const UUID & id, const UpdateFunc & updat
}
std::optional<UUID>
std::optional<AuthResult>
MultipleAccessStorage::authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
bool throw_if_user_not_exists,
@ -426,14 +426,14 @@ MultipleAccessStorage::authenticateImpl(const Credentials & credentials, const P
{
const auto & storage = (*storages)[i];
bool is_last_storage = (i == storages->size() - 1);
auto id = storage->authenticate(credentials, address, external_authenticators,
auto auth_result = storage->authenticate(credentials, address, external_authenticators,
(throw_if_user_not_exists && is_last_storage),
allow_no_password, allow_plaintext_password);
if (id)
if (auth_result)
{
std::lock_guard lock{mutex};
ids_cache.set(*id, storage);
return id;
ids_cache.set(auth_result->user_id, storage);
return auth_result;
}
}

View File

@ -67,7 +67,7 @@ protected:
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
std::optional<UUID> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
std::optional<AuthResult> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
private:
using Storages = std::vector<StoragePtr>;

View File

@ -0,0 +1,45 @@
#include "SettingsAuthResponseParser.h"
#include <Access/resolveSetting.h>
#include <IO/HTTPCommon.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Parser.h>
#include <Poco/Timespan.h>
namespace DB
{
SettingsAuthResponseParser::Result
SettingsAuthResponseParser::parse(const Poco::Net::HTTPResponse & response, std::istream * body_stream) const
{
Result result;
if (response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK)
return result;
result.is_ok = true;
if (!body_stream)
return result;
Poco::JSON::Parser parser;
Poco::JSON::Object::Ptr parsed_body;
try
{
Poco::Dynamic::Var json = parser.parse(*body_stream);
Poco::JSON::Object::Ptr obj = json.extract<Poco::JSON::Object::Ptr>();
Poco::JSON::Object::Ptr settings_obj = obj->getObject(settings_key);
if (settings_obj)
for (const auto & [key, value] : *settings_obj)
result.settings.emplace_back(key, settingStringToValueUtil(key, value));
}
catch (...)
{
LOG_INFO(&Poco::Logger::get("HTTPAuthentication"), "Failed to parse settings from authentication response. Skip it.");
}
return result;
}
}

View File

@ -0,0 +1,28 @@
#pragma once
#include <Common/SettingsChanges.h>
#include <istream>
namespace Poco::Net
{
class HTTPResponse;
}
namespace DB
{
/// Class for parsing authentication response containing session settings
class SettingsAuthResponseParser
{
static constexpr auto settings_key = "settings";
public:
struct Result
{
bool is_ok = false;
SettingsChanges settings;
};
Result parse(const Poco::Net::HTTPResponse & response, std::istream * body_stream) const;
};
}

View File

@ -135,18 +135,22 @@ namespace
const auto ssh_keys_config = user_config + ".ssh_keys";
bool has_ssh_keys = config.has(ssh_keys_config);
size_t num_password_fields = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex + has_ldap + has_kerberos + has_certificates + has_ssh_keys;
const auto http_auth_config = user_config + ".http_authentication";
bool has_http_auth = config.has(http_auth_config);
size_t num_password_fields = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex
+ has_ldap + has_kerberos + has_certificates + has_ssh_keys + has_http_auth;
if (num_password_fields > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "More than one field of 'password', 'password_sha256_hex', "
"'password_double_sha1_hex', 'no_password', 'ldap', 'kerberos', 'ssl_certificates', 'ssh_keys' "
"are used to specify authentication info for user {}. "
"'password_double_sha1_hex', 'no_password', 'ldap', 'kerberos', 'ssl_certificates', 'ssh_keys', "
"'http_authentication' are used to specify authentication info for user {}. "
"Must be only one of them.", user_name);
if (num_password_fields < 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'password' or 'password_sha256_hex' "
"or 'password_double_sha1_hex' or 'no_password' or 'ldap' or 'kerberos "
"or 'ssl_certificates' or 'ssh_keys' must be specified for user {}.", user_name);
"or 'ssl_certificates' or 'ssh_keys' or 'http_authentication' must be specified for user {}.", user_name);
if (has_password_plaintext)
{
@ -248,6 +252,13 @@ namespace
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
#endif
}
else if (has_http_auth)
{
user->auth_data = AuthenticationData{AuthenticationType::HTTP};
user->auth_data.setHTTPAuthenticationServerName(config.getString(http_auth_config + ".server"));
auto scheme = config.getString(http_auth_config + ".scheme");
user->auth_data.setHTTPAuthenticationScheme(parseHTTPAuthenticationScheme(scheme));
}
auto auth_type = user->auth_data.getType();
if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) ||

View File

@ -0,0 +1,52 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace DB
{
struct Settings;
namespace
{
AggregateFunctionPtr
createAggregateFunctionLargestTriangleThreeBuckets(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
if (!(isNumber(argument_types[0]) || isDateOrDate32(argument_types[0]) || isDateTime(argument_types[0])
|| isDateTime64(argument_types[0])))
throw Exception(
ErrorCodes::NOT_IMPLEMENTED,
"Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the first argument",
name);
if (!(isNumber(argument_types[1]) || isDateOrDate32(argument_types[1]) || isDateTime(argument_types[1])
|| isDateTime64(argument_types[1])))
throw Exception(
ErrorCodes::NOT_IMPLEMENTED,
"Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the second argument",
name);
return std::make_shared<AggregateFunctionLargestTriangleThreeBuckets>(argument_types, parameters);
}
}
void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory)
{
factory.registerFunction(AggregateFunctionLargestTriangleThreeBuckets::name, createAggregateFunctionLargestTriangleThreeBuckets);
factory.registerAlias("lttb", AggregateFunctionLargestTriangleThreeBuckets::name);
}
}

View File

@ -0,0 +1,327 @@
#pragma once
#include <iostream>
#include <limits>
#include <numeric>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/StatCommon.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsDateTime.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <base/types.h>
#include <Common/PODArray_fwd.h>
#include <Common/assert_cast.h>
#include <boost/math/distributions/normal.hpp>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
struct LargestTriangleThreeBucketsData : public StatisticalSample<Float64, Float64>
{
void add(const Float64 xval, const Float64 yval, Arena * arena)
{
this->addX(xval, arena);
this->addY(yval, arena);
}
void sort(Arena * arena)
{
// sort the this->x and this->y in ascending order of this->x using index
std::vector<size_t> index(this->x.size());
std::iota(index.begin(), index.end(), 0);
::sort(index.begin(), index.end(), [&](size_t i1, size_t i2) { return this->x[i1] < this->x[i2]; });
SampleX temp_x{};
SampleY temp_y{};
for (size_t i = 0; i < this->x.size(); ++i)
{
temp_x.push_back(this->x[index[i]], arena);
temp_y.push_back(this->y[index[i]], arena);
}
for (size_t i = 0; i < this->x.size(); ++i)
{
this->x[i] = temp_x[i];
this->y[i] = temp_y[i];
}
}
PODArray<std::pair<Float64, Float64>> getResult(size_t total_buckets, Arena * arena)
{
// Sort the data
this->sort(arena);
PODArray<std::pair<Float64, Float64>> result;
// Handle special cases for small data list
if (this->x.size() <= total_buckets)
{
for (size_t i = 0; i < this->x.size(); ++i)
{
result.emplace_back(std::make_pair(this->x[i], this->y[i]));
}
return result;
}
// Handle special cases for 0 or 1 or 2 buckets
if (total_buckets == 0)
return result;
if (total_buckets == 1)
{
result.emplace_back(std::make_pair(this->x.front(), this->y.front()));
return result;
}
if (total_buckets == 2)
{
result.emplace_back(std::make_pair(this->x.front(), this->y.front()));
result.emplace_back(std::make_pair(this->x.back(), this->y.back()));
return result;
}
// Find the size of each bucket
size_t single_bucket_size = this->x.size() / total_buckets;
// Include the first data point
result.emplace_back(std::make_pair(this->x[0], this->y[0]));
for (size_t i = 1; i < total_buckets - 1; ++i) // Skip the first and last bucket
{
size_t start_index = i * single_bucket_size;
size_t end_index = (i + 1) * single_bucket_size;
// Compute the average point in the next bucket
Float64 avg_x = 0;
Float64 avg_y = 0;
for (size_t j = end_index; j < (i + 2) * single_bucket_size; ++j)
{
avg_x += this->x[j];
avg_y += this->y[j];
}
avg_x /= single_bucket_size;
avg_y /= single_bucket_size;
// Find the point in the current bucket that forms the largest triangle
size_t max_index = start_index;
Float64 max_area = 0.0;
for (size_t j = start_index; j < end_index; ++j)
{
Float64 area = std::abs(
0.5
* (result.back().first * this->y[j] + this->x[j] * avg_y + avg_x * result.back().second - result.back().first * avg_y
- this->x[j] * result.back().second - avg_x * this->y[j]));
if (area > max_area)
{
max_area = area;
max_index = j;
}
}
// Include the selected point
result.emplace_back(std::make_pair(this->x[max_index], this->y[max_index]));
}
// Include the last data point
result.emplace_back(std::make_pair(this->x.back(), this->y.back()));
return result;
}
};
class AggregateFunctionLargestTriangleThreeBuckets final : public IAggregateFunctionDataHelper<LargestTriangleThreeBucketsData, AggregateFunctionLargestTriangleThreeBuckets>
{
private:
UInt64 total_buckets{0};
TypeIndex x_type;
TypeIndex y_type;
public:
explicit AggregateFunctionLargestTriangleThreeBuckets(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<LargestTriangleThreeBucketsData, AggregateFunctionLargestTriangleThreeBuckets>({arguments}, {}, createResultType(arguments))
{
if (params.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require one parameter", getName());
if (params[0].getType() != Field::Types::UInt64)
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a UInt64", getName());
total_buckets = params[0].get<UInt64>();
this->x_type = WhichDataType(arguments[0]).idx;
this->y_type = WhichDataType(arguments[1]).idx;
}
static constexpr auto name = "largestTriangleThreeBuckets";
String getName() const override { return name; }
bool allocatesMemoryInArena() const override { return true; }
static DataTypePtr createResultType(const DataTypes & arguments)
{
TypeIndex x_type = arguments[0]->getTypeId();
TypeIndex y_type = arguments[1]->getTypeId();
UInt32 x_scale = 0;
UInt32 y_scale = 0;
if (const auto * datetime64_type = typeid_cast<const DataTypeDateTime64 *>(arguments[0].get()))
{
x_scale = datetime64_type->getScale();
}
if (const auto * datetime64_type = typeid_cast<const DataTypeDateTime64 *>(arguments[1].get()))
{
y_scale = datetime64_type->getScale();
}
DataTypes types = {getDataTypeFromTypeIndex(x_type, x_scale), getDataTypeFromTypeIndex(y_type, y_scale)};
auto tuple = std::make_shared<DataTypeTuple>(std::move(types));
return std::make_shared<DataTypeArray>(tuple);
}
static DataTypePtr getDataTypeFromTypeIndex(TypeIndex type_index, UInt32 scale)
{
DataTypePtr data_type;
switch (type_index)
{
case TypeIndex::Date:
data_type = std::make_shared<DataTypeDate>();
break;
case TypeIndex::Date32:
data_type = std::make_shared<DataTypeDate32>();
break;
case TypeIndex::DateTime:
data_type = std::make_shared<DataTypeDateTime>();
break;
case TypeIndex::DateTime64:
data_type = std::make_shared<DataTypeDateTime64>(scale);
break;
default:
data_type = std::make_shared<DataTypeNumber<Float64>>();
}
return data_type;
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
Float64 x = getFloat64DataFromColumn(columns[0], row_num, this->x_type);
Float64 y = getFloat64DataFromColumn(columns[1], row_num, this->y_type);
this->data(place).add(x, y, arena);
}
Float64 getFloat64DataFromColumn(const IColumn * column, size_t row_num, TypeIndex type_index) const
{
switch (type_index)
{
case TypeIndex::Date:
return static_cast<const ColumnDate &>(*column).getData()[row_num];
case TypeIndex::Date32:
return static_cast<const ColumnDate32 &>(*column).getData()[row_num];
case TypeIndex::DateTime:
return static_cast<const ColumnDateTime &>(*column).getData()[row_num];
case TypeIndex::DateTime64:
return static_cast<const ColumnDateTime64 &>(*column).getData()[row_num];
default:
return column->getFloat64(row_num);
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & a = this->data(place);
const auto & b = this->data(rhs);
a.merge(b, arena);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).write(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
this->data(place).read(buf, arena);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
{
auto res = this->data(place).getResult(total_buckets, arena);
auto & col = assert_cast<ColumnArray &>(to);
auto & col_offsets = assert_cast<ColumnArray::ColumnOffsets &>(col.getOffsetsColumn());
auto column_x_adder_func = getColumnAdderFunc(x_type);
auto column_y_adder_func = getColumnAdderFunc(y_type);
for (size_t i = 0; i < res.size(); ++i)
{
auto & column_tuple = assert_cast<ColumnTuple &>(col.getData());
column_x_adder_func(column_tuple.getColumn(0), res[i].first);
column_y_adder_func(column_tuple.getColumn(1), res[i].second);
}
col_offsets.getData().push_back(col.getData().size());
}
std::function<void(IColumn &, Float64)> getColumnAdderFunc(TypeIndex type_index) const
{
switch (type_index)
{
case TypeIndex::Date:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDate &>(column);
col.getData().push_back(static_cast<UInt16>(value));
};
case TypeIndex::Date32:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDate32 &>(column);
col.getData().push_back(static_cast<UInt32>(value));
};
case TypeIndex::DateTime:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDateTime &>(column);
col.getData().push_back(static_cast<UInt32>(value));
};
case TypeIndex::DateTime64:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDateTime64 &>(column);
col.getData().push_back(static_cast<UInt64>(value));
};
default:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnFloat64 &>(column);
col.getData().push_back(value);
};
}
}
};
}

View File

@ -129,7 +129,10 @@ public:
{
writePODBinary(value[i].first, buf);
writePODBinary(zero_padding, buf);
writePODBinary(value[i].second, buf);
if constexpr (std::endian::native == std::endian::little)
writePODBinary(value[i].second, buf);
else
writePODBinary(std::byteswap(value[i].second), buf);
}
}

View File

@ -8,9 +8,6 @@
#include <DataTypes/DataTypeIPv4andIPv6.h>
static inline constexpr UInt64 TOP_K_MAX_SIZE = 0xFFFFFF;
namespace DB
{
@ -134,9 +131,12 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
threshold = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
if (threshold > TOP_K_MAX_SIZE || load_factor > TOP_K_MAX_SIZE || threshold * load_factor > TOP_K_MAX_SIZE)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too large parameter(s) for aggregate function '{}' (maximum is {})", name, toString(TOP_K_MAX_SIZE));
if (threshold > DB::TOP_K_MAX_SIZE || load_factor > DB::TOP_K_MAX_SIZE || threshold * load_factor > DB::TOP_K_MAX_SIZE)
throw Exception(
ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too large parameter(s) for aggregate function '{}' (maximum is {})",
name,
toString(DB::TOP_K_MAX_SIZE));
if (threshold == 0)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Parameter 0 is illegal for aggregate function '{}'", name);

View File

@ -20,6 +20,12 @@ namespace DB
{
struct Settings;
static inline constexpr UInt64 TOP_K_MAX_SIZE = 0xFFFFFF;
namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
}
template <typename T>
struct AggregateFunctionTopKData
@ -163,11 +169,18 @@ public:
{
auto & set = this->data(place).value;
set.clear();
set.resize(reserved);
// Specialized here because there's no deserialiser for StringRef
size_t size = 0;
readVarUInt(size, buf);
if (unlikely(size > TOP_K_MAX_SIZE))
throw Exception(
ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too large size ({}) for aggregate function '{}' state (maximum is {})",
size,
getName(),
TOP_K_MAX_SIZE);
set.resize(size);
for (size_t i = 0; i < size; ++i)
{
auto ref = readStringBinaryInto(*arena, buf);

View File

@ -82,6 +82,7 @@ void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &);
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory);
void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -176,6 +177,7 @@ void registerAggregateFunctions()
registerAggregateFunctionAnalysisOfVariance(factory);
registerAggregateFunctionFlameGraph(factory);
registerAggregateFunctionKolmogorovSmirnovTest(factory);
registerAggregateFunctionLargestTriangleThreeBuckets(factory);
registerWindowFunctions(factory);
}

View File

@ -104,6 +104,7 @@ if (TARGET ch_contrib::nats_io)
endif()
add_headers_and_sources(dbms Storages/DataLakes)
add_headers_and_sources(dbms Storages/DataLakes/Iceberg)
add_headers_and_sources(dbms Common/NamedCollections)
if (TARGET ch_contrib::amqp_cpp)

View File

@ -72,6 +72,7 @@
#include <boost/algorithm/string/replace.hpp>
#include <iostream>
#include <filesystem>
#include <limits>
#include <map>
#include <memory>
#include <unordered_map>
@ -106,6 +107,7 @@ namespace ErrorCodes
extern const int CANNOT_OPEN_FILE;
extern const int FILE_ALREADY_EXISTS;
extern const int USER_SESSION_LIMIT_EXCEEDED;
extern const int NOT_IMPLEMENTED;
}
}
@ -560,11 +562,19 @@ try
}
WriteBuffer * out_buf = nullptr;
String pager = config().getString("pager", "");
if (!pager.empty())
{
if (SIG_ERR == signal(SIGPIPE, SIG_IGN))
throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
throwFromErrno("Cannot set signal handler for SIGPIPE.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
/// We need to reset signals that had been installed in the
/// setupSignalHandler() since terminal will send signals to both
/// processes and so signals will be delivered to the
/// clickhouse-client/local as well, which will be terminated when
/// signal will be delivered second time.
if (SIG_ERR == signal(SIGINT, SIG_IGN))
throwFromErrno("Cannot set signal handler for SIGINT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
if (SIG_ERR == signal(SIGQUIT, SIG_IGN))
throwFromErrno("Cannot set signal handler for SIGQUIT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
ShellCommand::Config config(pager);
config.pipe_stdin_only = true;
@ -710,6 +720,30 @@ void ClientBase::initLogsOutputStream()
}
}
void ClientBase::adjustSettings()
{
Settings settings = global_context->getSettings();
/// NOTE: Do not forget to set changed=false to avoid sending it to the server (to avoid breakage read only profiles)
/// In case of multi-query we allow data after semicolon since it will be
/// parsed by the client and interpreted as new query
if (is_multiquery && !global_context->getSettingsRef().input_format_values_allow_data_after_semicolon.changed)
{
settings.input_format_values_allow_data_after_semicolon = true;
settings.input_format_values_allow_data_after_semicolon.changed = false;
}
/// If pager is specified then output_format_pretty_max_rows is ignored, this should be handled by pager.
if (!pager.empty() && !global_context->getSettingsRef().output_format_pretty_max_rows.changed)
{
settings.output_format_pretty_max_rows = std::numeric_limits<UInt64>::max();
settings.output_format_pretty_max_rows.changed = false;
}
global_context->setSettings(settings);
}
void ClientBase::initTtyBuffer(ProgressOption progress)
{
if (tty_buf)
@ -1300,6 +1334,15 @@ void ClientBase::resetOutput()
{
pager_cmd->in.close();
pager_cmd->wait();
if (SIG_ERR == signal(SIGPIPE, SIG_DFL))
throwFromErrno("Cannot set signal handler for SIIGPIEP.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
if (SIG_ERR == signal(SIGINT, SIG_DFL))
throwFromErrno("Cannot set signal handler for SIGINT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
if (SIG_ERR == signal(SIGQUIT, SIG_DFL))
throwFromErrno("Cannot set signal handler for SIGQUIT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
setupSignalHandler();
}
pager_cmd = nullptr;
@ -1390,7 +1433,7 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars
}
/// Process the query that requires transferring data blocks to the server.
const auto parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
const auto & parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && std_in.eof())))
{
const auto & settings = global_context->getSettingsRef();
@ -1836,10 +1879,20 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (insert && insert->select)
insert->tryFindInputFunction(input_function);
bool is_async_insert = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
bool is_async_insert_with_inlined_data = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
if (is_async_insert_with_inlined_data)
{
bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && !std_in.eof();
bool have_external_data = have_data_in_stdin || insert->infile;
if (have_external_data)
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"Processing async inserts with both inlined and external data (from stdin or infile) is not supported");
}
/// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately.
if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert)
if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert_with_inlined_data)
{
if (input_function && insert->format.empty())
throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()");

View File

@ -58,8 +58,6 @@ enum ProgressOption
ProgressOption toProgressOption(std::string progress);
std::istream& operator>> (std::istream & in, ProgressOption & progress);
void interruptSignalHandler(int signum);
class InternalTextLogs;
class WriteBufferFromFileDescriptor;
@ -184,6 +182,9 @@ protected:
static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context);
bool processMultiQueryFromFile(const String & file_name);
/// Adjust some settings after command line options and config had been processed.
void adjustSettings();
void initTtyBuffer(ProgressOption progress);
/// Should be one of the first, to be destroyed the last,
@ -212,6 +213,8 @@ protected:
bool stderr_is_a_tty = false; /// stderr is a terminal.
uint64_t terminal_width = 0;
String pager;
String format; /// Query results output format.
bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering.
bool select_into_file_and_stdout = false; /// If writing result INTO OUTFILE AND STDOUT. It affects progress rendering.

View File

@ -95,12 +95,18 @@ void LocalConnection::sendQuery(
else
query_context = session.makeQueryContext();
query_context->setCurrentQueryId(query_id);
if (send_progress)
{
query_context->setProgressCallback([this] (const Progress & value) { this->updateProgress(value); });
query_context->setFileProgressCallback([this](const FileProgress & value) { this->updateProgress(Progress(value)); });
}
if (!current_database.empty())
/// Switch the database to the desired one (set by the USE query)
/// but don't attempt to do it if we are already in that database.
/// (there is a rare case when it matters - if we deleted the current database,
// we can still do some queries, but we cannot switch to the same database)
if (!current_database.empty() && current_database != query_context->getCurrentDatabase())
query_context->setCurrentDatabase(current_database);
query_context->addQueryParameters(query_parameters);
@ -125,7 +131,7 @@ void LocalConnection::sendQuery(
try
{
state->io = executeQuery(state->query, query_context, false, state->stage);
state->io = executeQuery(state->query, query_context, false, state->stage).second;
if (state->io.pipeline.pushing())
{

View File

@ -1,7 +1,6 @@
#include <Client/MultiplexedConnections.h>
#include <Common/thread_local_rng.h>
#include <Common/logger_useful.h>
#include <Core/Protocol.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/Operators.h>
@ -24,14 +23,6 @@ namespace ErrorCodes
}
#define MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION \
mutex_last_locked_by.store((getThreadId() << 32) | __LINE__); \
memcpy(mutex_memory_dump.data(), &cancel_mutex, mutex_memory_dump.size()); \
mutex_locked += 1; \
SCOPE_EXIT({ mutex_locked -= 1; });
/// When you remove this macro, please also remove the clang-tidy suppressions at the beginning + end of this file.
MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler)
: settings(settings_)
{
@ -86,7 +77,6 @@ MultiplexedConnections::MultiplexedConnections(
void MultiplexedConnections::sendScalarsData(Scalars & data)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot send scalars data: query not yet sent.");
@ -102,7 +92,6 @@ void MultiplexedConnections::sendScalarsData(Scalars & data)
void MultiplexedConnections::sendExternalTablesData(std::vector<ExternalTablesData> & data)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot send external tables data: query not yet sent.");
@ -131,7 +120,6 @@ void MultiplexedConnections::sendQuery(
bool with_pending_data)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Query already sent.");
@ -189,7 +177,6 @@ void MultiplexedConnections::sendQuery(
void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuids)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot send uuids after query is sent.");
@ -206,7 +193,6 @@ void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuid
void MultiplexedConnections::sendReadTaskResponse(const String & response)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (cancelled)
return;
current_connection->sendReadTaskResponse(response);
@ -216,7 +202,6 @@ void MultiplexedConnections::sendReadTaskResponse(const String & response)
void MultiplexedConnections::sendMergeTreeReadTaskResponse(const ParallelReadResponse & response)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (cancelled)
return;
current_connection->sendMergeTreeReadTaskResponse(response);
@ -226,29 +211,13 @@ void MultiplexedConnections::sendMergeTreeReadTaskResponse(const ParallelReadRes
Packet MultiplexedConnections::receivePacket()
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
Packet packet = receivePacketUnlocked({});
return packet;
}
void MultiplexedConnections::disconnect()
{
/// We've seen this lock mysteriously get stuck forever, without any other thread seeming to
/// hold the mutex. This is temporary code to print some extra information next time it happens.
/// std::lock_guard lock(cancel_mutex);
if (!cancel_mutex.try_lock_for(std::chrono::hours(1)))
{
UInt64 last_locked = mutex_last_locked_by.load();
std::array<UInt8, sizeof(std::timed_mutex)> new_memory_dump;
memcpy(new_memory_dump.data(), &cancel_mutex, new_memory_dump.size());
LOG_ERROR(&Poco::Logger::get("MultiplexedConnections"), "Deadlock in MultiplexedConnections::disconnect()! Mutex was last (instrumentedly) locked by thread {} on line {}, lock balance: {}, mutex memory when last locked: {}, mutex memory now: {}", last_locked >> 32, last_locked & 0xffffffff, mutex_locked.load(), hexString(mutex_memory_dump.data(), mutex_memory_dump.size()), hexString(new_memory_dump.data(), new_memory_dump.size()));
throw Exception(ErrorCodes::LOGICAL_ERROR, "Deadlock in MultiplexedConnections::disconnect()");
}
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wthread-safety-analysis"
std::lock_guard lock(cancel_mutex, std::adopt_lock);
#pragma clang diagnostic pop
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
std::lock_guard lock(cancel_mutex);
for (ReplicaState & state : replica_states)
{
@ -264,7 +233,6 @@ void MultiplexedConnections::disconnect()
void MultiplexedConnections::sendCancel()
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!sent_query || cancelled)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot cancel. Either no query sent or already cancelled.");
@ -282,7 +250,6 @@ void MultiplexedConnections::sendCancel()
Packet MultiplexedConnections::drain()
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!cancelled)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot drain connections: cancel first.");
@ -323,7 +290,6 @@ Packet MultiplexedConnections::drain()
std::string MultiplexedConnections::dumpAddresses() const
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
return dumpAddressesUnlocked();
}

View File

@ -106,14 +106,7 @@ private:
std::optional<ReplicaInfo> replica_info;
/// A mutex for the sendCancel function to execute safely in separate thread.
mutable std::timed_mutex cancel_mutex;
/// Temporary instrumentation to debug a weird deadlock on cancel_mutex.
/// TODO: Once the investigation is done, get rid of these, and of INSTRUMENTED_LOCK_MUTEX, and
/// change cancel_mutex to std::mutex.
mutable std::atomic<UInt64> mutex_last_locked_by{0};
mutable std::atomic<Int64> mutex_locked{0};
mutable std::array<UInt8, sizeof(std::timed_mutex)> mutex_memory_dump;
mutable std::mutex cancel_mutex;
friend struct RemoteQueryExecutorRoutine;
};

View File

@ -4,7 +4,6 @@
#include <Common/assert_cast.h>
#include <Common/WeakHash.h>
#include <Common/HashTable/Hash.h>
#include <Common/RadixSort.h>
#include <base/unaligned.h>
#include <base/sort.h>
@ -16,7 +15,6 @@
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <Columns/RadixSortHelper.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
@ -161,59 +159,6 @@ void ColumnDecimal<T>::getPermutation(IColumn::PermutationSortDirection directio
return data[lhs] > data[rhs];
};
size_t data_size = data.size();
res.resize(data_size);
if (limit >= data_size)
limit = 0;
for (size_t i = 0; i < data_size; ++i)
res[i] = i;
if constexpr (is_arithmetic_v<NativeT> && !is_big_int_v<NativeT>)
{
if (!limit)
{
/// A case for radix sort
/// LSD RadixSort is stable
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
/// TODO: LSD RadixSort is currently not stable if direction is descending
bool use_radix_sort = (sort_is_stable && ascending) || !sort_is_stable;
/// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
if (data_size >= 256 && data_size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
{
for (size_t i = 0; i < data_size; ++i)
res[i] = i;
bool try_sort = false;
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
try_sort = trySort(res.begin(), res.end(), comparator_ascending);
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
try_sort = trySort(res.begin(), res.end(), comparator_ascending_stable);
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
try_sort = trySort(res.begin(), res.end(), comparator_descending);
else
try_sort = trySort(res.begin(), res.end(), comparator_descending_stable);
if (try_sort)
return;
PaddedPODArray<ValueWithIndex<NativeT>> pairs(data_size);
for (UInt32 i = 0; i < static_cast<UInt32>(data_size); ++i)
pairs[i] = {data[i].value, i};
RadixSort<RadixSortTraits<NativeT>>::executeLSD(pairs.data(), data_size, reverse, res.data());
return;
}
}
}
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
this->getPermutationImpl(limit, res, comparator_ascending, DefaultSort(), DefaultPartialSort());
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
@ -246,37 +191,7 @@ void ColumnDecimal<T>::updatePermutation(IColumn::PermutationSortDirection direc
return data[lhs] < data[rhs];
};
auto equals_comparator = [this](size_t lhs, size_t rhs) { return data[lhs] == data[rhs]; };
auto sort = [&](auto begin, auto end, auto pred)
{
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
/// TODO: LSD RadixSort is currently not stable if direction is descending
bool use_radix_sort = (sort_is_stable && ascending) || !sort_is_stable;
size_t size = end - begin;
if (size >= 256 && size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
{
bool try_sort = trySort(begin, end, pred);
if (try_sort)
return;
PaddedPODArray<ValueWithIndex<NativeT>> pairs(size);
size_t index = 0;
for (auto * it = begin; it != end; ++it)
{
pairs[index] = {data[*it].value, static_cast<UInt32>(*it)};
++index;
}
RadixSort<RadixSortTraits<NativeT>>::executeLSD(pairs.data(), size, reverse, res.data());
return;
}
::sort(begin, end, pred);
};
auto sort = [](auto begin, auto end, auto pred) { ::sort(begin, end, pred); };
auto partial_sort = [](auto begin, auto mid, auto end, auto pred) { ::partial_sort(begin, mid, end, pred); };
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)

View File

@ -3,7 +3,6 @@
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <Columns/RadixSortHelper.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <IO/WriteHelpers.h>
#include <Common/Arena.h>
@ -193,6 +192,26 @@ struct ColumnVector<T>::equals
bool operator()(size_t lhs, size_t rhs) const { return CompareHelper<T>::equals(parent.data[lhs], parent.data[rhs], nan_direction_hint); }
};
namespace
{
template <typename T>
struct ValueWithIndex
{
T value;
UInt32 index;
};
template <typename T>
struct RadixSortTraits : RadixSortNumTraits<T>
{
using Element = ValueWithIndex<T>;
using Result = size_t;
static T & extractKey(Element & elem) { return elem.value; }
static size_t extractResult(Element & elem) { return elem.index; }
};
}
#if USE_EMBEDDED_COMPILER
template <typename T>
@ -235,25 +254,35 @@ template <typename T>
void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const
{
size_t data_size = data.size();
res.resize(data_size);
size_t s = data.size();
res.resize(s);
if (data_size == 0)
if (s == 0)
return;
if (limit >= data_size)
if (limit >= s)
limit = 0;
for (size_t i = 0; i < data_size; ++i)
res[i] = i;
if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
if (limit)
{
if (!limit)
{
/// A case for radix sort
/// LSD RadixSort is stable
for (size_t i = 0; i < s; ++i)
res[i] = i;
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
::partial_sort(res.begin(), res.begin() + limit, res.end(), less_stable(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
::partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable)
::partial_sort(res.begin(), res.begin() + limit, res.end(), greater_stable(*this, nan_direction_hint));
}
else
{
/// A case for radix sort
/// LSD RadixSort is stable
if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
{
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
@ -262,27 +291,13 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
bool use_radix_sort = (sort_is_stable && ascending && !std::is_floating_point_v<T>) || !sort_is_stable;
/// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
if (data_size >= 256 && data_size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
if (s >= 256 && s <= std::numeric_limits<UInt32>::max() && use_radix_sort)
{
bool try_sort = false;
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
try_sort = trySort(res.begin(), res.end(), less(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
try_sort = trySort(res.begin(), res.end(), less_stable(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
try_sort = trySort(res.begin(), res.end(), greater(*this, nan_direction_hint));
else
try_sort = trySort(res.begin(), res.end(), greater_stable(*this, nan_direction_hint));
if (try_sort)
return;
PaddedPODArray<ValueWithIndex<T>> pairs(data_size);
for (UInt32 i = 0; i < static_cast<UInt32>(data_size); ++i)
PaddedPODArray<ValueWithIndex<T>> pairs(s);
for (UInt32 i = 0; i < static_cast<UInt32>(s); ++i)
pairs[i] = {data[i], i};
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), data_size, reverse, res.data());
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), s, reverse, res.data());
/// Radix sort treats all NaNs to be greater than all numbers.
/// If the user needs the opposite, we must move them accordingly.
@ -290,9 +305,9 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
{
size_t nans_to_move = 0;
for (size_t i = 0; i < data_size; ++i)
for (size_t i = 0; i < s; ++i)
{
if (isNaN(data[res[reverse ? i : data_size - 1 - i]]))
if (isNaN(data[res[reverse ? i : s - 1 - i]]))
++nans_to_move;
else
break;
@ -300,35 +315,38 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
if (nans_to_move)
{
std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : data_size - nans_to_move), std::end(res));
std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : s - nans_to_move), std::end(res));
}
}
return;
}
}
}
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
this->getPermutationImpl(limit, res, less(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
this->getPermutationImpl(limit, res, less_stable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
this->getPermutationImpl(limit, res, greater(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
else
this->getPermutationImpl(limit, res, greater_stable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
/// Default sorting algorithm.
for (size_t i = 0; i < s; ++i)
res[i] = i;
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
::sort(res.begin(), res.end(), less(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
::sort(res.begin(), res.end(), less_stable(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
::sort(res.begin(), res.end(), greater(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable)
::sort(res.begin(), res.end(), greater_stable(*this, nan_direction_hint));
}
}
template <typename T>
void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
auto sort = [&](auto begin, auto end, auto pred)
{
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
/// A case for radix sort
if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
{
@ -339,10 +357,6 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
/// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
if (size >= 256 && size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
{
bool try_sort = trySort(begin, end, pred);
if (try_sort)
return;
PaddedPODArray<ValueWithIndex<T>> pairs(size);
size_t index = 0;

View File

@ -1,25 +0,0 @@
#pragma once
#include <Common/RadixSort.h>
namespace DB
{
template <typename T>
struct ValueWithIndex
{
T value;
UInt32 index;
};
template <typename T>
struct RadixSortTraits : RadixSortNumTraits<T>
{
using Element = ValueWithIndex<T>;
using Result = size_t;
static T & extractKey(Element & elem) { return elem.value; }
static size_t extractResult(Element & elem) { return elem.index; }
};
}

View File

@ -151,6 +151,8 @@
M(ParquetDecoderThreadsActive, "Number of threads in the ParquetBlockInputFormat thread pool running a task.") \
M(ParquetEncoderThreads, "Number of threads in ParquetBlockOutputFormat thread pool.") \
M(ParquetEncoderThreadsActive, "Number of threads in ParquetBlockOutputFormat thread pool running a task.") \
M(DWARFReaderThreads, "Number of threads in the DWARFBlockInputFormat thread pool.") \
M(DWARFReaderThreadsActive, "Number of threads in the DWARFBlockInputFormat thread pool running a task.") \
M(OutdatedPartsLoadingThreads, "Number of threads in the threadpool for loading Outdated data parts.") \
M(OutdatedPartsLoadingThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \
M(DistributedBytesToInsert, "Number of pending bytes to process for asynchronous insertion into Distributed tables. Number of bytes for every shard is summed.") \

View File

@ -16,15 +16,27 @@ namespace ErrorCodes
}
Elf::Elf(const std::string & path)
: in(path, 0)
Elf::Elf(const std::string & path_)
{
in.emplace(path_, 0);
init(in->buffer().begin(), in->buffer().size(), path_);
}
Elf::Elf(const char * data, size_t size, const std::string & path_)
{
init(data, size, path_);
}
void Elf::init(const char * data, size_t size, const std::string & path_)
{
path = path_;
mapped = data;
elf_size = size;
/// Check if it's an elf.
elf_size = in.buffer().size();
if (elf_size < sizeof(ElfEhdr))
throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The size of supposedly ELF file '{}' is too small", path);
mapped = in.buffer().begin();
header = reinterpret_cast<const ElfEhdr *>(mapped);
if (memcmp(header->e_ident, "\x7F""ELF", 4) != 0)

Some files were not shown because too many files have changed in this diff Show More