Merge branch 'master' into keeper-more-reduce

This commit is contained in:
Antonio Andelic 2024-02-07 14:21:31 +01:00
commit 15c89bdd66
164 changed files with 2609 additions and 741 deletions

View File

@ -17,7 +17,7 @@ assignees: ''
> A link to reproducer in [https://fiddle.clickhouse.com/](https://fiddle.clickhouse.com/). > A link to reproducer in [https://fiddle.clickhouse.com/](https://fiddle.clickhouse.com/).
**Does it reproduce on recent release?** **Does it reproduce on the most recent release?**
[The list of releases](https://github.com/ClickHouse/ClickHouse/blob/master/utils/list-versions/version_date.tsv) [The list of releases](https://github.com/ClickHouse/ClickHouse/blob/master/utils/list-versions/version_date.tsv)
@ -34,11 +34,11 @@ assignees: ''
**How to reproduce** **How to reproduce**
* Which ClickHouse server version to use * Which ClickHouse server version to use
* Which interface to use, if matters * Which interface to use, if it matters
* Non-default settings, if any * Non-default settings, if any
* `CREATE TABLE` statements for all tables involved * `CREATE TABLE` statements for all tables involved
* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary * Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
* Queries to run that lead to unexpected result * Queries to run that lead to an unexpected result
**Expected behavior** **Expected behavior**

View File

@ -55,7 +55,6 @@ jobs:
uses: ./.github/workflows/reusable_docker.yml uses: ./.github/workflows/reusable_docker.yml
with: with:
data: ${{ needs.RunConfig.outputs.data }} data: ${{ needs.RunConfig.outputs.data }}
set_latest: true
StyleCheck: StyleCheck:
needs: [RunConfig, BuildDockers] needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }} if: ${{ !failure() && !cancelled() }}
@ -362,14 +361,6 @@ jobs:
test_name: Stateless tests (release) test_name: Stateless tests (release)
runner_type: func-tester runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }} data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestReleaseDatabaseOrdinary:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (release, DatabaseOrdinary)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestReleaseDatabaseReplicated: FunctionalStatelessTestReleaseDatabaseReplicated:
needs: [RunConfig, BuilderDebRelease] needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }} if: ${{ !failure() && !cancelled() }}
@ -733,7 +724,6 @@ jobs:
- MarkReleaseReady - MarkReleaseReady
- FunctionalStatelessTestDebug - FunctionalStatelessTestDebug
- FunctionalStatelessTestRelease - FunctionalStatelessTestRelease
- FunctionalStatelessTestReleaseDatabaseOrdinary
- FunctionalStatelessTestReleaseDatabaseReplicated - FunctionalStatelessTestReleaseDatabaseReplicated
- FunctionalStatelessTestReleaseAnalyzer - FunctionalStatelessTestReleaseAnalyzer
- FunctionalStatelessTestReleaseS3 - FunctionalStatelessTestReleaseS3

View File

@ -28,7 +28,7 @@ jobs:
id: runconfig id: runconfig
run: | run: |
echo "::group::configure CI run" echo "::group::configure CI run"
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --skip-jobs --rebuild-all-docker --outfile ${{ runner.temp }}/ci_run_data.json python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --skip-jobs --outfile ${{ runner.temp }}/ci_run_data.json
echo "::endgroup::" echo "::endgroup::"
echo "::group::CI run configure results" echo "::group::CI run configure results"

View File

@ -785,6 +785,15 @@ jobs:
test_name: Integration tests (release) test_name: Integration tests (release)
runner_type: stress-tester runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }} data: ${{ needs.RunConfig.outputs.data }}
IntegrationTestsAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Integration tests (aarch64)
# FIXME: there is no stress-tester for aarch64. func-tester-aarch64 is ok?
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
IntegrationTestsFlakyCheck: IntegrationTestsFlakyCheck:
needs: [RunConfig, BuilderDebAsan] needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }} if: ${{ !failure() && !cancelled() }}
@ -924,6 +933,7 @@ jobs:
- IntegrationTestsAnalyzerAsan - IntegrationTestsAnalyzerAsan
- IntegrationTestsTsan - IntegrationTestsTsan
- IntegrationTestsRelease - IntegrationTestsRelease
- IntegrationTestsAarch64
- IntegrationTestsFlakyCheck - IntegrationTestsFlakyCheck
- PerformanceComparisonX86 - PerformanceComparisonX86
- PerformanceComparisonAarch - PerformanceComparisonAarch

View File

@ -46,7 +46,7 @@ jobs:
needs: [DockerBuildAmd64, DockerBuildAarch64] needs: [DockerBuildAmd64, DockerBuildAarch64]
runs-on: [self-hosted, style-checker] runs-on: [self-hosted, style-checker]
if: | if: |
!failure() && !cancelled() && toJson(fromJson(inputs.data).docker_data.missing_multi) != '[]' !failure() && !cancelled() && (toJson(fromJson(inputs.data).docker_data.missing_multi) != '[]' || inputs.set_latest)
steps: steps:
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
@ -55,14 +55,12 @@ jobs:
- name: Build images - name: Build images
run: | run: |
cd "$GITHUB_WORKSPACE/tests/ci" cd "$GITHUB_WORKSPACE/tests/ci"
FLAG_LATEST=''
if [ "${{ inputs.set_latest }}" == "true" ]; then if [ "${{ inputs.set_latest }}" == "true" ]; then
FLAG_LATEST='--set-latest'
echo "latest tag will be set for resulting manifests" echo "latest tag will be set for resulting manifests"
python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 \
--image-tags '${{ toJson(fromJson(inputs.data).docker_data.images) }}' \
--missing-images '${{ toJson(fromJson(inputs.data).docker_data.missing_multi) }}' \
--set-latest
else
python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 \
--image-tags '${{ toJson(fromJson(inputs.data).docker_data.images) }}' \
--missing-images '${{ toJson(fromJson(inputs.data).docker_data.missing_multi) }}'
fi fi
python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 \
--image-tags '${{ toJson(fromJson(inputs.data).docker_data.images) }}' \
--missing-images '${{ toJson(fromJson(inputs.data).docker_data.missing_multi) }}' \
$FLAG_LATEST

View File

@ -11,6 +11,7 @@
## To run specified set of tests in CI: ## To run specified set of tests in CI:
#ci_set_<SET_NAME> #ci_set_<SET_NAME>
#ci_set_reduced #ci_set_reduced
#ci_set_arm
## To run specified job in CI: ## To run specified job in CI:
#job_<JOB NAME> #job_<JOB NAME>

View File

@ -6,8 +6,6 @@
### <a id="241"></a> ClickHouse release 24.1, 2024-01-30 ### <a id="241"></a> ClickHouse release 24.1, 2024-01-30
### ClickHouse release master (b4a5b6060ea) FIXME as compared to v23.12.1.1368-stable (a2faa65b080)
#### Backward Incompatible Change #### Backward Incompatible Change
* The setting `print_pretty_type_names` is turned on by default. You can turn it off to keep the old behavior or `SET compatibility = '23.12'`. [#57726](https://github.com/ClickHouse/ClickHouse/pull/57726) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * The setting `print_pretty_type_names` is turned on by default. You can turn it off to keep the old behavior or `SET compatibility = '23.12'`. [#57726](https://github.com/ClickHouse/ClickHouse/pull/57726) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)). * The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)).
@ -24,7 +22,6 @@
* Add `quantileDD` aggregate function as well as the corresponding `quantilesDD` and `medianDD`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. ### Documentation entry for user-facing changes. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)). * Add `quantileDD` aggregate function as well as the corresponding `quantilesDD` and `medianDD`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. ### Documentation entry for user-facing changes. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)).
* Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)). * Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)). * Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Allow partitions from tables with different partition expressions to be attached when the destination table partition expression doesn't re-partition/split the part. [#39507](https://github.com/ClickHouse/ClickHouse/pull/39507) ([Arthur Passos](https://github.com/arthurpassos)).
* Add function `arrayShingles` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)). * Add function `arrayShingles` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)).
* Added functions `punycodeEncode`, `punycodeDecode`, `idnaEncode` and `idnaDecode` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)). * Added functions `punycodeEncode`, `punycodeDecode`, `idnaEncode` and `idnaDecode` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)).
* Added string similarity functions `damerauLevenshteinDistance`, `jaroSimilarity` and `jaroWinklerSimilarity`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)). * Added string similarity functions `damerauLevenshteinDistance`, `jaroSimilarity` and `jaroWinklerSimilarity`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)).

View File

@ -37,7 +37,7 @@ Keep an eye out for upcoming meetups around the world. Somewhere else you want u
## Recent Recordings ## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
* **Recording available**: [**v23.10 Release Webinar**](https://www.youtube.com/watch?v=PGQS6uPb970) All the features of 23.10, one convenient video! Watch it now! * **Recording available**: [**v24.1 Release Webinar**](https://www.youtube.com/watch?v=pBF9g0wGAGs) All the features of 24.1, one convenient video! Watch it now!
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU) * **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)

View File

@ -17,6 +17,7 @@ set (SRCS
getMemoryAmount.cpp getMemoryAmount.cpp
getPageSize.cpp getPageSize.cpp
getThreadId.cpp getThreadId.cpp
int8_to_string.cpp
JSON.cpp JSON.cpp
mremap.cpp mremap.cpp
phdr_cache.cpp phdr_cache.cpp

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include <bit>
#include <cstring> #include <cstring>
#include <algorithm> #include <algorithm>
#include <type_traits> #include <type_traits>

View File

@ -1,8 +1,11 @@
#include <stdexcept>
#include <fstream>
#include <base/getMemoryAmount.h> #include <base/getMemoryAmount.h>
#include <base/getPageSize.h> #include <base/getPageSize.h>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <unistd.h> #include <unistd.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/param.h> #include <sys/param.h>
@ -11,6 +14,80 @@
#endif #endif
namespace
{

/// Returns the memory limit (in bytes) that cgroups v2 imposes on the current process.
///
/// Returns an empty optional if
/// - the platform is not Linux,
/// - cgroups v2 is not enabled or its memory controller is not enabled at the top level,
/// - the cgroup of the process cannot be determined from /proc/self/cgroup, or
/// - no cgroup between the process' cgroup and the cgroups mount point sets a numeric `memory.max`.
std::optional<uint64_t> getCgroupsV2MemoryLimit()
{
#if defined(OS_LINUX)
    const std::filesystem::path default_cgroups_mount = "/sys/fs/cgroup";

    /// This file exists iff the host has cgroups v2 enabled.
    std::ifstream controllers_file(default_cgroups_mount / "cgroup.controllers");
    if (!controllers_file.is_open())
        return {};

    /// Make sure that the memory controller is enabled.
    /// - cgroup.controllers defines which controllers *can* be enabled.
    /// - cgroup.subtree_control defines which controllers *are* enabled.
    /// (see https://docs.kernel.org/admin-guide/cgroup-v2.html)
    /// Caveat: nested groups may disable controllers. For simplicity, check only the top-level group.
    /// If the file cannot be opened, the buffer stays empty and we bail out below.
    std::ifstream subtree_control_file(default_cgroups_mount / "cgroup.subtree_control");
    std::stringstream subtree_control_buf;
    subtree_control_buf << subtree_control_file.rdbuf();
    std::string subtree_control = subtree_control_buf.str();
    if (subtree_control.find("memory") == std::string::npos)
        return {};

    /// Identify the cgroup the process belongs to.
    /// All PIDs assigned to a cgroup are in /sys/fs/cgroup/{cgroup_name}/cgroup.procs
    /// A simpler way to get the membership is:
    std::ifstream cgroup_name_file("/proc/self/cgroup");
    if (!cgroup_name_file.is_open())
        return {};

    std::stringstream cgroup_name_buf;
    cgroup_name_buf << cgroup_name_file.rdbuf();
    std::string cgroup_name = cgroup_name_buf.str();
    if (!cgroup_name.empty() && cgroup_name.back() == '\n')
        cgroup_name.pop_back(); /// remove trailing newline, if any

    /// With cgroups v2, there will be a *single* line with prefix "0::/"
    const std::string v2_prefix = "0::/";
    if (!cgroup_name.starts_with(v2_prefix))
        return {};
    cgroup_name = cgroup_name.substr(v2_prefix.length());

    std::filesystem::path current_cgroup = cgroup_name.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup_name);

    /// Limits are enforced hierarchically: the memory usage of a cgroup is bounded by its own `memory.max`
    /// *and* by the `memory.max` of every ancestor. Therefore walk from the process' cgroup up to the mount
    /// point and keep the smallest numeric limit. Stopping at the first file found would miss an ancestor
    /// limit whenever the nested cgroup keeps the default "max" or sets a larger value than its ancestor.
    std::optional<uint64_t> limit;
    while (current_cgroup != default_cgroups_mount.parent_path())
    {
        std::ifstream setting_file(current_cgroup / "memory.max");
        uint64_t value = 0;
        /// A level without the file, or with the non-numeric default "max", sets no limit of its own.
        if (setting_file.is_open() && (setting_file >> value) && (!limit.has_value() || value < *limit))
            limit = value;
        current_cgroup = current_cgroup.parent_path();
    }

    return limit;
#else
    return {};
#endif
}

}
/** Returns the size of physical memory (RAM) in bytes. /** Returns the size of physical memory (RAM) in bytes.
* Returns 0 on unsupported platform * Returns 0 on unsupported platform
*/ */
@ -26,34 +103,27 @@ uint64_t getMemoryAmountOrZero()
uint64_t memory_amount = num_pages * page_size; uint64_t memory_amount = num_pages * page_size;
#if defined(OS_LINUX) /// Respect the memory limit set by cgroups v2.
// Try to lookup at the Cgroup limit auto limit_v2 = getCgroupsV2MemoryLimit();
if (limit_v2.has_value() && *limit_v2 < memory_amount)
// CGroups v2 memory_amount = *limit_v2;
std::ifstream cgroupv2_limit("/sys/fs/cgroup/memory.max");
if (cgroupv2_limit.is_open())
{
uint64_t memory_limit = 0;
cgroupv2_limit >> memory_limit;
if (memory_limit > 0 && memory_limit < memory_amount)
memory_amount = memory_limit;
}
else else
{ {
// CGroups v1 /// Cgroups v1 were replaced by v2 in 2015. The only reason we keep supporting v1 is that the transition to v2
std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes"); /// has been slow. Caveat : Hierarchical groups as in v2 are not supported for v1, the location of the memory
if (cgroup_limit.is_open()) /// limit (virtual) file is hard-coded.
/// TODO: check at the end of 2024 if we can get rid of v1.
std::ifstream limit_file_v1("/sys/fs/cgroup/memory/memory.limit_in_bytes");
if (limit_file_v1.is_open())
{ {
uint64_t memory_limit = 0; // in case of read error uint64_t limit_v1;
cgroup_limit >> memory_limit; if (limit_file_v1 >> limit_v1)
if (memory_limit > 0 && memory_limit < memory_amount) if (limit_v1 < memory_amount)
memory_amount = memory_limit; memory_amount = limit_v1;
} }
} }
#endif
return memory_amount; return memory_amount;
} }

View File

@ -0,0 +1,9 @@
#include <base/int8_to_string.h>
namespace std
{
/// Overload of std::to_string for Int8: converts the value to int8_t and
/// delegates to the standard overloads to build the decimal string.
std::string to_string(Int8 v) /// NOLINT (cert-dcl58-cpp)
{
    const int8_t as_int8 = static_cast<int8_t>(v);
    return to_string(as_int8);
}
}

View File

@ -0,0 +1,17 @@
#pragma once
#include <base/defines.h>
#include <base/types.h>
#include <fmt/format.h>
/// fmt formatter for Int8. It adds nothing of its own and inherits everything from the int8_t formatter.
/// NOTE(review): presumably needed because Int8 (base/types.h) is a distinct type rather than an alias
/// of int8_t - confirm.
template <>
struct fmt::formatter<Int8> : fmt::formatter<int8_t>
{
};
namespace std
{
/// Declaration of a std::to_string overload taking Int8; only declared here, defined elsewhere
/// (NOTE(review): presumably in base/int8_to_string.cpp - confirm).
std::string to_string(Int8 v); /// NOLINT (cert-dcl58-cpp)
}

View File

@ -3,14 +3,29 @@
#include <cstdint> #include <cstdint>
#include <string> #include <string>
/// This is needed for more strict aliasing. https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713 /// Using char8_t for more strict aliasing (https://stackoverflow.com/a/57453713)
using UInt8 = char8_t; using UInt8 = char8_t;
/// Same for using signed _BitInt(8) (there isn't a signed char8_t, which would be more convenient)
/// See https://godbolt.org/z/fafnWEnnf
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wbit-int-extension"
using Int8 = signed _BitInt(8);
#pragma clang diagnostic pop
namespace std
{
/// Hash support for Int8: the value is converted to int8_t and hashed with std::hash<int8_t>.
template <>
struct hash<Int8> /// NOLINT (cert-dcl58-cpp)
{
    size_t operator()(const Int8 x) const
    {
        const int8_t plain = static_cast<int8_t>(x);
        return std::hash<int8_t>{}(plain);
    }
};
}
using UInt16 = uint16_t; using UInt16 = uint16_t;
using UInt32 = uint32_t; using UInt32 = uint32_t;
using UInt64 = uint64_t; using UInt64 = uint64_t;
using Int8 = int8_t;
using Int16 = int16_t; using Int16 = int16_t;
using Int32 = int32_t; using Int32 = int32_t;
using Int64 = int64_t; using Int64 = int64_t;

View File

@ -6,6 +6,7 @@
#include "throwError.h" #include "throwError.h"
#include <bit>
#include <cmath> #include <cmath>
#include <cfloat> #include <cfloat>
#include <cassert> #include <cassert>

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit 4ec215f3607c2111bf2cc91ba842046a6b5eb0c4 Subproject commit 9eb5097a0abfa837722cca7a5114a25837817bf2

View File

@ -62,7 +62,6 @@
"dependent": [] "dependent": []
}, },
"docker/test/integration/runner": { "docker/test/integration/runner": {
"only_amd64": true,
"name": "clickhouse/integration-tests-runner", "name": "clickhouse/integration-tests-runner",
"dependent": [] "dependent": []
}, },

View File

@ -72,7 +72,7 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
zstd \ zstd \
zip \ zip \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# Download toolchain and SDK for Darwin # Download toolchain and SDK for Darwin
RUN curl -sL -O https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz RUN curl -sL -O https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz

View File

@ -23,10 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
tzdata \ tzdata \
wget \ wget \
&& apt-get clean \ && apt-get clean \
&& rm -rf \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
/var/lib/apt/lists/* \
/var/cache/debconf \
/tmp/*
ARG REPO_CHANNEL="stable" ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"

View File

@ -13,7 +13,10 @@ RUN apt-get update \
zstd \ zstd \
locales \ locales \
sudo \ sudo \
--yes --no-install-recommends --yes --no-install-recommends \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# Sanitizer options for services (clickhouse-server) # Sanitizer options for services (clickhouse-server)
# Set resident memory limit for TSAN to 45GiB (46080MiB) to avoid OOMs in Stress tests # Set resident memory limit for TSAN to 45GiB (46080MiB) to avoid OOMs in Stress tests

View File

@ -20,7 +20,9 @@ RUN apt-get update \
pv \ pv \
jq \ jq \
zstd \ zstd \
--yes --no-install-recommends --yes --no-install-recommends \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3
@ -31,12 +33,14 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \ && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \
&& odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \
&& odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
&& rm -rf /tmp/clickhouse-odbc-tmp \ && rm -rf /tmp/clickhouse-odbc-tmp
# Give suid to gdb to grant it attach permissions
# chmod 777 to make the container user independent
RUN chmod u+s /usr/bin/gdb \
&& mkdir -p /var/lib/clickhouse \ && mkdir -p /var/lib/clickhouse \
&& chmod 777 /var/lib/clickhouse && chmod 777 /var/lib/clickhouse
# chmod 777 to make the container user independent
ENV TZ=Europe/Amsterdam ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

View File

@ -29,7 +29,7 @@ RUN apt-get update \
wget \ wget \
&& apt-get autoremove --yes \ && apt-get autoremove --yes \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install Jinja2 RUN pip3 install Jinja2

View File

@ -389,8 +389,8 @@ fi
rg --text -F '<Fatal>' server.log > fatal.log ||: rg --text -F '<Fatal>' server.log > fatal.log ||:
dmesg -T > dmesg.log ||: dmesg -T > dmesg.log ||:
zstd --threads=0 server.log zstd --threads=0 --rm server.log
zstd --threads=0 fuzzer.log zstd --threads=0 --rm fuzzer.log
cat > report.html <<EOF ||: cat > report.html <<EOF ||:
<!DOCTYPE html> <!DOCTYPE html>

View File

@ -10,13 +10,13 @@ ENV \
init=/lib/systemd/systemd init=/lib/systemd/systemd
# install systemd packages # install systemd packages
RUN apt-get update && \ RUN apt-get update \
apt-get install -y --no-install-recommends \ && apt-get install -y --no-install-recommends \
sudo \ sudo \
systemd \ systemd \
&& \ \
apt-get clean && \ && apt-get clean \
rm -rf /var/lib/apt/lists && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# configure systemd # configure systemd
# remove systemd 'wants' triggers # remove systemd 'wants' triggers

View File

@ -1,31 +1,27 @@
FROM ubuntu:20.04 FROM ubuntu:20.04
MAINTAINER lgbo-ustc <lgbo.ustc@gmail.com> MAINTAINER lgbo-ustc <lgbo.ustc@gmail.com>
RUN apt-get update RUN apt-get update \
RUN apt-get install -y wget openjdk-8-jre && apt-get install -y wget openjdk-8-jre \
&& wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz \
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz && \ && tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz \
tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz && wget https://apache.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz \
RUN wget https://apache.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \ && tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz \
tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz && apt install -y vim \
RUN apt install -y vim && apt install -y openssh-server openssh-client \
&& apt install -y mysql-server \
RUN apt install -y openssh-server openssh-client && mkdir -p /root/.ssh \
&& ssh-keygen -t rsa -b 2048 -P '' -f /root/.ssh/id_rsa \
RUN apt install -y mysql-server && cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys \
&& cp /root/.ssh/id_rsa /etc/ssh/ssh_host_rsa_key \
RUN mkdir -p /root/.ssh && \ && cp /root/.ssh/id_rsa.pub /etc/ssh/ssh_host_rsa_key.pub \
ssh-keygen -t rsa -b 2048 -P '' -f /root/.ssh/id_rsa && \ && wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.27.tar.gz \
cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys && \ && tar -xf mysql-connector-java-8.0.27.tar.gz \
cp /root/.ssh/id_rsa /etc/ssh/ssh_host_rsa_key && \ && mv mysql-connector-java-8.0.27/mysql-connector-java-8.0.27.jar /apache-hive-2.3.9-bin/lib/ \
cp /root/.ssh/id_rsa.pub /etc/ssh/ssh_host_rsa_key.pub && rm -rf mysql-connector-java-8.0.27.tar.gz mysql-connector-java-8.0.27 \
&& apt install -y iputils-ping net-tools \
RUN wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.27.tar.gz &&\ && apt-get clean \
tar -xf mysql-connector-java-8.0.27.tar.gz && \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
mv mysql-connector-java-8.0.27/mysql-connector-java-8.0.27.jar /apache-hive-2.3.9-bin/lib/ && \
rm -rf mysql-connector-java-8.0.27.tar.gz mysql-connector-java-8.0.27
RUN apt install -y iputils-ping net-tools
ENV JAVA_HOME=/usr ENV JAVA_HOME=/usr
ENV HADOOP_HOME=/hadoop-3.1.0 ENV HADOOP_HOME=/hadoop-3.1.0
@ -44,4 +40,3 @@ COPY demo_data.txt /
ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH
RUN service ssh start && sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml && hdfs namenode -format RUN service ssh start && sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml && hdfs namenode -format
COPY start.sh / COPY start.sh /

View File

@ -3,14 +3,10 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
RUN apt-get update && \ RUN apt-get update \
apt-get install -y software-properties-common build-essential openjdk-8-jdk curl && apt-get install -y software-properties-common build-essential openjdk-8-jdk curl \
&& apt-get clean \
RUN rm -rf \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
/var/lib/apt/lists/* \
/var/cache/debconf \
/tmp/* \
RUN apt-get clean
ARG ver=42.2.12 ARG ver=42.2.12
RUN curl -L -o /postgresql-java-${ver}.jar https://repo1.maven.org/maven2/org/postgresql/postgresql/${ver}/postgresql-${ver}.jar RUN curl -L -o /postgresql-java-${ver}.jar https://repo1.maven.org/maven2/org/postgresql/postgresql/${ver}/postgresql-${ver}.jar

View File

@ -37,11 +37,8 @@ RUN apt-get update \
libkrb5-dev \ libkrb5-dev \
krb5-user \ krb5-user \
g++ \ g++ \
&& rm -rf \ && apt-get clean \
/var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
/var/cache/debconf \
/tmp/* \
&& apt-get clean
ENV TZ=Etc/UTC ENV TZ=Etc/UTC
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
@ -62,6 +59,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \
&& dockerd --version; docker --version && dockerd --version; docker --version
# kazoo 2.10.0 is broken
# https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html
RUN python3 -m pip install --no-cache-dir \ RUN python3 -m pip install --no-cache-dir \
PyMySQL \ PyMySQL \
aerospike==11.1.0 \ aerospike==11.1.0 \
@ -70,7 +69,7 @@ RUN python3 -m pip install --no-cache-dir \
azure-storage-blob \ azure-storage-blob \
boto3 \ boto3 \
cassandra-driver \ cassandra-driver \
confluent-kafka==1.9.2 \ confluent-kafka==2.3.0 \
delta-spark==2.3.0 \ delta-spark==2.3.0 \
dict2xml \ dict2xml \
dicttoxml \ dicttoxml \
@ -79,7 +78,7 @@ RUN python3 -m pip install --no-cache-dir \
grpcio \ grpcio \
grpcio-tools \ grpcio-tools \
kafka-python \ kafka-python \
kazoo \ kazoo==2.9.0 \
lz4 \ lz4 \
minio \ minio \
nats-py \ nats-py \

View File

@ -24,7 +24,10 @@ RUN mkdir "/root/.ssh"
RUN touch "/root/.ssh/known_hosts" RUN touch "/root/.ssh/known_hosts"
# install java # install java
RUN apt-get update && apt-get install default-jre default-jdk libjna-java libjna-jni ssh gnuplot graphviz --yes --no-install-recommends RUN apt-get update && \
apt-get install default-jre default-jdk libjna-java libjna-jni ssh gnuplot graphviz --yes --no-install-recommends \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# install clojure # install clojure
RUN curl -O "https://download.clojure.org/install/linux-install-${CLOJURE_VERSION}.sh" && \ RUN curl -O "https://download.clojure.org/install/linux-install-${CLOJURE_VERSION}.sh" && \

View File

@ -27,7 +27,7 @@ RUN apt-get update \
wget \ wget \
&& apt-get autoremove --yes \ && apt-get autoremove --yes \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install Jinja2 RUN pip3 install Jinja2

View File

@ -37,7 +37,7 @@ RUN apt-get update \
&& apt-get purge --yes python3-dev g++ \ && apt-get purge --yes python3-dev g++ \
&& apt-get autoremove --yes \ && apt-get autoremove --yes \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
COPY run.sh / COPY run.sh /

View File

@ -31,7 +31,9 @@ RUN mkdir "/root/.ssh"
RUN touch "/root/.ssh/known_hosts" RUN touch "/root/.ssh/known_hosts"
# install java # install java
RUN apt-get update && apt-get install default-jre default-jdk libjna-java libjna-jni ssh gnuplot graphviz --yes --no-install-recommends RUN apt-get update && apt-get install default-jre default-jdk libjna-java libjna-jni ssh gnuplot graphviz --yes --no-install-recommends \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# install clojure # install clojure
RUN curl -O "https://download.clojure.org/install/linux-install-${CLOJURE_VERSION}.sh" && \ RUN curl -O "https://download.clojure.org/install/linux-install-${CLOJURE_VERSION}.sh" && \

View File

@ -5,9 +5,10 @@ FROM ubuntu:22.04
ARG apt_archive="http://archive.ubuntu.com" ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update --yes && \ RUN apt-get update --yes \
env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends && \ && env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends \
apt-get clean && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# We need to get the repository's HEAD each time despite, so we invalidate layers' cache # We need to get the repository's HEAD each time despite, so we invalidate layers' cache
ARG CACHE_INVALIDATOR=0 ARG CACHE_INVALIDATOR=0

View File

@ -15,7 +15,8 @@ RUN apt-get update --yes \
unixodbc-dev \ unixodbc-dev \
odbcinst \ odbcinst \
sudo \ sudo \
&& apt-get clean && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install \ RUN pip3 install \
numpy \ numpy \

View File

@ -11,7 +11,8 @@ RUN apt-get update --yes \
python3-dev \ python3-dev \
python3-pip \ python3-pip \
sudo \ sudo \
&& apt-get clean && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install \ RUN pip3 install \
pyyaml \ pyyaml \

View File

@ -9,7 +9,8 @@ RUN apt-get update -y \
python3-requests \ python3-requests \
nodejs \ nodejs \
npm \ npm \
&& apt-get clean && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
COPY create.sql / COPY create.sql /
COPY run.sh / COPY run.sh /

View File

@ -44,7 +44,8 @@ RUN apt-get update -y \
pv \ pv \
zip \ zip \
p7zip-full \ p7zip-full \
&& apt-get clean && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install numpy scipy pandas Jinja2 pyarrow RUN pip3 install numpy scipy pandas Jinja2 pyarrow

View File

@ -9,6 +9,8 @@ FROM ubuntu:20.04 as clickhouse-test-runner-base
VOLUME /packages VOLUME /packages
CMD apt-get update ;\ CMD apt-get update ;\
DEBIAN_FRONTEND=noninteractive \ DEBIAN_FRONTEND=noninteractive \
apt install -y /packages/clickhouse-common-static_*.deb \ apt install -y /packages/clickhouse-common-static_*.deb \
/packages/clickhouse-client_*.deb /packages/clickhouse-client_*.deb \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

View File

@ -19,7 +19,8 @@ RUN apt-get update -y \
openssl \ openssl \
netcat-openbsd \ netcat-openbsd \
brotli \ brotli \
&& apt-get clean && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
COPY run.sh / COPY run.sh /

View File

@ -21,7 +21,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
locales \ locales \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \ && pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \ && apt-get clean \
&& rm -rf /root/.cache/pip && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \
&& rm -rf /root/.cache/pip
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8 ENV LC_ALL en_US.UTF-8

View File

@ -19,7 +19,8 @@ RUN apt-get update -y \
openssl \ openssl \
netcat-openbsd \ netcat-openbsd \
brotli \ brotli \
&& apt-get clean && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
COPY run.sh / COPY run.sh /

View File

@ -27,7 +27,9 @@ RUN apt-get update \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \ && echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list \ /etc/apt/sources.list \
&& apt-get clean && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# Install cmake 3.20+ for rust support # Install cmake 3.20+ for rust support
# Used https://askubuntu.com/a/1157132 as reference # Used https://askubuntu.com/a/1157132 as reference
@ -60,7 +62,9 @@ RUN apt-get update \
software-properties-common \ software-properties-common \
tzdata \ tzdata \
--yes --no-install-recommends \ --yes --no-install-recommends \
&& apt-get clean && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# This symlink required by gcc to find lld compiler # This symlink required by gcc to find lld compiler
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld

View File

@ -38,6 +38,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[nats_username = 'user',] [nats_username = 'user',]
[nats_password = 'password',] [nats_password = 'password',]
[nats_token = 'clickhouse',] [nats_token = 'clickhouse',]
[nats_credential_file = '/var/nats_credentials',]
[nats_startup_connect_tries = '5'] [nats_startup_connect_tries = '5']
[nats_max_rows_per_message = 1,] [nats_max_rows_per_message = 1,]
[nats_handle_error_mode = 'default'] [nats_handle_error_mode = 'default']
@ -63,6 +64,7 @@ Optional parameters:
- `nats_username` - NATS username. - `nats_username` - NATS username.
- `nats_password` - NATS password. - `nats_password` - NATS password.
- `nats_token` - NATS auth token. - `nats_token` - NATS auth token.
- `nats_credential_file` - Path to a NATS credentials file.
- `nats_startup_connect_tries` - Number of connect tries at startup. Default: `5`. - `nats_startup_connect_tries` - Number of connect tries at startup. Default: `5`.
- `nats_max_rows_per_message` — The maximum number of rows written in one NATS message for row-based formats. (default : `1`). - `nats_max_rows_per_message` — The maximum number of rows written in one NATS message for row-based formats. (default : `1`).
- `nats_handle_error_mode` — How to handle errors for NATS engine. Possible values: default (the exception will be thrown if we fail to parse a message), stream (the exception message and raw message will be saved in virtual columns `_error` and `_raw_message`). - `nats_handle_error_mode` — How to handle errors for NATS engine. Possible values: default (the exception will be thrown if we fail to parse a message), stream (the exception message and raw message will be saved in virtual columns `_error` and `_raw_message`).

View File

@ -2097,7 +2097,7 @@ SELECT * FROM test_table
## update_insert_deduplication_token_in_dependent_materialized_views {#update-insert-deduplication-token-in-dependent-materialized-views} ## update_insert_deduplication_token_in_dependent_materialized_views {#update-insert-deduplication-token-in-dependent-materialized-views}
Allows to update `insert_deduplication_token` with table identifier during insert in dependent materialized views, if setting `deduplicate_blocks_in_dependent_materialized_views` is enabled and `insert_deduplication_token` is set. Allows to update `insert_deduplication_token` with view identifier during insert in dependent materialized views, if setting `deduplicate_blocks_in_dependent_materialized_views` is enabled and `insert_deduplication_token` is set.
Possible values: Possible values:

View File

@ -1,5 +1,5 @@
--- ---
slug: /en/sql-reference/data-types/json slug: /en/sql-reference/data-types/variant
sidebar_position: 55 sidebar_position: 55
sidebar_label: Variant sidebar_label: Variant
--- ---

View File

@ -6,11 +6,67 @@ sidebar_label: Time Series
# Time Series Functions # Time Series Functions
Below functions are used for time series analysis. Below functions are used for series data analysis.
## seriesOutliersDetectTukey
Detects outliers in series data using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences).
**Syntax**
``` sql
seriesOutliersDetectTukey(series);
seriesOutliersDetectTukey(series, min_percentile, max_percentile, K);
```
**Arguments**
- `series` - An array of numeric values.
- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [2,98]. The default is 25.
- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [2,98]. The default is 75.
- `K` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5.
At least four data points are required in `series` to detect outliers.
**Returned value**
- Returns an array of the same length as the input array where each value represents the score of a possible anomaly of the corresponding element in the series. A non-zero score indicates a possible anomaly.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6]) AS print_0;
```
Result:
``` text
┌───────────print_0─────────────────┐
│[0,0,0,0,0,0,0,0,0,27,0,0,0,0,0,0] │
└───────────────────────────────────┘
```
Query:
``` sql
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 20, 80, 1.5) AS print_0;
```
Result:
``` text
┌─print_0──────────────────────────────┐
│ [0,0,0,0,0,0,0,0,0,19.5,0,0,0,0,0,0] │
└──────────────────────────────────────┘
```
## seriesPeriodDetectFFT ## seriesPeriodDetectFFT
Finds the period of the given time series data using FFT Finds the period of the given series data using FFT
FFT - [Fast Fourier transform](https://en.wikipedia.org/wiki/Fast_Fourier_transform) FFT - [Fast Fourier transform](https://en.wikipedia.org/wiki/Fast_Fourier_transform)
**Syntax** **Syntax**
@ -25,7 +81,7 @@ seriesPeriodDetectFFT(series);
**Returned value** **Returned value**
- A real value equal to the period of time series - A real value equal to the period of series data
- Returns NAN when the number of data points is less than four. - Returns NAN when the number of data points is less than four.
Type: [Float64](../../sql-reference/data-types/float.md). Type: [Float64](../../sql-reference/data-types/float.md).
@ -60,7 +116,7 @@ Result:
## seriesDecomposeSTL ## seriesDecomposeSTL
Decomposes a time series using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component. Decomposes series data using STL [(Seasonal-Trend Decomposition Procedure Based on Loess)](https://www.wessa.net/download/stl.pdf) into a season, a trend and a residual component.
**Syntax** **Syntax**

View File

@ -139,8 +139,8 @@ ALTER TABLE visits COMMENT COLUMN browser 'This column shows the browser used fo
## MODIFY COLUMN ## MODIFY COLUMN
``` sql ``` sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST] MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [settings] [AFTER name_after | FIRST]
ALTER COLUMN [IF EXISTS] name TYPE [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST] ALTER COLUMN [IF EXISTS] name TYPE [type] [default_expr] [codec] [TTL] [settings] [AFTER name_after | FIRST]
``` ```
This query changes the `name` column properties: This query changes the `name` column properties:
@ -153,10 +153,14 @@ This query changes the `name` column properties:
- TTL - TTL
- Column-level Settings
For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#codecs). For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#codecs).
For examples of columns TTL modifying, see [Column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl). For examples of columns TTL modifying, see [Column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl).
For examples of column-level settings modifying, see [Column-level Settings](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#column-level-settings).
If the `IF EXISTS` clause is specified, the query won't return an error if the column does not exist. If the `IF EXISTS` clause is specified, the query won't return an error if the column does not exist.
When changing the type, values are converted as if the [toType](/docs/en/sql-reference/functions/type-conversion-functions.md) functions were applied to them. If only the default expression is changed, the query does not do anything complex, and is completed almost instantly. When changing the type, values are converted as if the [toType](/docs/en/sql-reference/functions/type-conversion-functions.md) functions were applied to them. If only the default expression is changed, the query does not do anything complex, and is completed almost instantly.
@ -209,7 +213,7 @@ The `ALTER` query for changing columns is replicated. The instructions are saved
## MODIFY COLUMN REMOVE ## MODIFY COLUMN REMOVE
Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`, `SETTING`. Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`, `SETTINGS`.
Syntax: Syntax:
@ -237,7 +241,7 @@ Modify a column setting.
Syntax: Syntax:
```sql ```sql
ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING name=value,...; ALTER TABLE table_name MODIFY COLUMN column_name MODIFY SETTING name=value,...;
``` ```
**Example** **Example**
@ -245,7 +249,7 @@ ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING name=value,...;
Modify column's `max_compress_block_size` to `1MB`: Modify column's `max_compress_block_size` to `1MB`:
```sql ```sql
ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING max_compress_block_size = 1048576; ALTER TABLE table_name MODIFY COLUMN column_name MODIFY SETTING max_compress_block_size = 1048576;
``` ```
## MODIFY COLUMN RESET SETTING ## MODIFY COLUMN RESET SETTING
@ -255,21 +259,21 @@ Reset a column setting, also removes the setting declaration in the column expre
Syntax: Syntax:
```sql ```sql
ALTER TABLE table_name MODIFY COLUMN RESET SETTING name,...; ALTER TABLE table_name MODIFY COLUMN column_name RESET SETTING name,...;
``` ```
**Example** **Example**
Remove column setting `max_compress_block_size` to `1MB`: Reset column setting `max_compress_block_size` to its default value:
```sql ```sql
ALTER TABLE table_name MODIFY COLUMN REMOVE SETTING max_compress_block_size; ALTER TABLE table_name MODIFY COLUMN column_name RESET SETTING max_compress_block_size;
``` ```
## MATERIALIZE COLUMN ## MATERIALIZE COLUMN
Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`). Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`).
It is used if it is necessary to add or update a column with a complicated expression, because evaluating such an expression directly on `SELECT` executing turns out to be expensive. It is used if it is necessary to add or update a column with a complicated expression, because evaluating such an expression directly on `SELECT` executing turns out to be expensive.
Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations).
Syntax: Syntax:

View File

@ -504,7 +504,7 @@ void Client::connect()
<< "It may lack support for new features." << std::endl << "It may lack support for new features." << std::endl
<< std::endl; << std::endl;
} }
else if (client_version_tuple > server_version_tuple) else if (client_version_tuple > server_version_tuple && server_display_name != "clickhouse-cloud")
{ {
std::cout << "ClickHouse server version is older than ClickHouse client. " std::cout << "ClickHouse server version is older than ClickHouse client. "
<< "It may indicate that the server is out of date and can be upgraded." << std::endl << "It may indicate that the server is out of date and can be upgraded." << std::endl

View File

@ -826,6 +826,11 @@ try
0, // We don't need any threads one all the parts will be deleted 0, // We don't need any threads one all the parts will be deleted
server_settings.max_parts_cleaning_thread_pool_size); server_settings.max_parts_cleaning_thread_pool_size);
getDatabaseReplicatedCreateTablesThreadPool().initialize(
server_settings.max_database_replicated_create_table_thread_pool_size,
0, // We don't need any threads once all the tables will be created
server_settings.max_database_replicated_create_table_thread_pool_size);
/// Initialize global local cache for remote filesystem. /// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs")) if (config().has("local_cache_for_remote_fs"))
{ {

View File

@ -1380,11 +1380,13 @@ document.getElementById('params').onsubmit = function(event) {
event.preventDefault(); event.preventDefault();
} }
const decodeState = (x) => JSON.parse(LZString.decompressFromEncodedURIComponent(x) || atob(x));
const encodeState = (x) => LZString.compressToEncodedURIComponent(JSON.stringify(x));
function saveState() { function saveState() {
const state = { host, user, queries, params, search_query, customized }; const state = { host, user, queries, params, search_query, customized };
history.pushState(state, '', history.pushState(state, '',
window.location.pathname + (window.location.search || '') + '#' + LZString.compressToEncodedURIComponent(JSON.stringify(state))); window.location.pathname + (window.location.search || '') + '#' + encodeState(state));
} }
async function searchQueries() { async function searchQueries() {
@ -1450,12 +1452,7 @@ window.onpopstate = function(event) {
if (window.location.hash) { if (window.location.hash) {
try { try {
let search_query_, customized_; let search_query_, customized_;
try { ({host, user, queries, params, search_query_, customized_} = decodeState(window.location.hash.substring(1)));
({host, user, queries, params, search_query_, customized_} = JSON.parse(LZString.decompressFromEncodedURIComponent(window.location.hash.substring(1))));
} catch {
// For compatibility with uncompressed state
({host, user, queries, params, search_query_, customized_} = JSON.parse(atob(window.location.hash.substring(1))));
}
// For compatibility with old URLs' hashes // For compatibility with old URLs' hashes
search_query = search_query_ !== undefined ? search_query_ : search_query; search_query = search_query_ !== undefined ? search_query_ : search_query;

View File

@ -82,7 +82,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos); return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
} }
constexpr int nan_direction_hint = 1; constexpr int nan_null_direction_hint = -1;
auto const & column = *columns[0]; auto const & column = *columns[0];
if (if_argument_pos >= 0) if (if_argument_pos >= 0)
{ {
@ -95,7 +95,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
for (size_t i = index + 1; i < row_end; i++) for (size_t i = index + 1; i < row_end; i++)
{ {
if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0)) if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_null_direction_hint) > 0))
index = i; index = i;
} }
this->data(place).changeIfGreater(column, index, arena); this->data(place).changeIfGreater(column, index, arena);
@ -111,7 +111,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
size_t index = row_begin; size_t index = row_begin;
for (size_t i = index + 1; i < row_end; i++) for (size_t i = index + 1; i < row_end; i++)
{ {
if (column.compareAt(i, index, column, nan_direction_hint) > 0) if (column.compareAt(i, index, column, nan_null_direction_hint) > 0)
index = i; index = i;
} }
this->data(place).changeIfGreater(column, index, arena); this->data(place).changeIfGreater(column, index, arena);
@ -122,7 +122,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable; constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable;
IColumn::Permutation permutation; IColumn::Permutation permutation;
constexpr UInt64 limit = 1; constexpr UInt64 limit = 1;
column.getPermutation(direction, stability, limit, nan_direction_hint, permutation); column.getPermutation(direction, stability, limit, nan_null_direction_hint, permutation);
this->data(place).changeIfGreater(column, permutation[0], arena); this->data(place).changeIfGreater(column, permutation[0], arena);
} }
} }
@ -177,7 +177,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos); return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
} }
constexpr int nan_direction_hint = 1; constexpr int nan_null_direction_hint = -1;
auto const & column = *columns[0]; auto const & column = *columns[0];
if (if_argument_pos >= 0) if (if_argument_pos >= 0)
{ {
@ -190,7 +190,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
for (size_t i = index + 1; i < row_end; i++) for (size_t i = index + 1; i < row_end; i++)
{ {
if ((if_flags[i] != 0) && (null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0)) if ((if_flags[i] != 0) && (null_map[i] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) > 0))
index = i; index = i;
} }
this->data(place).changeIfGreater(column, index, arena); this->data(place).changeIfGreater(column, index, arena);
@ -205,7 +205,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
for (size_t i = index + 1; i < row_end; i++) for (size_t i = index + 1; i < row_end; i++)
{ {
if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0)) if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) > 0))
index = i; index = i;
} }
this->data(place).changeIfGreater(column, index, arena); this->data(place).changeIfGreater(column, index, arena);

View File

@ -83,7 +83,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos); return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
} }
constexpr int nan_direction_hint = 1; constexpr int nan_null_direction_hint = 1;
auto const & column = *columns[0]; auto const & column = *columns[0];
if (if_argument_pos >= 0) if (if_argument_pos >= 0)
{ {
@ -96,7 +96,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
for (size_t i = index + 1; i < row_end; i++) for (size_t i = index + 1; i < row_end; i++)
{ {
if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0)) if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_null_direction_hint) < 0))
index = i; index = i;
} }
this->data(place).changeIfLess(column, index, arena); this->data(place).changeIfLess(column, index, arena);
@ -112,7 +112,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
size_t index = row_begin; size_t index = row_begin;
for (size_t i = index + 1; i < row_end; i++) for (size_t i = index + 1; i < row_end; i++)
{ {
if (column.compareAt(i, index, column, nan_direction_hint) < 0) if (column.compareAt(i, index, column, nan_null_direction_hint) < 0)
index = i; index = i;
} }
this->data(place).changeIfLess(column, index, arena); this->data(place).changeIfLess(column, index, arena);
@ -123,7 +123,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable; constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable;
IColumn::Permutation permutation; IColumn::Permutation permutation;
constexpr UInt64 limit = 1; constexpr UInt64 limit = 1;
column.getPermutation(direction, stability, limit, nan_direction_hint, permutation); column.getPermutation(direction, stability, limit, nan_null_direction_hint, permutation);
this->data(place).changeIfLess(column, permutation[0], arena); this->data(place).changeIfLess(column, permutation[0], arena);
} }
} }
@ -178,7 +178,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos); return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
} }
constexpr int nan_direction_hint = 1; constexpr int nan_null_direction_hint = 1;
auto const & column = *columns[0]; auto const & column = *columns[0];
if (if_argument_pos >= 0) if (if_argument_pos >= 0)
{ {
@ -191,7 +191,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
for (size_t i = index + 1; i < row_end; i++) for (size_t i = index + 1; i < row_end; i++)
{ {
if ((if_flags[i] != 0) && (null_map[index] == 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0)) if ((if_flags[i] != 0) && (null_map[index] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) < 0))
index = i; index = i;
} }
this->data(place).changeIfLess(column, index, arena); this->data(place).changeIfLess(column, index, arena);
@ -206,7 +206,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
for (size_t i = index + 1; i < row_end; i++) for (size_t i = index + 1; i < row_end; i++)
{ {
if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0)) if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_null_direction_hint) < 0))
index = i; index = i;
} }
this->data(place).changeIfLess(column, index, arena); this->data(place).changeIfLess(column, index, arena);

View File

@ -234,6 +234,9 @@ public:
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{ {
if (!this->data(rhs).value.size())
return;
auto & set = this->data(place).value; auto & set = this->data(place).value;
if (set.capacity() != reserved) if (set.capacity() != reserved)
set.resize(reserved); set.resize(reserved);

View File

@ -424,6 +424,7 @@ struct TableExpressionData
bool should_qualify_columns = true; bool should_qualify_columns = true;
NamesAndTypes column_names_and_types; NamesAndTypes column_names_and_types;
ColumnNameToColumnNodeMap column_name_to_column_node; ColumnNameToColumnNodeMap column_name_to_column_node;
std::unordered_set<std::string> subcolumn_names; /// Subset columns that are subcolumns of other columns
std::unordered_set<std::string, StringTransparentHash, std::equal_to<>> column_identifier_first_parts; std::unordered_set<std::string, StringTransparentHash, std::equal_to<>> column_identifier_first_parts;
bool hasFullIdentifierName(IdentifierView identifier_view) const bool hasFullIdentifierName(IdentifierView identifier_view) const
@ -1306,6 +1307,12 @@ private:
const QueryTreeNodePtr & table_expression_node, const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope); IdentifierResolveScope & scope);
QueryTreeNodePtr matchArrayJoinSubcolumns(
const QueryTreeNodePtr & array_join_column_inner_expression,
const ColumnNode & array_join_column_expression_typed,
const QueryTreeNodePtr & resolved_expression,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveExpressionFromArrayJoinExpressions(const QueryTreeNodePtr & resolved_expression, QueryTreeNodePtr tryResolveExpressionFromArrayJoinExpressions(const QueryTreeNodePtr & resolved_expression,
const QueryTreeNodePtr & table_expression_node, const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope); IdentifierResolveScope & scope);
@ -2759,7 +2766,13 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier
{ {
if (identifier_lookup.isExpressionLookup()) if (identifier_lookup.isExpressionLookup())
{ {
return tryResolveIdentifierFromCompoundExpression(identifier_lookup.identifier, 1 /*identifier_bind_size*/, it->second, {}, scope); return tryResolveIdentifierFromCompoundExpression(
identifier_lookup.identifier,
1 /*identifier_bind_size*/,
it->second,
{} /* compound_expression_source */,
scope,
identifier_resolve_settings.allow_to_check_join_tree /* can_be_not_found */);
} }
else if (identifier_lookup.isFunctionLookup() || identifier_lookup.isTableExpressionLookup()) else if (identifier_lookup.isFunctionLookup() || identifier_lookup.isTableExpressionLookup())
{ {
@ -2913,8 +2926,23 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromStorage(
QueryTreeNodePtr result_expression; QueryTreeNodePtr result_expression;
bool match_full_identifier = false; bool match_full_identifier = false;
auto it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.getFullName()); const auto & identifier_full_name = identifier_without_column_qualifier.getFullName();
if (it != table_expression_data.column_name_to_column_node.end()) auto it = table_expression_data.column_name_to_column_node.find(identifier_full_name);
bool can_resolve_directly_from_storage = it != table_expression_data.column_name_to_column_node.end();
if (can_resolve_directly_from_storage && table_expression_data.subcolumn_names.contains(identifier_full_name))
{
/** In the case when we have an ARRAY JOIN, we should not resolve subcolumns directly from storage.
* For example, consider the following SQL query:
* SELECT ProfileEvents.Values FROM system.query_log ARRAY JOIN ProfileEvents
* In this case, ProfileEvents.Values should also be array joined, not directly resolved from storage.
*/
auto * nearest_query_scope = scope.getNearestQueryScope();
auto * nearest_query_scope_query_node = nearest_query_scope ? nearest_query_scope->scope_node->as<QueryNode>() : nullptr;
if (nearest_query_scope_query_node && nearest_query_scope_query_node->getJoinTree()->getNodeType() == QueryTreeNodeType::ARRAY_JOIN)
can_resolve_directly_from_storage = false;
}
if (can_resolve_directly_from_storage)
{ {
match_full_identifier = true; match_full_identifier = true;
result_expression = it->second; result_expression = it->second;
@ -3397,6 +3425,68 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
return resolved_identifier; return resolved_identifier;
} }
/** Try to rewrite `resolved_expression` as a subcolumn of an array-joined column.
  *
  * array_join_column_inner_expression - expression that is being array joined (the ARRAY JOIN argument).
  * array_join_column_expression_typed - column node that represents the result of that array join.
  * resolved_expression - already resolved expression that may be a `getSubcolumn` call.
  * scope - scope that is passed to `resolveFunction` for the newly built function node.
  *
  * Returns an empty pointer when `resolved_expression` is not a `getSubcolumn` function, when it does not
  * have exactly two arguments, when its first argument is not the array join parent column, or when its
  * subcolumn path does not start with the subcolumn path of the array join expression.
  * Otherwise returns a resolved `getSubcolumn(array_joined_column, remaining_path)` node.
  *
  * Throws LOGICAL_ERROR if the second argument of the resolved `getSubcolumn` is not a constant string.
  */
QueryTreeNodePtr QueryAnalyzer::matchArrayJoinSubcolumns(
    const QueryTreeNodePtr & array_join_column_inner_expression,
    const ColumnNode & array_join_column_expression_typed,
    const QueryTreeNodePtr & resolved_expression,
    IdentifierResolveScope & scope)
{
    const auto * resolved_function = resolved_expression->as<FunctionNode>();
    if (!resolved_function || resolved_function->getFunctionName() != "getSubcolumn")
        return {};

    /// By default the array join expression itself is the parent column and the prefix is empty.
    const auto * array_join_parent_column = array_join_column_inner_expression.get();

    /** If both resolved and array-joined expressions are subcolumns, try to match them:
      * For example, in `SELECT t.map.values FROM (SELECT * FROM tbl) ARRAY JOIN t.map`
      * Identifier `t.map.values` is resolved into `getSubcolumn(t, 'map.values')` and t.map is resolved into `getSubcolumn(t, 'map')`
      * Since we need to perform array join on `getSubcolumn(t, 'map')`, `t.map.values` should become `getSubcolumn(getSubcolumn(t, 'map'), 'values')`
      *
      * Note: It doesn't work when subcolumn in ARRAY JOIN is transformed by another expression, for example
      * SELECT c.map, c.map.values FROM (SELECT * FROM tbl) ARRAY JOIN mapApply(x -> x, t.map);
      */
    String array_join_subcolumn_prefix;
    auto * array_join_column_inner_expression_function = array_join_column_inner_expression->as<FunctionNode>();
    if (array_join_column_inner_expression_function &&
        array_join_column_inner_expression_function->getFunctionName() == "getSubcolumn")
    {
        const auto & argument_nodes = array_join_column_inner_expression_function->getArguments().getNodes();
        if (argument_nodes.size() == 2 && argument_nodes.at(1)->getNodeType() == QueryTreeNodeType::CONSTANT)
        {
            const auto & constant_node = argument_nodes.at(1)->as<ConstantNode &>();
            const auto & constant_node_value = constant_node.getValue();
            if (constant_node_value.getType() == Field::Types::String)
            {
                /// The array join is performed on `getSubcolumn(parent, 'prefix')`:
                /// remember the prefix (with a trailing dot) and compare against the parent instead.
                array_join_subcolumn_prefix = constant_node_value.get<String>() + ".";
                array_join_parent_column = argument_nodes.at(0).get();
            }
        }
    }

    const auto & argument_nodes = resolved_function->getArguments().getNodes();

    /// Either mismatch rules out a match, so the conditions must be joined with `||`.
    /// With `&&` the parent column comparison was skipped for every two-argument function,
    /// and `at(0)` could be reached on an empty argument list.
    if (argument_nodes.size() != 2 || !array_join_parent_column->isEqual(*argument_nodes.at(0)))
        return {};

    const auto * second_argument = argument_nodes.at(1)->as<ConstantNode>();
    if (!second_argument || second_argument->getValue().getType() != Field::Types::String)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected constant string as second argument of getSubcolumn function {}", resolved_function->dumpTree());

    const auto & resolved_subcolumn_path = second_argument->getValue().get<String &>();
    if (!startsWith(resolved_subcolumn_path, array_join_subcolumn_prefix))
        return {};

    /// Build `getSubcolumn(array_joined_column, path_without_prefix)` and resolve it in the given scope.
    auto get_subcolumn_function = std::make_shared<FunctionNode>("getSubcolumn");
    get_subcolumn_function->getArguments().getNodes().push_back(
        std::make_shared<ColumnNode>(array_join_column_expression_typed.getColumn(), array_join_column_expression_typed.getColumnSource()));
    get_subcolumn_function->getArguments().getNodes().push_back(
        std::make_shared<ConstantNode>(resolved_subcolumn_path.substr(array_join_subcolumn_prefix.size())));

    QueryTreeNodePtr function_query_node = get_subcolumn_function;
    resolveFunction(function_query_node, scope);

    return function_query_node;
}
QueryTreeNodePtr QueryAnalyzer::tryResolveExpressionFromArrayJoinExpressions(const QueryTreeNodePtr & resolved_expression, QueryTreeNodePtr QueryAnalyzer::tryResolveExpressionFromArrayJoinExpressions(const QueryTreeNodePtr & resolved_expression,
const QueryTreeNodePtr & table_expression_node, const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope) IdentifierResolveScope & scope)
@ -3465,8 +3555,12 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveExpressionFromArrayJoinExpressions(con
array_join_column_expression_typed.getColumnSource()); array_join_column_expression_typed.getColumnSource());
break; break;
} }
}
/// When we select subcolumn of array joined column it also should be array joined
array_join_resolved_expression = matchArrayJoinSubcolumns(array_join_column_inner_expression, array_join_column_expression_typed, resolved_expression, scope);
if (array_join_resolved_expression)
break;
}
return array_join_resolved_expression; return array_join_resolved_expression;
} }
@ -5570,6 +5664,14 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
column = function_base->getConstantResultForNonConstArguments(argument_columns, result_type); column = function_base->getConstantResultForNonConstArguments(argument_columns, result_type);
} }
if (column && column->getDataType() != result_type->getColumnType())
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Unexpected return type from {}. Expected {}. Got {}",
function->getName(),
result_type->getColumnType(),
column->getDataType());
/** Do not perform constant folding if there are aggregate or arrayJoin functions inside function. /** Do not perform constant folding if there are aggregate or arrayJoin functions inside function.
* Example: SELECT toTypeName(sum(number)) FROM numbers(10); * Example: SELECT toTypeName(sum(number)) FROM numbers(10);
*/ */
@ -6427,6 +6529,8 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
*/ */
for (const auto & column_name_and_type : table_expression_data.column_names_and_types) for (const auto & column_name_and_type : table_expression_data.column_names_and_types)
{ {
for (const auto & subcolumn : columns_description.getSubcolumns(column_name_and_type.name))
table_expression_data.subcolumn_names.insert(subcolumn.name);
const auto & column_default = columns_description.getDefault(column_name_and_type.name); const auto & column_default = columns_description.getDefault(column_name_and_type.name);
if (column_default && column_default->kind == ColumnDefaultKind::Alias) if (column_default && column_default->kind == ColumnDefaultKind::Alias)

View File

@ -903,6 +903,68 @@ void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
remove_fuzzed_table(insert->getTable()); remove_fuzzed_table(insert->getTable());
} }
/// Randomly wraps a literal that is a direct child of an expression list into conversion functions.
/// `child` must point to an ASTLiteral (checked with chassert).
/// A string literal may first be replaced by `toFixedString(value, value.size())`; after that the result
/// may be wrapped, in this order, into `toNullable`, `toLowCardinality` and `materialize`.
/// Every step is applied only when its own `fuzz_rand() % 7 == 0` draw succeeds.
/// Returns the (possibly wrapped) node.
ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child)
{
    auto * literal = child->as<ASTLiteral>();
    chassert(literal);

    /// The random draw happens only for string literals (short-circuit evaluation).
    const bool is_string_literal = literal->value.getType() == Field::Types::Which::String;
    if (is_string_literal && fuzz_rand() % 7 == 0)
    {
        String value = literal->value.get<String>();
        child = makeASTFunction(
            "toFixedString", std::make_shared<ASTLiteral>(value), std::make_shared<ASTLiteral>(static_cast<UInt64>(value.size())));
    }

    /// One independent draw per wrapper; the order of the wrappers is significant.
    for (const char * wrapper_name : {"toNullable", "toLowCardinality", "materialize"})
    {
        if (fuzz_rand() % 7 == 0)
            child = makeASTFunction(wrapper_name, child);
    }

    return child;
}
/// Counterpart of fuzzLiteralUnderExpressionList: tries to strip one of the wrapper functions it adds
/// (`toNullable`, `toLowCardinality`, `materialize`).
/// The wrapper is stripped regardless of whether what it wraps is a literal.
/// Returns the single child of the function when `child` is such a function with exactly one child
/// and the `fuzz_rand() % 7 == 0` draw succeeds; returns nullptr in every other case.
ASTPtr QueryFuzzer::reverseLiteralFuzzing(ASTPtr child)
{
    const auto * wrapper = child->as<ASTFunction>();
    if (!wrapper)
        return nullptr;

    const std::unordered_set<String> revertible_functions{"toNullable", "toLowCardinality", "materialize"};
    const bool is_revertible = revertible_functions.contains(wrapper->name) && wrapper->children.size() == 1;
    if (!is_revertible)
        return nullptr;

    /// The random draw is made only for revertible wrappers.
    if (fuzz_rand() % 7 != 0)
        return nullptr;

    return wrapper->children[0];
}
/// Fuzzes the children of an expression list in place.
/// A literal child is replaced by the result of fuzzLiteralUnderExpressionList when `fuzz_rand() % 13 == 0`,
/// otherwise it is left as is.
/// A non-literal child is replaced by the result of reverseLiteralFuzzing when that returns a node;
/// when it returns nullptr the child goes through the generic fuzz().
void QueryFuzzer::fuzzExpressionList(ASTExpressionList & expr_list)
{
    for (auto & element : expr_list.children)
    {
        if (typeid_cast<ASTLiteral *>(element.get()))
        {
            if (fuzz_rand() % 13 == 0)
                element = fuzzLiteralUnderExpressionList(element);
            continue;
        }

        if (auto unwrapped = reverseLiteralFuzzing(element))
            element = unwrapped;
        else
            fuzz(element);
    }
}
void QueryFuzzer::fuzz(ASTs & asts) void QueryFuzzer::fuzz(ASTs & asts)
{ {
for (auto & ast : asts) for (auto & ast : asts)
@ -989,7 +1051,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
} }
else if (auto * expr_list = typeid_cast<ASTExpressionList *>(ast.get())) else if (auto * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
{ {
fuzz(expr_list->children); fuzzExpressionList(*expr_list);
} }
else if (auto * order_by_element = typeid_cast<ASTOrderByElement *>(ast.get())) else if (auto * order_by_element = typeid_cast<ASTOrderByElement *>(ast.get()))
{ {
@ -1108,7 +1170,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
} }
/* /*
* The time to fuzz the settings has not yet come. * The time to fuzz the settings has not yet come.
* Apparently we don't have any infractructure to validate the values of * Apparently we don't have any infrastructure to validate the values of
* the settings, and the first query with max_block_size = -1 breaks * the settings, and the first query with max_block_size = -1 breaks
* because of overflows here and there. * because of overflows here and there.
*//* *//*
@ -1131,9 +1193,8 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
// are ASTPtr -- this is redundant ownership, but hides the error if the // are ASTPtr -- this is redundant ownership, but hides the error if the
// child field is replaced. Others can be ASTLiteral * or the like, which // child field is replaced. Others can be ASTLiteral * or the like, which
// leads to segfault if the pointed-to AST is replaced. // leads to segfault if the pointed-to AST is replaced.
// Replacing children is safe in case of ASTExpressionList. In a more // Replacing children is safe in case of ASTExpressionList (done in fuzzExpressionList). In a more
// general case, we can change the value of ASTLiteral, which is what we // general case, we can change the value of ASTLiteral, which is what we do here
// do here.
if (fuzz_rand() % 11 == 0) if (fuzz_rand() % 11 == 0)
{ {
literal->value = fuzzField(literal->value); literal->value = fuzzField(literal->value);

View File

@ -95,6 +95,9 @@ struct QueryFuzzer
void fuzzExplainSettings(ASTSetQuery & settings_ast, ASTExplainQuery::ExplainKind kind); void fuzzExplainSettings(ASTSetQuery & settings_ast, ASTExplainQuery::ExplainKind kind);
void fuzzColumnDeclaration(ASTColumnDeclaration & column); void fuzzColumnDeclaration(ASTColumnDeclaration & column);
void fuzzTableName(ASTTableExpression & table); void fuzzTableName(ASTTableExpression & table);
ASTPtr fuzzLiteralUnderExpressionList(ASTPtr child);
ASTPtr reverseLiteralFuzzing(ASTPtr child);
void fuzzExpressionList(ASTExpressionList & expr_list);
void fuzz(ASTs & asts); void fuzz(ASTs & asts);
void fuzz(ASTPtr & ast); void fuzz(ASTPtr & ast);
void collectFuzzInfoMain(ASTPtr ast); void collectFuzzInfoMain(ASTPtr ast);

View File

@ -178,6 +178,9 @@
M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \ M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \
M(MergeTreePartsCleanerThreadsActive, "Number of threads in the MergeTree parts cleaner thread pool running a task.") \ M(MergeTreePartsCleanerThreadsActive, "Number of threads in the MergeTree parts cleaner thread pool running a task.") \
M(MergeTreePartsCleanerThreadsScheduled, "Number of queued or active jobs in the MergeTree parts cleaner thread pool.") \ M(MergeTreePartsCleanerThreadsScheduled, "Number of queued or active jobs in the MergeTree parts cleaner thread pool.") \
M(DatabaseReplicatedCreateTablesThreads, "Number of threads in the threadpool for table creation in DatabaseReplicated.") \
M(DatabaseReplicatedCreateTablesThreadsActive, "Number of active threads in the threadpool for table creation in DatabaseReplicated.") \
M(DatabaseReplicatedCreateTablesThreadsScheduled, "Number of queued or active jobs in the threadpool for table creation in DatabaseReplicated.") \
M(IDiskCopierThreads, "Number of threads for copying data between disks of different types.") \ M(IDiskCopierThreads, "Number of threads for copying data between disks of different types.") \
M(IDiskCopierThreadsActive, "Number of threads for copying data between disks of different types running a task.") \ M(IDiskCopierThreadsActive, "Number of threads for copying data between disks of different types running a task.") \
M(IDiskCopierThreadsScheduled, "Number of queued or active jobs for copying data between disks of different types.") \ M(IDiskCopierThreadsScheduled, "Number of queued or active jobs for copying data between disks of different types.") \

View File

@ -14,12 +14,13 @@ enum Time
Seconds, Seconds,
}; };
template <Time time> template <Time unit>
struct ProfileEventTimeIncrement struct ProfileEventTimeIncrement
{ {
explicit ProfileEventTimeIncrement<time>(ProfileEvents::Event event_) explicit ProfileEventTimeIncrement<time>(ProfileEvents::Event event_)
: event(event_), watch(CLOCK_MONOTONIC) {} : event(event_), watch(CLOCK_MONOTONIC) {}
template <Time time = unit>
UInt64 elapsed() UInt64 elapsed()
{ {
if constexpr (time == Time::Nanoseconds) if constexpr (time == Time::Nanoseconds)

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <cerrno> #include <cerrno>
#include <exception>
#include <vector> #include <vector>
#include <memory> #include <memory>
@ -8,10 +9,11 @@
#include <base/defines.h> #include <base/defines.h>
#include <base/errnoToString.h> #include <base/errnoToString.h>
#include <base/int8_to_string.h>
#include <base/scope_guard.h> #include <base/scope_guard.h>
#include <Common/LoggingFormatStringHelpers.h>
#include <Common/Logger.h>
#include <Common/AtomicLogger.h> #include <Common/AtomicLogger.h>
#include <Common/Logger.h>
#include <Common/LoggingFormatStringHelpers.h>
#include <Common/StackTrace.h> #include <Common/StackTrace.h>
#include <fmt/format.h> #include <fmt/format.h>

View File

@ -2,6 +2,7 @@
#include <atomic> #include <atomic>
#include <memory> #include <memory>
#include <mutex>
#include <base/defines.h> #include <base/defines.h>
@ -20,6 +21,9 @@
* } // now we finish own current version; if the version is outdated and no one else is using it - it will be destroyed. * } // now we finish own current version; if the version is outdated and no one else is using it - it will be destroyed.
* *
* All methods are thread-safe. * All methods are thread-safe.
*
* Standard library does not have atomic_shared_ptr, and we do not use std::atomic* operations on shared_ptr,
* because standard library implementation uses fixed table of mutexes, and it is better to avoid contention here.
*/ */
template <typename T> template <typename T>
class MultiVersion class MultiVersion
@ -42,25 +46,37 @@ public:
MultiVersion & operator=(MultiVersion && src) MultiVersion & operator=(MultiVersion && src)
{ {
if (this != &src) if (this != &src)
std::atomic_store(&current_version, std::atomic_exchange(&src.current_version, Version{})); {
Version version;
{
std::lock_guard<std::mutex> lock(src.mutex);
src.current_version.swap(version);
}
std::lock_guard<std::mutex> lock(mutex);
current_version = std::move(version);
}
return *this; return *this;
} }
/// Obtain current version for read-only usage. Returns shared_ptr, that manages lifetime of version. /// Obtain current version for read-only usage. Returns shared_ptr, that manages lifetime of version.
Version get() const Version get() const
{ {
return std::atomic_load(&current_version); std::lock_guard<std::mutex> lock(mutex);
return current_version;
} }
/// TODO: replace atomic_load/store() on shared_ptr (which is deprecated as of C++20) by C++20 std::atomic<std::shared_ptr>.
/// Clang 15 currently does not support it.
/// Update an object with new version. /// Update an object with new version.
void set(std::unique_ptr<const T> && value) void set(std::unique_ptr<const T> && value)
{ {
std::atomic_store(&current_version, Version{std::move(value)}); Version version{std::move(value)};
std::lock_guard<std::mutex> lock(mutex);
current_version = std::move(version);
} }
private: private:
mutable std::mutex mutex;
Version current_version; Version current_version;
}; };

View File

@ -197,6 +197,15 @@
M(MergeTreeDataWriterBlocks, "Number of blocks INSERTed to MergeTree tables. Each block forms a data part of level zero.") \ M(MergeTreeDataWriterBlocks, "Number of blocks INSERTed to MergeTree tables. Each block forms a data part of level zero.") \
M(MergeTreeDataWriterBlocksAlreadySorted, "Number of blocks INSERTed to MergeTree tables that appeared to be already sorted.") \ M(MergeTreeDataWriterBlocksAlreadySorted, "Number of blocks INSERTed to MergeTree tables that appeared to be already sorted.") \
\ \
M(MergeTreeDataWriterSkipIndicesCalculationMicroseconds, "Time spent calculating skip indices") \
M(MergeTreeDataWriterStatisticsCalculationMicroseconds, "Time spent calculating statistics") \
M(MergeTreeDataWriterSortingBlocksMicroseconds, "Time spent sorting blocks") \
M(MergeTreeDataWriterMergingBlocksMicroseconds, "Time spent merging input blocks (for special MergeTree engines)") \
M(MergeTreeDataWriterProjectionsCalculationMicroseconds, "Time spent calculating projections") \
M(MergeTreeDataProjectionWriterSortingBlocksMicroseconds, "Time spent sorting blocks (for projection it might be a key different from table's sorting key)") \
M(MergeTreeDataProjectionWriterMergingBlocksMicroseconds, "Time spent merging blocks") \
M(MutateTaskProjectionsCalculationMicroseconds, "Time spent calculating projections") \
\
M(InsertedWideParts, "Number of parts inserted in Wide format.") \ M(InsertedWideParts, "Number of parts inserted in Wide format.") \
M(InsertedCompactParts, "Number of parts inserted in Compact format.") \ M(InsertedCompactParts, "Number of parts inserted in Compact format.") \
M(MergedIntoWideParts, "Number of parts merged into Wide format.") \ M(MergedIntoWideParts, "Number of parts merged into Wide format.") \

View File

@ -206,6 +206,9 @@ public:
*/ */
void merge(const Self & rhs) void merge(const Self & rhs)
{ {
if (!rhs.size())
return;
UInt64 m1 = 0; UInt64 m1 = 0;
UInt64 m2 = 0; UInt64 m2 = 0;

View File

@ -1,7 +1,6 @@
#include "getNumberOfPhysicalCPUCores.h" #include "getNumberOfPhysicalCPUCores.h"
#include <filesystem> #include <filesystem>
#include "config.h"
#if defined(OS_LINUX) #if defined(OS_LINUX)
# include <cmath> # include <cmath>
# include <fstream> # include <fstream>
@ -34,9 +33,9 @@ int32_t readFrom(const std::filesystem::path & filename, int default_value)
uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count) uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
{ {
uint32_t quota_count = default_cpu_count; uint32_t quota_count = default_cpu_count;
std::filesystem::path prefix = "/sys/fs/cgroup"; std::filesystem::path default_cgroups_mount = "/sys/fs/cgroup";
/// cgroupsv2 /// cgroupsv2
std::ifstream contr_file(prefix / "cgroup.controllers"); std::ifstream contr_file(default_cgroups_mount / "cgroup.controllers");
if (contr_file.is_open()) if (contr_file.is_open())
{ {
/// First, we identify the cgroup the process belongs /// First, we identify the cgroup the process belongs
@ -51,16 +50,15 @@ uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
std::filesystem::path current_cgroup; std::filesystem::path current_cgroup;
if (cgroup_name.empty()) if (cgroup_name.empty())
current_cgroup = prefix; current_cgroup = default_cgroups_mount;
else else
current_cgroup = prefix / cgroup_name; current_cgroup = default_cgroups_mount / cgroup_name;
// Looking for cpu.max in directories from the current cgroup to the top level // Looking for cpu.max in directories from the current cgroup to the top level
// It does not stop on the first time since the child could have a greater value than parent // It does not stop on the first time since the child could have a greater value than parent
while (current_cgroup != prefix.parent_path()) while (current_cgroup != default_cgroups_mount.parent_path())
{ {
std::ifstream cpu_max_file(current_cgroup / "cpu.max"); std::ifstream cpu_max_file(current_cgroup / "cpu.max");
current_cgroup = current_cgroup.parent_path();
if (cpu_max_file.is_open()) if (cpu_max_file.is_open())
{ {
std::string cpu_limit_str; std::string cpu_limit_str;
@ -72,10 +70,11 @@ uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
quota_count = std::min(static_cast<uint32_t>(ceil(cpu_limit / cpu_period)), quota_count); quota_count = std::min(static_cast<uint32_t>(ceil(cpu_limit / cpu_period)), quota_count);
} }
} }
current_cgroup = current_cgroup.parent_path();
} }
current_cgroup = prefix / cgroup_name; current_cgroup = default_cgroups_mount / cgroup_name;
// Looking for cpuset.cpus.effective in directories from the current cgroup to the top level // Looking for cpuset.cpus.effective in directories from the current cgroup to the top level
while (current_cgroup != prefix.parent_path()) while (current_cgroup != default_cgroups_mount.parent_path())
{ {
std::ifstream cpuset_cpus_file(current_cgroup / "cpuset.cpus.effective"); std::ifstream cpuset_cpus_file(current_cgroup / "cpuset.cpus.effective");
current_cgroup = current_cgroup.parent_path(); current_cgroup = current_cgroup.parent_path();
@ -113,8 +112,8 @@ uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count)
/// cgroupsv1 /// cgroupsv1
/// Return the number of milliseconds per period process is guaranteed to run. /// Return the number of milliseconds per period process is guaranteed to run.
/// -1 for no quota /// -1 for no quota
int cgroup_quota = readFrom(prefix / "cpu/cpu.cfs_quota_us", -1); int cgroup_quota = readFrom(default_cgroups_mount / "cpu/cpu.cfs_quota_us", -1);
int cgroup_period = readFrom(prefix / "cpu/cpu.cfs_period_us", -1); int cgroup_period = readFrom(default_cgroups_mount / "cpu/cpu.cfs_period_us", -1);
if (cgroup_quota > -1 && cgroup_period > 0) if (cgroup_quota > -1 && cgroup_period > 0)
quota_count = static_cast<uint32_t>(ceil(static_cast<float>(cgroup_quota) / static_cast<float>(cgroup_period))); quota_count = static_cast<uint32_t>(ceil(static_cast<float>(cgroup_quota) / static_cast<float>(cgroup_period)));
@ -178,24 +177,25 @@ catch (...)
unsigned getNumberOfPhysicalCPUCoresImpl() unsigned getNumberOfPhysicalCPUCoresImpl()
{ {
unsigned cpu_count = std::thread::hardware_concurrency(); /// logical cores (with SMT/HyperThreading) unsigned cores = std::thread::hardware_concurrency(); /// logical cores (with SMT/HyperThreading)
#if defined(__x86_64__) && defined(OS_LINUX)
/// Most x86_64 CPUs have 2-way SMT (Hyper-Threading). /// Most x86_64 CPUs have 2-way SMT (Hyper-Threading).
/// Aarch64 and RISC-V don't have SMT so far. /// Aarch64 and RISC-V don't have SMT so far.
/// POWER has SMT and it can be multi-way (e.g. 8-way), but we don't know how ClickHouse really behaves, so use all of them. /// POWER has SMT and it can be multi-way (e.g. 8-way), but we don't know how ClickHouse really behaves, so use all of them.
///
#if defined(__x86_64__) && defined(OS_LINUX)
/// On really big machines, SMT is detrimental to performance (+ ~5% overhead in ClickBench). On such machines, we limit ourself to the physical cores. /// On really big machines, SMT is detrimental to performance (+ ~5% overhead in ClickBench). On such machines, we limit ourself to the physical cores.
/// Few cores indicate it is a small machine, runs in a VM or is a limited cloud instance --> it is reasonable to use all the cores. /// Few cores indicate it is a small machine, runs in a VM or is a limited cloud instance --> it is reasonable to use all the cores.
if (cpu_count >= 32) if (cores >= 32)
cpu_count = physical_concurrency(); cores = physical_concurrency();
#endif #endif
#if defined(OS_LINUX) #if defined(OS_LINUX)
cpu_count = getCGroupLimitedCPUCores(cpu_count); cores = getCGroupLimitedCPUCores(cores);
#endif #endif
return cpu_count; return cores;
} }
} }
@ -203,6 +203,6 @@ unsigned getNumberOfPhysicalCPUCoresImpl()
unsigned getNumberOfPhysicalCPUCores() unsigned getNumberOfPhysicalCPUCores()
{ {
/// Calculate once. /// Calculate once.
static auto res = getNumberOfPhysicalCPUCoresImpl(); static auto cores = getNumberOfPhysicalCPUCoresImpl();
return res; return cores;
} }

View File

@ -216,9 +216,8 @@ using NearestFieldType = typename NearestFieldTypeImpl<T>::Type;
template <> struct NearestFieldTypeImpl<char> { using Type = std::conditional_t<is_signed_v<char>, Int64, UInt64>; }; template <> struct NearestFieldTypeImpl<char> { using Type = std::conditional_t<is_signed_v<char>, Int64, UInt64>; };
template <> struct NearestFieldTypeImpl<signed char> { using Type = Int64; }; template <> struct NearestFieldTypeImpl<signed char> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<unsigned char> { using Type = UInt64; }; template <> struct NearestFieldTypeImpl<unsigned char> { using Type = UInt64; };
#ifdef __cpp_char8_t
template <> struct NearestFieldTypeImpl<char8_t> { using Type = UInt64; }; template <> struct NearestFieldTypeImpl<char8_t> { using Type = UInt64; };
#endif template <> struct NearestFieldTypeImpl<Int8> { using Type = Int64; };
template <> struct NearestFieldTypeImpl<UInt16> { using Type = UInt64; }; template <> struct NearestFieldTypeImpl<UInt16> { using Type = UInt64; };
template <> struct NearestFieldTypeImpl<UInt32> { using Type = UInt64; }; template <> struct NearestFieldTypeImpl<UInt32> { using Type = UInt64; };
@ -306,7 +305,6 @@ static constexpr auto DBMS_MIN_FIELD_SIZE = 32;
*/ */
class Field class Field
{ {
static constexpr int nan_direction_hint = 1; // When comparing Floats NaN are considered to be larger than all numbers
public: public:
struct Types struct Types
{ {
@ -511,6 +509,7 @@ public:
case Types::IPv4: return get<IPv4>() < rhs.get<IPv4>(); case Types::IPv4: return get<IPv4>() < rhs.get<IPv4>();
case Types::IPv6: return get<IPv6>() < rhs.get<IPv6>(); case Types::IPv6: return get<IPv6>() < rhs.get<IPv6>();
case Types::Float64: case Types::Float64:
static constexpr int nan_direction_hint = 1; /// Put NaN at the end
return FloatCompareHelper<Float64>::less(get<Float64>(), rhs.get<Float64>(), nan_direction_hint); return FloatCompareHelper<Float64>::less(get<Float64>(), rhs.get<Float64>(), nan_direction_hint);
case Types::String: return get<String>() < rhs.get<String>(); case Types::String: return get<String>() < rhs.get<String>();
case Types::Array: return get<Array>() < rhs.get<Array>(); case Types::Array: return get<Array>() < rhs.get<Array>();
@ -555,6 +554,7 @@ public:
case Types::IPv6: return get<IPv6>() <= rhs.get<IPv6>(); case Types::IPv6: return get<IPv6>() <= rhs.get<IPv6>();
case Types::Float64: case Types::Float64:
{ {
static constexpr int nan_direction_hint = 1; /// Put NaN at the end
Float64 f1 = get<Float64>(); Float64 f1 = get<Float64>();
Float64 f2 = get<Float64>(); Float64 f2 = get<Float64>();
return FloatCompareHelper<Float64>::less(f1, f2, nan_direction_hint) return FloatCompareHelper<Float64>::less(f1, f2, nan_direction_hint)
@ -595,6 +595,7 @@ public:
case Types::UInt64: return get<UInt64>() == rhs.get<UInt64>(); case Types::UInt64: return get<UInt64>() == rhs.get<UInt64>();
case Types::Int64: return get<Int64>() == rhs.get<Int64>(); case Types::Int64: return get<Int64>() == rhs.get<Int64>();
case Types::Float64: case Types::Float64:
static constexpr int nan_direction_hint = 1; /// Put NaN at the end
return FloatCompareHelper<Float64>::equals(get<Float64>(), rhs.get<Float64>(), nan_direction_hint); return FloatCompareHelper<Float64>::equals(get<Float64>(), rhs.get<Float64>(), nan_direction_hint);
case Types::UUID: return get<UUID>() == rhs.get<UUID>(); case Types::UUID: return get<UUID>() == rhs.get<UUID>();
case Types::IPv4: return get<IPv4>() == rhs.get<IPv4>(); case Types::IPv4: return get<IPv4>() == rhs.get<IPv4>();

View File

@ -114,6 +114,7 @@ namespace DB
M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \ M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \ M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \
M(UInt64, max_materialized_views_count_for_table, 0, "A limit on the number of materialized views attached to a table.", 0) \ M(UInt64, max_materialized_views_count_for_table, 0, "A limit on the number of materialized views attached to a table.", 0) \
M(UInt64, max_database_replicated_create_table_thread_pool_size, 0, "The number of threads to create tables during replica recovery in DatabaseReplicated. Value less than two means tables will be created sequentially.", 0) \
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp

View File

@ -559,7 +559,7 @@ class IColumn;
M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
\ \
M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \ M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \
M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \ M(DefaultTableEngine, default_table_engine, DefaultTableEngine::MergeTree, "Default table engine used when ENGINE is not set in CREATE statement.",0) \
M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \ M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \ M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
@ -748,7 +748,7 @@ class IColumn;
M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \
M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \ M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \
M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \ M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \
M(UInt64, async_insert_max_data_size, 1000000, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \ M(UInt64, async_insert_max_data_size, 10485760, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \
M(UInt64, async_insert_max_query_number, 450, "Maximum number of insert queries before being inserted", 0) \ M(UInt64, async_insert_max_query_number, 450, "Maximum number of insert queries before being inserted", 0) \
M(Milliseconds, async_insert_poll_timeout_ms, 10, "Timeout for polling data from asynchronous insert queue", 0) \ M(Milliseconds, async_insert_poll_timeout_ms, 10, "Timeout for polling data from asynchronous insert queue", 0) \
M(Bool, async_insert_use_adaptive_busy_timeout, true, "If it is set to true, use adaptive busy timeout for asynchronous inserts", 0) \ M(Bool, async_insert_use_adaptive_busy_timeout, true, "If it is set to true, use adaptive busy timeout for asynchronous inserts", 0) \

View File

@ -84,7 +84,8 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history = static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{ {
{"24.2", {{"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"}, {"24.2", {{"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."},
{"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"},
{"async_insert_use_adaptive_busy_timeout", true, true, "Use adaptive asynchronous insert timeout"}, {"async_insert_use_adaptive_busy_timeout", true, true, "Use adaptive asynchronous insert timeout"},
{"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"},
{"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"},
@ -108,6 +109,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"},
{"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"},
{"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"},
{"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"},
{"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"},
{"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"},
{"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}}}, {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}}},

View File

@ -29,6 +29,7 @@
#include <IO/ReadBufferFromString.h> #include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <IO/SharedThreadPools.h>
#include <Parsers/ASTAlterQuery.h> #include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTDropQuery.h> #include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
@ -1091,31 +1092,57 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
} }
tables_dependencies.checkNoCyclicDependencies(); tables_dependencies.checkNoCyclicDependencies();
auto tables_to_create = tables_dependencies.getTablesSortedByDependency();
for (const auto & table_id : tables_to_create) auto allow_concurrent_table_creation = getContext()->getServerSettings().max_database_replicated_create_table_thread_pool_size > 1;
auto tables_to_create_by_level = tables_dependencies.getTablesSortedByDependencyWithLevels();
auto create_tables_runner = threadPoolCallbackRunner<void>(getDatabaseReplicatedCreateTablesThreadPool().get(), "CreateTables");
std::vector<std::future<void>> create_table_futures;
for (const auto & [_, tables_to_create] : tables_to_create_by_level)
{ {
auto table_name = table_id.getTableName(); for (const auto & table_id : tables_to_create)
auto metadata_it = table_name_to_metadata.find(table_name);
if (metadata_it == table_name_to_metadata.end())
{ {
/// getTablesSortedByDependency() may return some not existing tables or tables from other databases auto task = [&]()
LOG_WARNING(log, "Got table name {} when resolving table dependencies, " {
"but database {} does not have metadata for that table. Ignoring it", table_id.getNameForLogs(), getDatabaseName()); auto table_name = table_id.getTableName();
continue; auto metadata_it = table_name_to_metadata.find(table_name);
if (metadata_it == table_name_to_metadata.end())
{
/// getTablesSortedByDependency() may return some not existing tables or tables from other databases
LOG_WARNING(log, "Got table name {} when resolving table dependencies, "
"but database {} does not have metadata for that table. Ignoring it", table_id.getNameForLogs(), getDatabaseName());
return;
}
const auto & create_query_string = metadata_it->second;
if (isTableExist(table_name, getContext()))
{
assert(create_query_string == readMetadataFile(table_name) || getTableUUIDIfReplicated(create_query_string, getContext()) != UUIDHelpers::Nil);
return;
}
auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_query_string);
LOG_INFO(log, "Executing {}", serializeAST(*query_ast));
auto create_query_context = make_query_context();
InterpreterCreateQuery(query_ast, create_query_context).execute();
};
if (allow_concurrent_table_creation)
create_table_futures.push_back(create_tables_runner(task, Priority{0}));
else
task();
} }
const auto & create_query_string = metadata_it->second; /// First wait for all tasks to finish.
if (isTableExist(table_name, getContext())) for (auto & future : create_table_futures)
{ future.wait();
assert(create_query_string == readMetadataFile(table_name) || getTableUUIDIfReplicated(create_query_string, getContext()) != UUIDHelpers::Nil);
continue;
}
auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_query_string); /// Now rethrow the first exception if any.
LOG_INFO(log, "Executing {}", serializeAST(*query_ast)); for (auto & future : create_table_futures)
auto create_query_context = make_query_context(); future.get();
InterpreterCreateQuery(query_ast, create_query_context).execute();
create_table_futures.clear();
} }
LOG_INFO(log, "All tables are created successfully"); LOG_INFO(log, "All tables are created successfully");

View File

@ -699,6 +699,17 @@ std::vector<StorageID> TablesDependencyGraph::getTablesSortedByDependency() cons
} }
/// Buckets the storage ids of all graph nodes by the `level` stored in each node.
/// The result is keyed by level, so iterating the returned map visits levels in ascending order.
/// Inside one bucket the ids keep the order in which getNodesSortedByLevel() produced the nodes.
std::map<size_t, std::vector<StorageID>> TablesDependencyGraph::getTablesSortedByDependencyWithLevels() const
{
    std::map<size_t, std::vector<StorageID>> level_to_tables;

    const auto & sorted_nodes = getNodesSortedByLevel();
    for (const auto * graph_node : sorted_nodes)
    {
        /// operator[] creates the bucket on first use of a level.
        auto & bucket = level_to_tables[graph_node->level];
        bucket.push_back(graph_node->storage_id);
    }

    return level_to_tables;
}
void TablesDependencyGraph::log() const void TablesDependencyGraph::log() const
{ {
if (nodes.empty()) if (nodes.empty())

View File

@ -107,6 +107,12 @@ public:
/// tables which depend on the tables which depend on the tables without dependencies, and so on. /// tables which depend on the tables which depend on the tables without dependencies, and so on.
std::vector<StorageID> getTablesSortedByDependency() const; std::vector<StorageID> getTablesSortedByDependency() const;
/// Returns lists of tables grouped by their dependency level (the map key is the level):
/// tables without dependencies come first with level 0, then
/// tables which depend on the tables without dependencies with level 1, then
/// tables which depend on the tables which depend on the tables without dependencies with level 2, and so on.
std::map<size_t, std::vector<StorageID>> getTablesSortedByDependencyWithLevels() const;
/// Outputs information about this graph as a bunch of logging messages. /// Outputs information about this graph as a bunch of logging messages.
void log() const; void log() const;

View File

@ -670,8 +670,7 @@ struct ArrayElementStringImpl
ColumnArray::Offset current_offset = 0; ColumnArray::Offset current_offset = 0;
/// get the total result bytes at first, and reduce the cost of result_data.resize. /// get the total result bytes at first, and reduce the cost of result_data.resize.
size_t total_result_bytes = 0; size_t total_result_bytes = 0;
ColumnString::Chars zero_buf(1); ColumnString::Chars zero_buf(16, '\0'); /// Needs 15 extra bytes for memcpySmallAllowReadWriteOverflow15
zero_buf.push_back(0);
std::vector<std::pair<const ColumnString::Char *, UInt64>> selected_bufs; std::vector<std::pair<const ColumnString::Char *, UInt64>> selected_bufs;
selected_bufs.reserve(size); selected_bufs.reserve(size);
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
@ -737,8 +736,7 @@ struct ArrayElementStringImpl
size_t size = offsets.size(); size_t size = offsets.size();
result_offsets.resize(size); result_offsets.resize(size);
ColumnString::Chars zero_buf(1); ColumnString::Chars zero_buf(16, '\0'); /// Needs 15 extra bytes for memcpySmallAllowReadWriteOverflow15
zero_buf.push_back(0);
ColumnArray::Offset current_offset = 0; ColumnArray::Offset current_offset = 0;
/// get the total result bytes at first, and reduce the cost of result_data.resize. /// get the total result bytes at first, and reduce the cost of result_data.resize.
size_t total_result_bytes = 0; size_t total_result_bytes = 0;

View File

@ -49,9 +49,9 @@ template void divideImpl<uint32_t, char8_t, uint32_t>(const uint32_t * __restric
template void divideImpl<int64_t, int64_t, int64_t>(const int64_t * __restrict, int64_t, int64_t * __restrict, size_t); template void divideImpl<int64_t, int64_t, int64_t>(const int64_t * __restrict, int64_t, int64_t * __restrict, size_t);
template void divideImpl<int64_t, int32_t, int64_t>(const int64_t * __restrict, int32_t, int64_t * __restrict, size_t); template void divideImpl<int64_t, int32_t, int64_t>(const int64_t * __restrict, int32_t, int64_t * __restrict, size_t);
template void divideImpl<int64_t, int16_t, int64_t>(const int64_t * __restrict, int16_t, int64_t * __restrict, size_t); template void divideImpl<int64_t, int16_t, int64_t>(const int64_t * __restrict, int16_t, int64_t * __restrict, size_t);
template void divideImpl<int64_t, int8_t, int64_t>(const int64_t * __restrict, int8_t, int64_t * __restrict, size_t); template void divideImpl<int64_t, Int8, int64_t>(const int64_t * __restrict, Int8, int64_t * __restrict, size_t);
template void divideImpl<int32_t, int64_t, int32_t>(const int32_t * __restrict, int64_t, int32_t * __restrict, size_t); template void divideImpl<int32_t, int64_t, int32_t>(const int32_t * __restrict, int64_t, int32_t * __restrict, size_t);
template void divideImpl<int32_t, int32_t, int32_t>(const int32_t * __restrict, int32_t, int32_t * __restrict, size_t); template void divideImpl<int32_t, int32_t, int32_t>(const int32_t * __restrict, int32_t, int32_t * __restrict, size_t);
template void divideImpl<int32_t, int16_t, int32_t>(const int32_t * __restrict, int16_t, int32_t * __restrict, size_t); template void divideImpl<int32_t, int16_t, int32_t>(const int32_t * __restrict, int16_t, int32_t * __restrict, size_t);
template void divideImpl<int32_t, int8_t, int32_t>(const int32_t * __restrict, int8_t, int32_t * __restrict, size_t); template void divideImpl<int32_t, Int8, int32_t>(const int32_t * __restrict, Int8, int32_t * __restrict, size_t);

View File

@ -12,6 +12,10 @@
#include <libdivide.h> #include <libdivide.h>
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wbit-int-extension"
using Int8 = signed _BitInt(8);
#pragma clang diagnostic pop
namespace NAMESPACE namespace NAMESPACE
{ {
@ -62,11 +66,11 @@ template void divideImpl<uint32_t, char8_t, uint32_t>(const uint32_t * __restric
template void divideImpl<int64_t, int64_t, int64_t>(const int64_t * __restrict, int64_t, int64_t * __restrict, size_t); template void divideImpl<int64_t, int64_t, int64_t>(const int64_t * __restrict, int64_t, int64_t * __restrict, size_t);
template void divideImpl<int64_t, int32_t, int64_t>(const int64_t * __restrict, int32_t, int64_t * __restrict, size_t); template void divideImpl<int64_t, int32_t, int64_t>(const int64_t * __restrict, int32_t, int64_t * __restrict, size_t);
template void divideImpl<int64_t, int16_t, int64_t>(const int64_t * __restrict, int16_t, int64_t * __restrict, size_t); template void divideImpl<int64_t, int16_t, int64_t>(const int64_t * __restrict, int16_t, int64_t * __restrict, size_t);
template void divideImpl<int64_t, int8_t, int64_t>(const int64_t * __restrict, int8_t, int64_t * __restrict, size_t); template void divideImpl<int64_t, Int8, int64_t>(const int64_t * __restrict, Int8, int64_t * __restrict, size_t);
template void divideImpl<int32_t, int64_t, int32_t>(const int32_t * __restrict, int64_t, int32_t * __restrict, size_t); template void divideImpl<int32_t, int64_t, int32_t>(const int32_t * __restrict, int64_t, int32_t * __restrict, size_t);
template void divideImpl<int32_t, int32_t, int32_t>(const int32_t * __restrict, int32_t, int32_t * __restrict, size_t); template void divideImpl<int32_t, int32_t, int32_t>(const int32_t * __restrict, int32_t, int32_t * __restrict, size_t);
template void divideImpl<int32_t, int16_t, int32_t>(const int32_t * __restrict, int16_t, int32_t * __restrict, size_t); template void divideImpl<int32_t, int16_t, int32_t>(const int32_t * __restrict, int16_t, int32_t * __restrict, size_t);
template void divideImpl<int32_t, int8_t, int32_t>(const int32_t * __restrict, int8_t, int32_t * __restrict, size_t); template void divideImpl<int32_t, Int8, int32_t>(const int32_t * __restrict, Int8, int32_t * __restrict, size_t);
} }

View File

@ -1,32 +1,34 @@
#include <DataTypes/DataTypesNumber.h> #include <Columns/ColumnArray.h>
#include <DataTypes/DataTypesDecimal.h> #include <Columns/ColumnConst.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnVariant.h>
#include <Columns/ColumnVector.h>
#include <Columns/MaskOperations.h>
#include <DataTypes/DataTypeArray.h> #include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFixedString.h> #include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeTuple.h> #include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeVariant.h> #include <DataTypes/DataTypeVariant.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/NumberTraits.h> #include <DataTypes/NumberTraits.h>
#include <DataTypes/getLeastSupertype.h> #include <DataTypes/getLeastSupertype.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnVariant.h>
#include <Columns/MaskOperations.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/GatherUtils/Algorithms.h>
#include <Functions/FunctionIfBase.h>
#include <Interpreters/castColumn.h>
#include <Interpreters/Context.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionIfBase.h>
#include <Functions/GatherUtils/Algorithms.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include <Interpreters/castColumn.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
#include <type_traits> #include <type_traits>
namespace DB namespace DB
@ -36,6 +38,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
} }
namespace namespace
@ -227,13 +230,8 @@ inline void fillConstantConstant(const ArrayCond & cond, A a, B b, ArrayResult &
{ {
size_t size = cond.size(); size_t size = cond.size();
/// Int8(alias type of uint8_t) has special aliasing properties that prevents compiler from auto-vectorizing for below codes, refer to https://gist.github.com/alexei-zaripov/dcc14c78819c5f1354afe8b70932007c /// We manually optimize the loop for types like (U)Int128|256 or Decimal128/256 to avoid branches
/// if constexpr (is_over_big_int<ResultType>)
/// for (size_t i = 0; i < size; ++i)
/// res[i] = cond[i] ? static_cast<Int8>(a) : static_cast<Int8>(b);
///
/// Therefore, we manually optimize it by avoiding branch miss when ResultType is Int8. Other types like (U)Int128|256 or Decimal128/256 also benefit from this optimization.
if constexpr (std::is_same_v<ResultType, Int8> || is_over_big_int<ResultType>)
{ {
alignas(64) const ResultType ab[2] = {static_cast<ResultType>(a), static_cast<ResultType>(b)}; alignas(64) const ResultType ab[2] = {static_cast<ResultType>(a), static_cast<ResultType>(b)};
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
@ -724,7 +722,6 @@ private:
conditional(ConstSource<GenericArraySource>(*col_arr_then_const), ConstSource<GenericArraySource>(*col_arr_else_const), GenericArraySink(col_res->getData(), col_res->getOffsets(), rows), cond_data); conditional(ConstSource<GenericArraySource>(*col_arr_then_const), ConstSource<GenericArraySource>(*col_arr_else_const), GenericArraySink(col_res->getData(), col_res->getOffsets(), rows), cond_data);
else else
return nullptr; return nullptr;
return res; return res;
} }
@ -776,6 +773,90 @@ private:
return ColumnTuple::create(tuple_columns); return ColumnTuple::create(tuple_columns);
} }
/// Handles the case when both branch arguments (arguments[1] and arguments[2]) are Map columns
/// (either full ColumnMap or ColumnConst wrapping a ColumnMap).
///
/// The maps are split into an array of keys and an array of values sharing the same offsets,
/// the function is evaluated recursively (via executeImpl) on the key arrays and on the value arrays
/// with arguments[0] passed through unchanged, and the two results are glued back into a ColumnMap.
///
/// Returns nullptr when at least one of the branch arguments is not a Map column, so that the caller
/// can try another implementation.
/// Throws LOGICAL_ERROR if the recursive calls do not return array columns or if the resulting
/// key and value arrays have different offsets.
ColumnPtr executeMap(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
{
    /// Views a map as two arrays (keys and values) that share the offsets of the map's nested array.
    auto extract_kv_from_map = [](const ColumnMap * map)
    {
        const ColumnTuple & tuple = map->getNestedData();
        const auto & keys = tuple.getColumnPtr(0);
        const auto & values = tuple.getColumnPtr(1);
        const auto & offsets = map->getNestedColumn().getOffsetsPtr();
        return std::make_pair(ColumnArray::create(keys, offsets), ColumnArray::create(values, offsets));
    };

    /// Extract keys and values from both arguments
    Columns key_cols(2);
    Columns value_cols(2);
    for (size_t i = 0; i < 2; ++i)
    {
        const auto & arg = arguments[i + 1];
        if (const ColumnMap * map = checkAndGetColumn<ColumnMap>(arg.column.get()))
        {
            auto [key_col, value_col] = extract_kv_from_map(map);
            key_cols[i] = std::move(key_col);
            value_cols[i] = std::move(value_col);
        }
        else if (const ColumnConst * const_map = checkAndGetColumnConst<ColumnMap>(arg.column.get()))
        {
            /// Keep constness: wrap the extracted arrays into constant columns of the same size.
            const ColumnMap * map_data = assert_cast<const ColumnMap *>(&const_map->getDataColumn());
            auto [key_col, value_col] = extract_kv_from_map(map_data);

            size_t size = const_map->size();
            key_cols[i] = ColumnConst::create(std::move(key_col), size);
            value_cols[i] = ColumnConst::create(std::move(value_col), size);
        }
        else
            return nullptr;
    }

    /// Compose temporary columns for keys and values
    ColumnsWithTypeAndName key_columns(3);
    key_columns[0] = arguments[0];
    ColumnsWithTypeAndName value_columns(3);
    value_columns[0] = arguments[0];
    for (size_t i = 0; i < 2; ++i)
    {
        const auto & arg = arguments[i + 1];
        const DataTypeMap & type = static_cast<const DataTypeMap &>(*arg.type);
        const auto & key_type = type.getKeyType();
        const auto & value_type = type.getValueType();
        key_columns[i + 1] = {key_cols[i], std::make_shared<DataTypeArray>(key_type), {}};
        value_columns[i + 1] = {value_cols[i], std::make_shared<DataTypeArray>(value_type), {}};
    }

    /// Calculate function corresponding keys and values in map
    const DataTypeMap & map_result_type = static_cast<const DataTypeMap &>(*result_type);
    auto key_result_type = std::make_shared<DataTypeArray>(map_result_type.getKeyType());
    auto value_result_type = std::make_shared<DataTypeArray>(map_result_type.getValueType());
    ColumnPtr key_result = executeImpl(key_columns, key_result_type, input_rows_count);
    ColumnPtr value_result = executeImpl(value_columns, value_result_type, input_rows_count);

    /// key_result and value_result are not expected to be constant columns here.
    /// Use a checked cast (which yields nullptr on mismatch) so that the sanity checks below are
    /// actually reachable; assert_cast would never produce nullptr.
    const auto * key_array = checkAndGetColumn<ColumnArray>(key_result.get());
    const auto * value_array = checkAndGetColumn<ColumnArray>(value_result.get());
    if (!key_array)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR,
            "Key result column should be {} instead of {} in executeMap of function {}",
            key_result_type->getName(),
            key_result->getName(),
            getName());
    if (!value_array)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR,
            "Value result column should be {} instead of {} in executeMap of function {}",
            value_result_type->getName(),
            value_result->getName(),
            getName());
    if (!key_array->hasEqualOffsets(*value_array))
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "Key array and value array must have equal sizes in executeMap of function {}", getName());

    /// Reassemble Map = Array(Tuple(key, value)) using the offsets of the key array.
    auto nested_column = ColumnArray::create(
        ColumnTuple::create(Columns{key_array->getDataPtr(), value_array->getDataPtr()}), key_array->getOffsetsPtr());
    return ColumnMap::create(std::move(nested_column));
}
static ColumnPtr executeGeneric( static ColumnPtr executeGeneric(
const ColumnUInt8 * cond_col, const ColumnsWithTypeAndName & arguments, size_t input_rows_count, bool use_variant_when_no_common_type) const ColumnUInt8 * cond_col, const ColumnsWithTypeAndName & arguments, size_t input_rows_count, bool use_variant_when_no_common_type)
{ {
@ -1289,7 +1370,8 @@ public:
|| (res = executeTyped<UUID, UUID>(cond_col, arguments, result_type, input_rows_count)) || (res = executeTyped<UUID, UUID>(cond_col, arguments, result_type, input_rows_count))
|| (res = executeString(cond_col, arguments, result_type)) || (res = executeString(cond_col, arguments, result_type))
|| (res = executeGenericArray(cond_col, arguments, result_type)) || (res = executeGenericArray(cond_col, arguments, result_type))
|| (res = executeTuple(arguments, result_type, input_rows_count)))) || (res = executeTuple(arguments, result_type, input_rows_count))
|| (res = executeMap(arguments, result_type, input_rows_count))))
{ {
return executeGeneric(cond_col, arguments, input_rows_count, use_variant_when_no_common_type); return executeGeneric(cond_col, arguments, input_rows_count, use_variant_when_no_common_type);
} }

View File

@ -1,14 +1,14 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypesNumber.h>
#include <Core/ColumnNumbers.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h> #include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnVariant.h> #include <Columns/ColumnVariant.h>
#include <Core/ColumnNumbers.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/PerformanceAdaptors.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
namespace DB namespace DB
{ {
namespace namespace
@ -21,10 +21,7 @@ class FunctionIsNotNull : public IFunction
public: public:
static constexpr auto name = "isNotNull"; static constexpr auto name = "isNotNull";
static FunctionPtr create(ContextPtr) static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionIsNotNull>(); }
{
return std::make_shared<FunctionIsNotNull>();
}
std::string getName() const override std::string getName() const override
{ {
@ -52,9 +49,9 @@ public:
const auto & discriminators = checkAndGetColumn<ColumnVariant>(*elem.column)->getLocalDiscriminators(); const auto & discriminators = checkAndGetColumn<ColumnVariant>(*elem.column)->getLocalDiscriminators();
auto res = DataTypeUInt8().createColumn(); auto res = DataTypeUInt8().createColumn();
auto & data = typeid_cast<ColumnUInt8 &>(*res).getData(); auto & data = typeid_cast<ColumnUInt8 &>(*res).getData();
data.reserve(discriminators.size()); data.resize(discriminators.size());
for (auto discr : discriminators) for (size_t i = 0; i < discriminators.size(); ++i)
data.push_back(discr != ColumnVariant::NULL_DISCRIMINATOR); data[i] = discriminators[i] != ColumnVariant::NULL_DISCRIMINATOR;
return res; return res;
} }
@ -64,9 +61,9 @@ public:
const size_t null_index = low_cardinality_column->getDictionary().getNullValueIndex(); const size_t null_index = low_cardinality_column->getDictionary().getNullValueIndex();
auto res = DataTypeUInt8().createColumn(); auto res = DataTypeUInt8().createColumn();
auto & data = typeid_cast<ColumnUInt8 &>(*res).getData(); auto & data = typeid_cast<ColumnUInt8 &>(*res).getData();
data.reserve(low_cardinality_column->size()); data.resize(low_cardinality_column->size());
for (size_t i = 0; i != low_cardinality_column->size(); ++i) for (size_t i = 0; i != low_cardinality_column->size(); ++i)
data.push_back(low_cardinality_column->getIndexAt(i) != null_index); data[i] = (low_cardinality_column->getIndexAt(i) != null_index);
return res; return res;
} }
@ -76,10 +73,7 @@ public:
auto res_column = ColumnUInt8::create(input_rows_count); auto res_column = ColumnUInt8::create(input_rows_count);
const auto & src_data = nullable->getNullMapData(); const auto & src_data = nullable->getNullMapData();
auto & res_data = assert_cast<ColumnUInt8 &>(*res_column).getData(); auto & res_data = assert_cast<ColumnUInt8 &>(*res_column).getData();
vector(src_data, res_data);
for (size_t i = 0; i < input_rows_count; ++i)
res_data[i] = !src_data[i];
return res_column; return res_column;
} }
else else
@ -88,8 +82,34 @@ public:
return DataTypeUInt8().createColumnConst(elem.column->size(), 1u); return DataTypeUInt8().createColumnConst(elem.column->size(), 1u);
} }
} }
};
private:
/// Element-wise logical negation of a null map: res[i] = !null_map[i] for every i < null_map.size().
/// The multitarget macro produces `vectorImpl` together with the `vectorImplAVX2` and `vectorImplSSE42`
/// variants that `vector()` dispatches to.
/// `res` is only indexed, never resized here, so the caller must size it to at least null_map.size().
MULTITARGET_FUNCTION_AVX2_SSE42(
MULTITARGET_FUNCTION_HEADER(static void NO_INLINE), vectorImpl, MULTITARGET_FUNCTION_BODY((const PaddedPODArray<UInt8> & null_map, PaddedPODArray<UInt8> & res) /// NOLINT
{
size_t size = null_map.size();
for (size_t i = 0; i < size; ++i)
res[i] = !null_map[i];
}))
/// Fills `res` with the negated `null_map`, picking the most specific implementation available:
/// AVX2 first, then SSE4.2 (both only when multitarget code is compiled in), otherwise the generic one.
static void NO_INLINE vector(const PaddedPODArray<UInt8> & null_map, PaddedPODArray<UInt8> & res)
{
#if USE_MULTITARGET_CODE
    if (isArchSupported(TargetArch::AVX2))
        return vectorImplAVX2(null_map, res);

    if (isArchSupported(TargetArch::SSE42))
        return vectorImplSSE42(null_map, res);
#endif

    /// No specialised variant applies.
    vectorImpl(null_map, res);
}
};
} }
REGISTER_FUNCTION(IsNotNull) REGISTER_FUNCTION(IsNotNull)

View File

@ -0,0 +1,262 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Common/NaNUtils.h>
#include <cmath>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/// Detects a possible anomaly in series using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences)
class FunctionSeriesOutliersDetectTukey : public IFunction
{
public:
/// Function name as returned by getName().
static constexpr auto name = "seriesOutliersDetectTukey";
/// Lower and upper bounds (expressed in percent) that executeImpl accepts for the optional
/// min/max percentile arguments.
static constexpr Float64 min_quartile = 2.0;
static constexpr Float64 max_quartile = 98.0;
/// Factory method; the context argument is not used.
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSeriesOutliersDetectTukey>(); }
std::string getName() const override { return name; }
/// The function is variadic (getNumberOfArguments() returns 0); the allowed argument counts
/// (1 or 4) are enforced in getReturnTypeImpl.
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
/// Checks the argument list and reports the result type.
/// Accepts either a single Array argument or an Array followed by three constant native numbers
/// (min_percentile, max_percentile, k). The result is always Array(Float64).
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when the number of arguments is neither 1 nor 4.
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
    const size_t num_arguments = arguments.size();
    const bool arity_is_valid = (num_arguments == 1) || (num_arguments == 4);
    if (!arity_is_valid)
        throw Exception(
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Function {} needs either 1 or 4 arguments; passed {}.",
            getName(),
            num_arguments);

    /// The series itself is the only mandatory argument.
    FunctionArgumentDescriptors required_descriptors{{"time_series", &isArray<IDataType>, nullptr, "Array"}};

    /// Tuning parameters; each of them has to be a constant native number.
    FunctionArgumentDescriptors tuning_descriptors{
        {"min_percentile", &isNativeNumber<IDataType>, isColumnConst, "Number"},
        {"max_percentile", &isNativeNumber<IDataType>, isColumnConst, "Number"},
        {"k", &isNativeNumber<IDataType>, isColumnConst, "Number"}};

    validateFunctionArgumentTypes(*this, arguments, required_descriptors, tuning_descriptors);

    auto score_type = std::make_shared<DataTypeFloat64>();
    return std::make_shared<DataTypeArray>(score_type);
}
/// Arguments at positions 1, 2 and 3 (min_percentile, max_percentile, k) are declared as always constant.
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; }
/// Computes the result for a column of numeric arrays.
///
/// arguments[0] is the array column with the series. When more than one argument is passed,
/// arguments[1..3] are read as constants:
///   - min_percentile and max_percentile, given in percent, each within [min_quartile, max_quartile],
///     with max_percentile not less than min_percentile (defaults: 25 and 75);
///   - k, a finite non-negative multiplier (default: 1.5).
/// The percentiles are converted to fractions before being handed to executeNumber, which is tried
/// for every supported element type in turn.
///
/// Throws BAD_ARGUMENTS for out-of-range parameters and ILLEGAL_COLUMN when the first argument
/// is not an array column or its elements are of an unsupported type.
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
    ColumnPtr col = arguments[0].column;
    const ColumnArray * col_arr = checkAndGetColumn<ColumnArray>(col.get());
    /// checkAndGetColumn yields nullptr on a type mismatch; fail with a proper error instead of dereferencing it.
    if (!col_arr)
        throw Exception(
            ErrorCodes::ILLEGAL_COLUMN,
            "Illegal column {} of first argument of function {}",
            arguments[0].column->getName(),
            getName());

    const IColumn & arr_data = col_arr->getData();
    const ColumnArray::Offsets & arr_offsets = col_arr->getOffsets();

    ColumnPtr col_res;
    if (input_rows_count == 0)
        return ColumnArray::create(ColumnFloat64::create());

    Float64 min_percentile = 0.25; /// default 25th percentile
    Float64 max_percentile = 0.75; /// default 75th percentile
    Float64 k = 1.50;

    if (arguments.size() > 1)
    {
        Float64 p_min = arguments[1].column->getFloat64(0);
        if (isnan(p_min) || !isFinite(p_min) || p_min < min_quartile || p_min > max_quartile)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} must be in range [2.0, 98.0]", getName());

        min_percentile = p_min / 100;

        Float64 p_max = arguments[2].column->getFloat64(0);
        /// Compare with p_min itself rather than with min_percentile * 100: the division/multiplication
        /// round trip is not exact in floating point (e.g. 7 / 100 * 100 > 7), which would wrongly
        /// reject a max percentile equal to the min percentile.
        if (isnan(p_max) || !isFinite(p_max) || p_max < min_quartile || p_max > max_quartile || p_max < p_min)
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "The third argument of function {} must be in range [2.0, 98.0] and not less than the second argument",
                getName());

        max_percentile = p_max / 100;

        auto k_val = arguments[3].column->getFloat64(0);
        /// Zero is accepted, so the requirement is "non-negative", not "positive".
        if (k_val < 0.0 || isnan(k_val) || !isFinite(k_val))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "The fourth argument of function {} must be a non-negative number", getName());

        k = k_val;
    }

    /// Try every supported element type; the first executeNumber that matches fills col_res.
    if (executeNumber<UInt8>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
        || executeNumber<UInt16>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
        || executeNumber<UInt32>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
        || executeNumber<UInt64>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
        || executeNumber<Int8>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
        || executeNumber<Int16>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
        || executeNumber<Int32>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
        || executeNumber<Int64>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
        || executeNumber<Float32>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res)
        || executeNumber<Float64>(arr_data, arr_offsets, min_percentile, max_percentile, k, col_res))
    {
        return col_res;
    }
    else
        throw Exception(
            ErrorCodes::ILLEGAL_COLUMN,
            "Illegal column {} of first argument of function {}",
            arguments[0].column->getName(),
            getName());
}
private:
/// Attempts to process the nested array data as ColumnVector<T>.
///
/// Returns false, leaving res_ptr untouched, when arr_data is not a ColumnVector<T>.
/// Otherwise, for every array delimited by arr_offsets:
///   - sorts a Float64 copy of the array,
///   - picks the values q1 and q2 at min_percentile and max_percentile (given as fractions),
///   - builds the fences [q1 - k * (q2 - q1), q2 + k * (q2 - q1)],
///   - emits one score per element: the signed distance to the violated fence, or 0 when inside the fences.
/// The scores are stored in res_ptr as an array column of Float64 and true is returned.
///
/// Throws BAD_ARGUMENTS when an array has fewer than four elements.
template <typename T>
bool executeNumber(
    const IColumn & arr_data,
    const ColumnArray::Offsets & arr_offsets,
    Float64 min_percentile,
    Float64 max_percentile,
    Float64 k,
    ColumnPtr & res_ptr) const
{
    const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&arr_data);
    if (!src_data_concrete)
        return false;

    const PaddedPODArray<T> & src_vec = src_data_concrete->getData();

    /// One score is produced per source element and one offset per source array,
    /// so both output buffers can be sized once up front.
    auto outliers = ColumnFloat64::create();
    auto & outlier_data = outliers->getData();
    outlier_data.reserve(src_vec.size());

    ColumnArray::ColumnOffsets::MutablePtr res_offsets = ColumnArray::ColumnOffsets::create();
    auto & res_offsets_data = res_offsets->getData();
    res_offsets_data.reserve(arr_offsets.size());

    std::vector<Float64> src_sorted;

    /// Value of the sorted sample at the given fraction: when size * fraction is a whole number,
    /// the mean of the two neighbouring elements, otherwise the element at the rounded-up (1-based) position.
    auto percentile_value = [&src_sorted](Float64 fraction) -> Float64
    {
        Float64 position = src_sorted.size() * fraction;
        if (position == static_cast<Int64>(position))
        {
            size_t index = static_cast<size_t>(position) - 1;
            return (src_sorted[index] + src_sorted[index + 1]) / 2;
        }

        size_t index = static_cast<size_t>(std::ceil(position)) - 1;
        return src_sorted[index];
    };

    ColumnArray::Offset prev_src_offset = 0;
    for (auto src_offset : arr_offsets)
    {
        chassert(prev_src_offset <= src_offset);
        size_t len = src_offset - prev_src_offset;
        if (len < 4)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "At least four data points are needed for function {}", getName());

        /// NOTE(review): for floating-point T the data may contain NaN, which does not satisfy the ordering
        /// std::sort requires - confirm whether NaN values can reach this point.
        src_sorted.assign(src_vec.begin() + prev_src_offset, src_vec.begin() + src_offset);
        std::sort(src_sorted.begin(), src_sorted.end());

        Float64 q1 = percentile_value(min_percentile);
        Float64 q2 = percentile_value(max_percentile);

        Float64 iqr = q2 - q1; /// interquantile range
        Float64 lower_fence = q1 - k * iqr;
        Float64 upper_fence = q2 + k * iqr;

        for (ColumnArray::Offset j = prev_src_offset; j < src_offset; ++j)
        {
            /// Negative below the lower fence, positive above the upper fence, zero in between.
            auto score = std::min((src_vec[j] - lower_fence), 0.0) + std::max((src_vec[j] - upper_fence), 0.0);
            outlier_data.push_back(score);
        }

        res_offsets_data.push_back(outlier_data.size());
        prev_src_offset = src_offset;
    }

    res_ptr = ColumnArray::create(std::move(outliers), std::move(res_offsets));
    return true;
}
};
/// Registers FunctionSeriesOutliersDetectTukey in the function factory together with its
/// user-facing documentation (description text and category).
REGISTER_FUNCTION(SeriesOutliersDetectTukey)
{
factory.registerFunction<FunctionSeriesOutliersDetectTukey>(FunctionDocumentation{
.description = R"(
Detects outliers in series data using [Tukey Fences](https://en.wikipedia.org/wiki/Outlier#Tukey%27s_fences).
**Syntax**
``` sql
seriesOutliersDetectTukey(series);
seriesOutliersDetectTukey(series, min_percentile, max_percentile, k);
```
**Arguments**
- `series` - An array of numeric values.
- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [2,98]. The default is 25.
- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [2,98]. The default is 75.
- `k` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5
At least four data points are required in `series` to detect outliers.
**Returned value**
- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4, 5, 12, 45, 12, 3, 3, 4, 5, 6]) AS print_0;
```
Result:
``` text
print_0
[0,0,0,0,0,0,0,0,0,27,0,0,0,0,0,0]
```
Query:
``` sql
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 20, 80, 1.5) AS print_0;
```
Result:
``` text
print_0
[0,0,0,0,0,0,0,0,0,19.5,0,0,0,0,0,0]
```)",
.categories{"Time series analysis"}});
}
}

View File

@ -20,6 +20,9 @@ namespace CurrentMetrics
extern const Metric MergeTreeOutdatedPartsLoaderThreads; extern const Metric MergeTreeOutdatedPartsLoaderThreads;
extern const Metric MergeTreeOutdatedPartsLoaderThreadsActive; extern const Metric MergeTreeOutdatedPartsLoaderThreadsActive;
extern const Metric MergeTreeOutdatedPartsLoaderThreadsScheduled; extern const Metric MergeTreeOutdatedPartsLoaderThreadsScheduled;
extern const Metric DatabaseReplicatedCreateTablesThreads;
extern const Metric DatabaseReplicatedCreateTablesThreadsActive;
extern const Metric DatabaseReplicatedCreateTablesThreadsScheduled;
} }
namespace DB namespace DB
@ -148,4 +151,10 @@ StaticThreadPool & getOutdatedPartsLoadingThreadPool()
return instance; return instance;
} }
/// Accessor for the "CreateTablesThreadPool" pool. The pool is a function-local static,
/// so every call returns a reference to the same object.
StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool()
{
    static StaticThreadPool instance(
        "CreateTablesThreadPool",
        CurrentMetrics::DatabaseReplicatedCreateTablesThreads,
        CurrentMetrics::DatabaseReplicatedCreateTablesThreadsActive,
        CurrentMetrics::DatabaseReplicatedCreateTablesThreadsScheduled);

    return instance;
}
} }

View File

@ -64,4 +64,7 @@ StaticThreadPool & getPartsCleaningThreadPool();
/// the number of threads by calling enableTurboMode() :-) /// the number of threads by calling enableTurboMode() :-)
StaticThreadPool & getOutdatedPartsLoadingThreadPool(); StaticThreadPool & getOutdatedPartsLoadingThreadPool();
/// ThreadPool used for creating tables in DatabaseReplicated.
StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool();
} }

View File

@ -100,7 +100,10 @@ void RewriteSumFunctionWithSumAndCountMatcher::visit(const ASTFunction & functio
if (!new_ast) if (!new_ast)
return; return;
else else
{
new_ast->setAlias(ast->tryGetAlias());
ast = new_ast; ast = new_ast;
}
} }
else if (column_id == 1) else if (column_id == 1)
{ {
@ -116,7 +119,10 @@ void RewriteSumFunctionWithSumAndCountMatcher::visit(const ASTFunction & functio
if (!new_ast) if (!new_ast)
return; return;
else else
{
new_ast->setAlias(ast->tryGetAlias());
ast = new_ast; ast = new_ast;
}
} }
} }

View File

@ -211,16 +211,17 @@ std::shared_ptr<TSystemLog> createSystemLog(
if (!settings.empty()) if (!settings.empty())
log_settings.engine += (storage_policy.empty() ? " " : ", ") + settings; log_settings.engine += (storage_policy.empty() ? " " : ", ") + settings;
} }
/// Add comment to AST. So it will be saved when the table will be renamed.
log_settings.engine += fmt::format(" COMMENT {} ", quoteString(comment));
} }
/// Validate engine definition syntax to prevent some configuration errors. /// Validate engine definition syntax to prevent some configuration errors.
ParserStorageWithComment storage_parser; ParserStorageWithComment storage_parser;
auto storage_ast = parseQuery(storage_parser, log_settings.engine.data(), log_settings.engine.data() + log_settings.engine.size(),
parseQuery(storage_parser, log_settings.engine.data(), log_settings.engine.data() + log_settings.engine.size(),
"Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
auto & storage_with_comment = storage_ast->as<StorageWithComment &>();
/// Add comment to AST. So it will be saved when the table will be renamed.
if (!storage_with_comment.comment || storage_with_comment.comment->as<ASTLiteral &>().value.safeGet<String>().empty())
log_settings.engine += fmt::format(" COMMENT {} ", quoteString(comment));
log_settings.queue_settings.flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds", log_settings.queue_settings.flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds",
TSystemLog::getDefaultFlushIntervalMilliseconds()); TSystemLog::getDefaultFlushIntervalMilliseconds());

View File

@ -104,6 +104,16 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState &
{ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << " REMOVE " << remove_property; settings.ostr << (settings.hilite ? hilite_keyword : "") << " REMOVE " << remove_property;
} }
else if (settings_changes)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " MODIFY SETTING " << (settings.hilite ? hilite_none : "");
settings_changes->formatImpl(settings, state, frame);
}
else if (settings_resets)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " RESET SETTING " << (settings.hilite ? hilite_none : "");
settings_resets->formatImpl(settings, state, frame);
}
else else
{ {
if (first) if (first)

View File

@ -130,6 +130,11 @@ namespace DB
reinterpret_cast<const uint8_t *>(internal_data.data() + start), reinterpret_cast<const uint8_t *>(internal_data.data() + start),
end - start, end - start,
reinterpret_cast<const uint8_t *>(arrow_null_bytemap_raw_ptr)); reinterpret_cast<const uint8_t *>(arrow_null_bytemap_raw_ptr));
else if constexpr (std::is_same_v<NumericType, Int8>)
status = builder.AppendValues(
reinterpret_cast<const int8_t *>(internal_data.data() + start),
end - start,
reinterpret_cast<const uint8_t *>(arrow_null_bytemap_raw_ptr));
else else
status = builder.AppendValues(internal_data.data() + start, end - start, reinterpret_cast<const uint8_t *>(arrow_null_bytemap_raw_ptr)); status = builder.AppendValues(internal_data.data() + start, end - start, reinterpret_cast<const uint8_t *>(arrow_null_bytemap_raw_ptr));
checkStatus(status, write_column->getName(), format_name); checkStatus(status, write_column->getName(), format_name);

View File

@ -603,6 +603,8 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(
memcpy(buf, istr.position(), bytes_to_copy); memcpy(buf, istr.position(), bytes_to_copy);
buf[bytes_to_copy] = 0; buf[bytes_to_copy] = 0;
const bool hex_like = bytes_to_copy >= 2 && buf[0] == '0' && (buf[1] == 'x' || buf[1] == 'X');
char * pos_double = buf; char * pos_double = buf;
errno = 0; errno = 0;
Float64 float_value = std::strtod(buf, &pos_double); Float64 float_value = std::strtod(buf, &pos_double);
@ -614,13 +616,13 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(
char * pos_integer = buf; char * pos_integer = buf;
errno = 0; errno = 0;
UInt64 uint_value = std::strtoull(buf, &pos_integer, 0); UInt64 uint_value = std::strtoull(buf, &pos_integer, hex_like ? 16 : 10);
if (pos_integer == pos_double && errno != ERANGE && (!negative || uint_value <= (1ULL << 63))) if (pos_integer == pos_double && errno != ERANGE && (!negative || uint_value <= (1ULL << 63)))
{ {
istr.position() += pos_integer - buf; istr.position() += pos_integer - buf;
if (negative && type_info.main_type == Type::Int64) if (negative && type_info.main_type == Type::Int64)
number = static_cast<Int64>(-uint_value); number = static_cast<Int64>(-uint_value);
else if (!negative && type_info.main_type == Type::UInt64) else if (type_info.main_type == Type::UInt64 && (!negative || uint_value == 0))
number = uint_value; number = uint_value;
else else
return false; return false;

View File

@ -247,30 +247,6 @@ Chain buildPushingToViewsChain(
{ {
insert_context->setSetting("insert_deduplicate", Field{false}); insert_context->setSetting("insert_deduplicate", Field{false});
} }
else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views &&
!insert_settings.insert_deduplication_token.value.empty())
{
/** Update deduplication token passed to dependent MV with current table id. So it is possible to properly handle
* deduplication in complex INSERT flows.
*
* Example:
*
* landing ---> mv_1_1 ---> ds_1_1 ---> mv_2_1 ---> ds_2_1 ---> mv_3_1 ---> ds_3_1
* | |
* --> mv_1_2 ---> ds_1_2 ---> mv_2_2 --
*
* Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will
* be inserted into `ds_2_1`.
*/
auto insert_deduplication_token = insert_settings.insert_deduplication_token.value;
if (table_id.hasUUID())
insert_deduplication_token += "_" + toString(table_id.uuid);
else
insert_deduplication_token += "_" + table_id.getFullNameNotQuoted();
insert_context->setSetting("insert_deduplication_token", insert_deduplication_token);
}
// Processing of blocks for MVs is done block by block, and there will // Processing of blocks for MVs is done block by block, and there will
// be no parallel reading after (plus it is not a costless operation) // be no parallel reading after (plus it is not a costless operation)
@ -327,6 +303,46 @@ Chain buildPushingToViewsChain(
auto & target_name = runtime_stats->target_name; auto & target_name = runtime_stats->target_name;
auto * view_counter_ms = &runtime_stats->elapsed_ms; auto * view_counter_ms = &runtime_stats->elapsed_ms;
const auto & insert_settings = insert_context->getSettingsRef();
ContextMutablePtr view_insert_context = insert_context;
if (!disable_deduplication_for_children &&
insert_settings.update_insert_deduplication_token_in_dependent_materialized_views &&
!insert_settings.insert_deduplication_token.value.empty())
{
/** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle
* deduplication in complex INSERT flows.
*
* Example:
*
* landing ---> mv_1_1 ---> ds_1_1 ---> mv_2_1 ---> ds_2_1 ---> mv_3_1 ---> ds_3_1
* | |
* --> mv_1_2 ---> ds_1_2 ---> mv_2_2 --
*
* Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will
* be inserted into `ds_2_1`.
*
* We are forced to use view id instead of table id because there are some possible INSERT flows where no tables
* are involved.
*
* Example:
*
* landing ---> mv_1_1 ---> ds_1_1
* | |
* --> mv_1_2 --
*
*/
auto insert_deduplication_token = insert_settings.insert_deduplication_token.value;
if (view_id.hasUUID())
insert_deduplication_token += "_" + toString(view_id.uuid);
else
insert_deduplication_token += "_" + view_id.getFullNameNotQuoted();
view_insert_context = Context::createCopy(insert_context);
view_insert_context->setSetting("insert_deduplication_token", insert_deduplication_token);
}
if (auto * materialized_view = dynamic_cast<StorageMaterializedView *>(view.get())) if (auto * materialized_view = dynamic_cast<StorageMaterializedView *>(view.get()))
{ {
auto lock = materialized_view->tryLockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); auto lock = materialized_view->tryLockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
@ -394,7 +410,7 @@ Chain buildPushingToViewsChain(
insert_columns.emplace_back(column.name); insert_columns.emplace_back(column.name);
} }
InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); InterpreterInsertQuery interpreter(nullptr, view_insert_context, false, false, false);
out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms); out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms);
out.addStorageHolder(view); out.addStorageHolder(view);
out.addStorageHolder(inner_table); out.addStorageHolder(inner_table);
@ -404,7 +420,7 @@ Chain buildPushingToViewsChain(
runtime_stats->type = QueryViewsLogElement::ViewType::LIVE; runtime_stats->type = QueryViewsLogElement::ViewType::LIVE;
query = live_view->getInnerQuery(); // Used only to log in system.query_views_log query = live_view->getInnerQuery(); // Used only to log in system.query_views_log
out = buildPushingToViewsChain( out = buildPushingToViewsChain(
view, view_metadata_snapshot, insert_context, ASTPtr(), view, view_metadata_snapshot, view_insert_context, ASTPtr(),
/* no_destination= */ true, /* no_destination= */ true,
thread_status_holder, running_group, view_counter_ms, async_insert, storage_header); thread_status_holder, running_group, view_counter_ms, async_insert, storage_header);
} }
@ -413,13 +429,13 @@ Chain buildPushingToViewsChain(
runtime_stats->type = QueryViewsLogElement::ViewType::WINDOW; runtime_stats->type = QueryViewsLogElement::ViewType::WINDOW;
query = window_view->getMergeableQuery(); // Used only to log in system.query_views_log query = window_view->getMergeableQuery(); // Used only to log in system.query_views_log
out = buildPushingToViewsChain( out = buildPushingToViewsChain(
view, view_metadata_snapshot, insert_context, ASTPtr(), view, view_metadata_snapshot, view_insert_context, ASTPtr(),
/* no_destination= */ true, /* no_destination= */ true,
thread_status_holder, running_group, view_counter_ms, async_insert); thread_status_holder, running_group, view_counter_ms, async_insert);
} }
else else
out = buildPushingToViewsChain( out = buildPushingToViewsChain(
view, view_metadata_snapshot, insert_context, ASTPtr(), view, view_metadata_snapshot, view_insert_context, ASTPtr(),
/* no_destination= */ false, /* no_destination= */ false,
thread_status_holder, running_group, view_counter_ms, async_insert); thread_status_holder, running_group, view_counter_ms, async_insert);

View File

@ -419,7 +419,11 @@ namespace
void read(GRPCQueryInfo & query_info_, const CompletionCallback & callback) override void read(GRPCQueryInfo & query_info_, const CompletionCallback & callback) override
{ {
if (!query_info.has_value()) if (!query_info.has_value())
{
callback(false); callback(false);
return;
}
query_info_ = std::move(query_info).value(); query_info_ = std::move(query_info).value();
query_info.reset(); query_info.reset();
callback(true); callback(true);
@ -486,7 +490,11 @@ namespace
void read(GRPCQueryInfo & query_info_, const CompletionCallback & callback) override void read(GRPCQueryInfo & query_info_, const CompletionCallback & callback) override
{ {
if (!query_info.has_value()) if (!query_info.has_value())
{
callback(false); callback(false);
return;
}
query_info_ = std::move(query_info).value(); query_info_ = std::move(query_info).value();
query_info.reset(); query_info.reset();
callback(true); callback(true);

View File

@ -1,6 +1,14 @@
#include <Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h> #include <Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h>
#include <Storages/MergeTree/MergeTreeIndexInverted.h> #include <Storages/MergeTree/MergeTreeIndexInverted.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>
#include <Common/MemoryTrackerBlockerInThread.h> #include <Common/MemoryTrackerBlockerInThread.h>
#include <Common/logger_useful.h>
namespace ProfileEvents
{
extern const Event MergeTreeDataWriterSkipIndicesCalculationMicroseconds;
extern const Event MergeTreeDataWriterStatisticsCalculationMicroseconds;
}
namespace DB namespace DB
{ {
@ -148,6 +156,8 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk(
, default_codec(default_codec_) , default_codec(default_codec_)
, compute_granularity(index_granularity.empty()) , compute_granularity(index_granularity.empty())
, compress_primary_key(settings.compress_primary_key) , compress_primary_key(settings.compress_primary_key)
, execution_stats(skip_indices.size(), stats.size())
, log(getLogger(storage.getLogName() + " (DataPartWriter)"))
{ {
if (settings.blocks_are_granules_size && !index_granularity.empty()) if (settings.blocks_are_granules_size && !index_granularity.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, throw Exception(ErrorCodes::LOGICAL_ERROR,
@ -329,9 +339,12 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc
void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block)
{ {
for (const auto & stat_ptr : stats) for (size_t i = 0; i < stats.size(); ++i)
{ {
const auto & stat_ptr = stats[i];
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataWriterStatisticsCalculationMicroseconds);
stat_ptr->update(block.getByName(stat_ptr->columnName()).column); stat_ptr->update(block.getByName(stat_ptr->columnName()).column);
execution_stats.statistics_build_us[i] += watch.elapsed();
} }
} }
@ -378,10 +391,14 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block
writeBinaryLittleEndian(1UL, marks_out); writeBinaryLittleEndian(1UL, marks_out);
} }
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataWriterSkipIndicesCalculationMicroseconds);
size_t pos = granule.start_row; size_t pos = granule.start_row;
skip_indices_aggregators[i]->update(skip_indexes_block, &pos, granule.rows_to_write); skip_indices_aggregators[i]->update(skip_indexes_block, &pos, granule.rows_to_write);
if (granule.is_complete) if (granule.is_complete)
++skip_index_accumulated_marks[i]; ++skip_index_accumulated_marks[i];
execution_stats.skip_indices_build_us[i] += watch.elapsed();
} }
} }
} }
@ -481,6 +498,9 @@ void MergeTreeDataPartWriterOnDisk::finishStatisticsSerialization(bool sync)
if (sync) if (sync)
stream->sync(); stream->sync();
} }
for (size_t i = 0; i < stats.size(); ++i)
LOG_DEBUG(log, "Spent {} ms calculating statistics {} for the part {}", execution_stats.statistics_build_us[i] / 1000, stats[i]->columnName(), data_part->name);
} }
void MergeTreeDataPartWriterOnDisk::fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums) void MergeTreeDataPartWriterOnDisk::fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums)
@ -504,6 +524,10 @@ void MergeTreeDataPartWriterOnDisk::finishSkipIndicesSerialization(bool sync)
} }
for (auto & store: gin_index_stores) for (auto & store: gin_index_stores)
store.second->finalize(); store.second->finalize();
for (size_t i = 0; i < skip_indices.size(); ++i)
LOG_DEBUG(log, "Spent {} ms calculating index {} for the part {}", execution_stats.skip_indices_build_us[i] / 1000, skip_indices[i]->index.name, data_part->name);
gin_index_stores.clear(); gin_index_stores.clear();
skip_indices_streams.clear(); skip_indices_streams.clear();
skip_indices_aggregators.clear(); skip_indices_aggregators.clear();

View File

@ -190,6 +190,20 @@ private:
void initStatistics(); void initStatistics();
virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0;
/// Per-writer timing accumulators: one slot per skip index and one slot per statistic.
struct ExecutionStatistics
{
    /// Slot i holds the time accumulated for the i-th skip index.
    std::vector<size_t> skip_indices_build_us;
    /// Slot i holds the time accumulated for the i-th statistic.
    std::vector<size_t> statistics_build_us;

    /// Creates zero-filled accumulators of the requested sizes.
    ExecutionStatistics(size_t skip_indices_cnt, size_t stats_cnt)
        : skip_indices_build_us(skip_indices_cnt, 0)
        , statistics_build_us(stats_cnt, 0)
    {
    }
};
ExecutionStatistics execution_stats;
LoggerPtr log;
}; };
} }

View File

@ -1,21 +1,22 @@
#include <Storages/MergeTree/MergeTreeDataWriter.h>
#include <Storages/MergeTree/MergedBlockOutputStream.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Common/OpenTelemetryTraceContext.h> #include <DataTypes/DataTypeDate.h>
#include <Common/HashTable/HashMap.h> #include <DataTypes/DataTypeDateTime.h>
#include <Common/Exception.h> #include <DataTypes/ObjectUtils.h>
#include <Disks/createVolume.h> #include <Disks/createVolume.h>
#include <IO/HashingWriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/AggregationCommon.h> #include <Interpreters/AggregationCommon.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Interpreters/MergeTreeTransaction.h> #include <Interpreters/MergeTreeTransaction.h>
#include <IO/HashingWriteBuffer.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/ObjectUtils.h>
#include <IO/WriteHelpers.h>
#include <Common/typeid_cast.h>
#include <Processors/TTL/ITTLAlgorithm.h> #include <Processors/TTL/ITTLAlgorithm.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/MergeTreeDataWriter.h>
#include <Storages/MergeTree/MergedBlockOutputStream.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>
#include <Common/Exception.h>
#include <Common/HashTable/HashMap.h>
#include <Common/OpenTelemetryTraceContext.h>
#include <Common/typeid_cast.h>
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
@ -35,11 +36,16 @@ namespace ProfileEvents
extern const Event MergeTreeDataWriterRows; extern const Event MergeTreeDataWriterRows;
extern const Event MergeTreeDataWriterUncompressedBytes; extern const Event MergeTreeDataWriterUncompressedBytes;
extern const Event MergeTreeDataWriterCompressedBytes; extern const Event MergeTreeDataWriterCompressedBytes;
extern const Event MergeTreeDataWriterSortingBlocksMicroseconds;
extern const Event MergeTreeDataWriterMergingBlocksMicroseconds;
extern const Event MergeTreeDataWriterProjectionsCalculationMicroseconds;
extern const Event MergeTreeDataProjectionWriterBlocks; extern const Event MergeTreeDataProjectionWriterBlocks;
extern const Event MergeTreeDataProjectionWriterBlocksAlreadySorted; extern const Event MergeTreeDataProjectionWriterBlocksAlreadySorted;
extern const Event MergeTreeDataProjectionWriterRows; extern const Event MergeTreeDataProjectionWriterRows;
extern const Event MergeTreeDataProjectionWriterUncompressedBytes; extern const Event MergeTreeDataProjectionWriterUncompressedBytes;
extern const Event MergeTreeDataProjectionWriterCompressedBytes; extern const Event MergeTreeDataProjectionWriterCompressedBytes;
extern const Event MergeTreeDataProjectionWriterSortingBlocksMicroseconds;
extern const Event MergeTreeDataProjectionWriterMergingBlocksMicroseconds;
extern const Event RejectedInserts; extern const Event RejectedInserts;
} }
@ -472,6 +478,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
IColumn::Permutation perm; IColumn::Permutation perm;
if (!sort_description.empty()) if (!sort_description.empty())
{ {
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataWriterSortingBlocksMicroseconds);
if (!isAlreadySorted(block, sort_description)) if (!isAlreadySorted(block, sort_description))
{ {
stableGetPermutation(block, sort_description, perm); stableGetPermutation(block, sort_description, perm);
@ -483,7 +491,10 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
Names partition_key_columns = metadata_snapshot->getPartitionKey().column_names; Names partition_key_columns = metadata_snapshot->getPartitionKey().column_names;
if (context->getSettingsRef().optimize_on_insert) if (context->getSettingsRef().optimize_on_insert)
{
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataWriterMergingBlocksMicroseconds);
block = mergeBlock(block, sort_description, partition_key_columns, perm_ptr, data.merging_params); block = mergeBlock(block, sort_description, partition_key_columns, perm_ptr, data.merging_params);
}
/// Size of part would not be greater than block.bytes() + epsilon /// Size of part would not be greater than block.bytes() + epsilon
size_t expected_size = block.bytes(); size_t expected_size = block.bytes();
@ -588,7 +599,13 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
for (const auto & projection : metadata_snapshot->getProjections()) for (const auto & projection : metadata_snapshot->getProjections())
{ {
auto projection_block = projection.calculate(block, context); Block projection_block;
{
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataWriterProjectionsCalculationMicroseconds);
projection_block = projection.calculate(block, context);
LOG_DEBUG(log, "Spent {} ms calculating projection {} for the part {}", watch.elapsed() / 1000, projection.name, new_data_part->name);
}
if (projection_block.rows()) if (projection_block.rows())
{ {
auto proj_temp_part = writeProjectionPart(data, log, projection_block, projection, new_data_part.get()); auto proj_temp_part = writeProjectionPart(data, log, projection_block, projection, new_data_part.get());
@ -685,6 +702,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl(
IColumn::Permutation perm; IColumn::Permutation perm;
if (!sort_description.empty()) if (!sort_description.empty())
{ {
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataProjectionWriterSortingBlocksMicroseconds);
if (!isAlreadySorted(block, sort_description)) if (!isAlreadySorted(block, sort_description))
{ {
stableGetPermutation(block, sort_description, perm); stableGetPermutation(block, sort_description, perm);
@ -696,6 +715,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl(
if (projection.type == ProjectionDescription::Type::Aggregate) if (projection.type == ProjectionDescription::Type::Aggregate)
{ {
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MergeTreeDataProjectionWriterMergingBlocksMicroseconds);
MergeTreeData::MergingParams projection_merging_params; MergeTreeData::MergingParams projection_merging_params;
projection_merging_params.mode = MergeTreeData::MergingParams::Aggregating; projection_merging_params.mode = MergeTreeData::MergingParams::Aggregating;
block = mergeBlock(block, sort_description, {}, perm_ptr, projection_merging_params); block = mergeBlock(block, sort_description, {}, perm_ptr, projection_merging_params);

View File

@ -28,6 +28,11 @@
#include <Common/ProfileEventsScope.h> #include <Common/ProfileEventsScope.h>
namespace ProfileEvents
{
extern const Event MutateTaskProjectionsCalculationMicroseconds;
}
namespace CurrentMetrics namespace CurrentMetrics
{ {
extern const Metric PartMutation; extern const Metric PartMutation;
@ -1242,7 +1247,13 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()
for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i)
{ {
const auto & projection = *ctx->projections_to_build[i]; const auto & projection = *ctx->projections_to_build[i];
auto projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context));
Block projection_block;
{
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds);
projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context));
}
if (projection_block) if (projection_block)
{ {
auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart(

View File

@ -91,6 +91,8 @@ void NATSConnectionManager::connectImpl()
natsOptions_SetUserInfo(options, configuration.username.c_str(), configuration.password.c_str()); natsOptions_SetUserInfo(options, configuration.username.c_str(), configuration.password.c_str());
if (!configuration.token.empty()) if (!configuration.token.empty())
natsOptions_SetToken(options, configuration.token.c_str()); natsOptions_SetToken(options, configuration.token.c_str());
if (!configuration.credential_file.empty())
natsOptions_SetUserCredentialsFromFiles(options, configuration.credential_file.c_str(), nullptr);
if (configuration.secure) if (configuration.secure)
{ {

View File

@ -14,6 +14,7 @@ struct NATSConfiguration
String username; String username;
String password; String password;
String token; String token;
String credential_file;
int max_reconnect; int max_reconnect;
int reconnect_wait; int reconnect_wait;

View File

@ -25,6 +25,7 @@ class ASTStorage;
M(String, nats_username, "", "NATS username", 0) \ M(String, nats_username, "", "NATS username", 0) \
M(String, nats_password, "", "NATS password", 0) \ M(String, nats_password, "", "NATS password", 0) \
M(String, nats_token, "", "NATS token", 0) \ M(String, nats_token, "", "NATS token", 0) \
M(String, nats_credential_file, "", "Path to a NATS credentials file", 0) \
M(UInt64, nats_startup_connect_tries, 5, "Number of connect tries at startup", 0) \ M(UInt64, nats_startup_connect_tries, 5, "Number of connect tries at startup", 0) \
M(UInt64, nats_max_rows_per_message, 1, "The maximum number of rows produced in one message for row-based formats.", 0) \ M(UInt64, nats_max_rows_per_message, 1, "The maximum number of rows produced in one message for row-based formats.", 0) \
M(StreamingHandleErrorMode, nats_handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for NATS engine. Possible values: default (throw an exception after nats_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \ M(StreamingHandleErrorMode, nats_handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for NATS engine. Possible values: default (throw an exception after nats_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \

View File

@ -67,6 +67,7 @@ StorageNATS::StorageNATS(
auto nats_username = getContext()->getMacros()->expand(nats_settings->nats_username); auto nats_username = getContext()->getMacros()->expand(nats_settings->nats_username);
auto nats_password = getContext()->getMacros()->expand(nats_settings->nats_password); auto nats_password = getContext()->getMacros()->expand(nats_settings->nats_password);
auto nats_token = getContext()->getMacros()->expand(nats_settings->nats_token); auto nats_token = getContext()->getMacros()->expand(nats_settings->nats_token);
auto nats_credential_file = getContext()->getMacros()->expand(nats_settings->nats_credential_file);
configuration = configuration =
{ {
@ -75,6 +76,7 @@ StorageNATS::StorageNATS(
.username = nats_username.empty() ? getContext()->getConfigRef().getString("nats.user", "") : nats_username, .username = nats_username.empty() ? getContext()->getConfigRef().getString("nats.user", "") : nats_username,
.password = nats_password.empty() ? getContext()->getConfigRef().getString("nats.password", "") : nats_password, .password = nats_password.empty() ? getContext()->getConfigRef().getString("nats.password", "") : nats_password,
.token = nats_token.empty() ? getContext()->getConfigRef().getString("nats.token", "") : nats_token, .token = nats_token.empty() ? getContext()->getConfigRef().getString("nats.token", "") : nats_token,
.credential_file = nats_credential_file.empty() ? getContext()->getConfigRef().getString("nats.credential_file", "") : nats_credential_file,
.max_reconnect = static_cast<int>(nats_settings->nats_max_reconnect.value), .max_reconnect = static_cast<int>(nats_settings->nats_max_reconnect.value),
.reconnect_wait = static_cast<int>(nats_settings->nats_reconnect_wait.value), .reconnect_wait = static_cast<int>(nats_settings->nats_reconnect_wait.value),
.secure = nats_settings->nats_secure.value .secure = nats_settings->nats_secure.value

View File

@ -313,7 +313,7 @@ void MaterializedPostgreSQLConsumer::readTupleData(
Int32 col_len = readInt32(message, pos, size); Int32 col_len = readInt32(message, pos, size);
String value; String value;
for (Int32 i = 0; i < col_len; ++i) for (Int32 i = 0; i < col_len; ++i)
value += readInt8(message, pos, size); value += static_cast<char>(readInt8(message, pos, size));
insertValue(storage_data, value, column_idx); insertValue(storage_data, value, column_idx);
break; break;

View File

@ -1533,10 +1533,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(
IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, const ColumnWithTypeAndName & result) IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, const ColumnWithTypeAndName & result)
{ {
const auto & slot_to_shard = cluster->getSlotToShard(); const auto & slot_to_shard = cluster->getSlotToShard();
const IColumn * column = result.column.get(); const IColumn * column = result.column.get();
if (const auto * col_const = typeid_cast<const ColumnConst *>(column))
column = &col_const->getDataColumn();
// If result.type is DataTypeLowCardinality, do shard according to its dictionaryType // If result.type is DataTypeLowCardinality, do shard according to its dictionaryType
#define CREATE_FOR_TYPE(TYPE) \ #define CREATE_FOR_TYPE(TYPE) \

View File

@ -180,7 +180,7 @@ using Paths = std::deque<std::pair<String, ZkPathType>>;
class ReadFromSystemZooKeeper final : public SourceStepWithFilter class ReadFromSystemZooKeeper final : public SourceStepWithFilter
{ {
public: public:
ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info_, ContextPtr context_); ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info_, UInt64 max_block_size_, ContextPtr context_);
String getName() const override { return "ReadFromSystemZooKeeper"; } String getName() const override { return "ReadFromSystemZooKeeper"; }
@ -189,13 +189,41 @@ public:
void applyFilters() override; void applyFilters() override;
private: private:
void fillData(MutableColumns & res_columns);
std::shared_ptr<const StorageLimitsList> storage_limits; std::shared_ptr<const StorageLimitsList> storage_limits;
const UInt64 max_block_size;
ContextPtr context; ContextPtr context;
Paths paths; Paths paths;
}; };
class SystemZooKeeperSource : public ISource
{
public:
SystemZooKeeperSource(
Paths && paths_,
Block header_,
UInt64 max_block_size_,
ContextPtr context_)
: ISource(header_)
, max_block_size(max_block_size_)
, paths(std::move(paths_))
, context(std::move(context_))
{
}
String getName() const override { return "SystemZooKeeper"; }
protected:
Chunk generate() override;
private:
const UInt64 max_block_size;
Paths paths;
ContextPtr context;
bool started = false;
};
StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_)
: IStorage(table_id_) : IStorage(table_id_)
{ {
@ -211,11 +239,11 @@ void StorageSystemZooKeeper::read(
SelectQueryInfo & query_info, SelectQueryInfo & query_info,
ContextPtr context, ContextPtr context,
QueryProcessingStage::Enum /*processed_stage*/, QueryProcessingStage::Enum /*processed_stage*/,
size_t /*max_block_size*/, size_t max_block_size,
size_t /*num_streams*/) size_t /*num_streams*/)
{ {
auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals());
auto read_step = std::make_unique<ReadFromSystemZooKeeper>(header, query_info, context); auto read_step = std::make_unique<ReadFromSystemZooKeeper>(header, query_info, max_block_size, context);
query_plan.addStep(std::move(read_step)); query_plan.addStep(std::move(read_step));
} }
@ -414,7 +442,7 @@ static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, Cont
for (const auto * node : filter_nodes) for (const auto * node : filter_nodes)
extractPathImpl(*node, res, context, allow_unrestricted); extractPathImpl(*node, res, context, allow_unrestricted);
if (filter_nodes.empty() && allow_unrestricted) if (res.empty() && allow_unrestricted)
res.emplace_back("/", ZkPathType::Recurse); res.emplace_back("/", ZkPathType::Recurse);
return res; return res;
@ -426,8 +454,26 @@ void ReadFromSystemZooKeeper::applyFilters()
paths = extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); paths = extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper);
} }
void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)
Chunk SystemZooKeeperSource::generate()
{ {
if (paths.empty())
{
if (!started)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"SELECT from system.zookeeper table must contain condition like path = 'path' "
"or path IN ('path1','path2'...) or path IN (subquery) "
"in WHERE clause unless `set allow_unrestricted_reads_from_keeper = 'true'`.");
/// No more work
return {};
}
started = true;
MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns();
size_t row_count = 0;
QueryStatusPtr query_status = context->getProcessListElement(); QueryStatusPtr query_status = context->getProcessListElement();
const auto & settings = context->getSettingsRef(); const auto & settings = context->getSettingsRef();
@ -453,12 +499,6 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)
return zookeeper; return zookeeper;
}; };
if (paths.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"SELECT from system.zookeeper table must contain condition like path = 'path' "
"or path IN ('path1','path2'...) or path IN (subquery) "
"in WHERE clause unless `set allow_unrestricted_reads_from_keeper = 'true'`.");
const Int64 max_inflight_requests = std::max<Int64>(1, context->getSettingsRef().max_download_threads.value); const Int64 max_inflight_requests = std::max<Int64>(1, context->getSettingsRef().max_download_threads.value);
struct ListTask struct ListTask
@ -476,6 +516,16 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)
if (query_status) if (query_status)
query_status->checkTimeLimit(); query_status->checkTimeLimit();
/// Check if the block is big enough already
if (max_block_size > 0 && row_count > 0)
{
size_t total_size = 0;
for (const auto & column : res_columns)
total_size += column->byteSize();
if (total_size > max_block_size)
break;
}
list_tasks.clear(); list_tasks.clear();
std::vector<String> paths_to_list; std::vector<String> paths_to_list;
while (!paths.empty() && static_cast<Int64>(list_tasks.size()) < max_inflight_requests) while (!paths.empty() && static_cast<Int64>(list_tasks.size()) < max_inflight_requests)
@ -519,8 +569,8 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)
continue; continue;
auto & task = list_tasks[list_task_idx]; auto & task = list_tasks[list_task_idx];
if (auto elem = context->getProcessListElement()) if (query_status)
elem->checkTimeLimit(); query_status->checkTimeLimit();
Strings nodes = std::move(list_result.names); Strings nodes = std::move(list_result.names);
@ -557,8 +607,8 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)
auto & get_task = get_tasks[i]; auto & get_task = get_tasks[i];
auto & list_task = list_tasks[get_task.list_task_idx]; auto & list_task = list_tasks[get_task.list_task_idx];
if (auto elem = context->getProcessListElement()) if (query_status)
elem->checkTimeLimit(); query_status->checkTimeLimit();
// Deduplication // Deduplication
String key = list_task.path_part + '/' + get_task.node; String key = list_task.path_part + '/' + get_task.node;
@ -584,17 +634,22 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns)
res_columns[col_num++]->insert( res_columns[col_num++]->insert(
list_task.path); /// This is the original path. In order to process the request, condition in WHERE should be triggered. list_task.path); /// This is the original path. In order to process the request, condition in WHERE should be triggered.
++row_count;
if (list_task.path_type != ZkPathType::Exact && res.stat.numChildren > 0) if (list_task.path_type != ZkPathType::Exact && res.stat.numChildren > 0)
{ {
paths.emplace_back(key, ZkPathType::Recurse); paths.emplace_back(key, ZkPathType::Recurse);
} }
} }
} }
return Chunk(std::move(res_columns), row_count);
} }
ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info, ContextPtr context_) ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info, UInt64 max_block_size_, ContextPtr context_)
: SourceStepWithFilter({.header = header}) : SourceStepWithFilter({.header = header})
, storage_limits(query_info.storage_limits) , storage_limits(query_info.storage_limits)
, max_block_size(max_block_size_)
, context(std::move(context_)) , context(std::move(context_))
{ {
} }
@ -602,13 +657,7 @@ ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(const Block & header, SelectQue
void ReadFromSystemZooKeeper::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) void ReadFromSystemZooKeeper::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
{ {
const auto & header = getOutputStream().header; const auto & header = getOutputStream().header;
MutableColumns res_columns = header.cloneEmptyColumns(); auto source = std::make_shared<SystemZooKeeperSource>(std::move(paths), header, max_block_size, context);
fillData(res_columns);
UInt64 num_rows = res_columns.at(0)->size();
Chunk chunk(std::move(res_columns), num_rows);
auto source = std::make_shared<SourceFromSingleChunk>(header, std::move(chunk));
source->setStorageLimits(storage_limits); source->setStorageLimits(storage_limits);
processors.emplace_back(source); processors.emplace_back(source);
pipeline.init(Pipe(std::move(source))); pipeline.init(Pipe(std::move(source)));

View File

@ -7,14 +7,20 @@
namespace DB namespace DB
{ {
template<typename StorageT, typename... StorageArgs> template <int Length>
void attach(ContextPtr context, IDatabase & system_database, const String & table_name, const String & comment, StorageArgs && ... args) using StringLiteral = const char(&)[Length];
template<typename StorageT, int CommentSize, typename... StorageArgs>
void attach(ContextPtr context, IDatabase & system_database, const String & table_name, StringLiteral<CommentSize> comment, StorageArgs && ... args)
{ {
static_assert(CommentSize > 15, "The comment for a system table is too short or empty");
assert(system_database.getDatabaseName() == DatabaseCatalog::SYSTEM_DATABASE); assert(system_database.getDatabaseName() == DatabaseCatalog::SYSTEM_DATABASE);
auto table_id = StorageID::createEmpty();
if (system_database.getUUID() == UUIDHelpers::Nil) if (system_database.getUUID() == UUIDHelpers::Nil)
{ {
/// Attach to Ordinary database. /// Attach to Ordinary database.
auto table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name); table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name);
system_database.attachTable(context, table_name, std::make_shared<StorageT>(table_id, std::forward<StorageArgs>(args)...)); system_database.attachTable(context, table_name, std::make_shared<StorageT>(table_id, std::forward<StorageArgs>(args)...));
} }
else else
@ -22,18 +28,18 @@ void attach(ContextPtr context, IDatabase & system_database, const String & tabl
/// Attach to Atomic database. /// Attach to Atomic database.
/// NOTE: UUIDs are not persistent, but it's ok since no data are stored on disk for these storages /// NOTE: UUIDs are not persistent, but it's ok since no data are stored on disk for these storages
/// and path is actually not used /// and path is actually not used
auto table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name, UUIDHelpers::generateV4()); table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name, UUIDHelpers::generateV4());
DatabaseCatalog::instance().addUUIDMapping(table_id.uuid); DatabaseCatalog::instance().addUUIDMapping(table_id.uuid);
String path = "store/" + DatabaseCatalog::getPathForUUID(table_id.uuid); String path = "store/" + DatabaseCatalog::getPathForUUID(table_id.uuid);
system_database.attachTable(context, table_name, std::make_shared<StorageT>(table_id, std::forward<StorageArgs>(args)...), path); system_database.attachTable(context, table_name, std::make_shared<StorageT>(table_id, std::forward<StorageArgs>(args)...), path);
/// Set the comment
auto table = DatabaseCatalog::instance().getTable(table_id, context);
assert(table);
auto metadata = table->getInMemoryMetadata();
metadata.comment = comment;
table->setInMemoryMetadata(metadata);
} }
/// Set the comment
auto table = DatabaseCatalog::instance().getTable(table_id, context);
assert(table);
auto metadata = table->getInMemoryMetadata();
metadata.comment = comment;
table->setInMemoryMetadata(metadata);
} }
} }

View File

@ -10,7 +10,6 @@ test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_ove
test_mutations_with_merge_tree/test.py::test_mutations_with_merge_background_task test_mutations_with_merge_tree/test.py::test_mutations_with_merge_background_task
test_mysql_database_engine/test.py::test_mysql_ddl_for_mysql_database test_mysql_database_engine/test.py::test_mysql_ddl_for_mysql_database
test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster
test_profile_events_s3/test.py::test_profile_events
test_replicating_constants/test.py::test_different_versions test_replicating_constants/test.py::test_different_versions
test_select_access_rights/test_main.py::test_alias_columns test_select_access_rights/test_main.py::test_alias_columns
test_select_access_rights/test_main.py::test_select_count test_select_access_rights/test_main.py::test_select_count

View File

@ -23,7 +23,6 @@
02428_parameterized_view 02428_parameterized_view
02493_inconsistent_hex_and_binary_number 02493_inconsistent_hex_and_binary_number
02575_merge_prewhere_different_default_kind 02575_merge_prewhere_different_default_kind
00917_multiple_joins_denny_crane
02725_agg_projection_resprect_PK 02725_agg_projection_resprect_PK
02763_row_policy_storage_merge_alias 02763_row_policy_storage_merge_alias
02818_parameterized_view_with_cte_multiple_usage 02818_parameterized_view_with_cte_multiple_usage

View File

@ -10,14 +10,17 @@ from pathlib import Path
from shutil import copy2 from shutil import copy2
from typing import List, Optional, Union from typing import List, Optional, Union
# isort: off
from github.Commit import Commit from github.Commit import Commit
# isort: on
from build_download_helper import download_build_with_progress from build_download_helper import download_build_with_progress
from commit_status_helper import post_commit_status from commit_status_helper import post_commit_status
from compress_files import SUFFIX, compress_fast, decompress_fast from compress_files import SUFFIX, compress_fast, decompress_fast
from env_helper import CI, RUNNER_TEMP, S3_BUILDS_BUCKET from env_helper import CI, RUNNER_TEMP, S3_BUILDS_BUCKET
from git_helper import SHA_REGEXP from git_helper import SHA_REGEXP
from report import HEAD_HTML_TEMPLATE, FOOTER_HTML_TEMPLATE from report import FOOTER_HTML_TEMPLATE, HEAD_HTML_TEMPLATE, SUCCESS
from s3_helper import S3Helper from s3_helper import S3Helper
ARTIFACTS_PATH = Path(RUNNER_TEMP) / "artifacts" ARTIFACTS_PATH = Path(RUNNER_TEMP) / "artifacts"
@ -128,9 +131,7 @@ class ArtifactsHelper:
@staticmethod @staticmethod
def post_commit_status(commit: Commit, url: str) -> None: def post_commit_status(commit: Commit, url: str) -> None:
post_commit_status( post_commit_status(commit, SUCCESS, url, "Artifacts for workflow", "Artifacts")
commit, "success", url, "Artifacts for workflow", "Artifacts"
)
def _regenerate_index(self) -> None: def _regenerate_index(self) -> None:
if CI: if CI:

View File

@ -7,13 +7,11 @@ import sys
from pathlib import Path from pathlib import Path
from build_download_helper import get_build_name_for_check, read_build_urls from build_download_helper import get_build_name_for_check, read_build_urls
from clickhouse_helper import ( from clickhouse_helper import CiLogsCredentials
CiLogsCredentials,
)
from docker_images_helper import DockerImage, get_docker_image, pull_image from docker_images_helper import DockerImage, get_docker_image, pull_image
from env_helper import REPORT_PATH, TEMP_PATH from env_helper import REPORT_PATH, TEMP_PATH
from pr_info import PRInfo from pr_info import PRInfo
from report import JobReport from report import FAIL, FAILURE, OK, SUCCESS, JobReport, TestResult
from stopwatch import Stopwatch from stopwatch import Stopwatch
from tee_popen import TeePopen from tee_popen import TeePopen
@ -113,7 +111,6 @@ def main():
paths = { paths = {
"run.log": run_log_path, "run.log": run_log_path,
"main.log": main_log_path, "main.log": main_log_path,
"fuzzer.log": workspace_path / "fuzzer.log",
"report.html": workspace_path / "report.html", "report.html": workspace_path / "report.html",
"core.zst": workspace_path / "core.zst", "core.zst": workspace_path / "core.zst",
"dmesg.log": workspace_path / "dmesg.log", "dmesg.log": workspace_path / "dmesg.log",
@ -122,12 +119,20 @@ def main():
compressed_server_log_path = workspace_path / "server.log.zst" compressed_server_log_path = workspace_path / "server.log.zst"
if compressed_server_log_path.exists(): if compressed_server_log_path.exists():
paths["server.log.zst"] = compressed_server_log_path paths["server.log.zst"] = compressed_server_log_path
else:
# The script can fail before the invocation of `zstd`, but we are still interested in its log:
not_compressed_server_log_path = workspace_path / "server.log"
if not_compressed_server_log_path.exists():
paths["server.log"] = not_compressed_server_log_path
# The script can fail before the invocation of `zstd`, but we are still interested in its log: # Same idea but with the fuzzer log
compressed_fuzzer_log_path = workspace_path / "fuzzer.log.zst"
not_compressed_server_log_path = workspace_path / "server.log" if compressed_fuzzer_log_path.exists():
if not_compressed_server_log_path.exists(): paths["fuzzer.log.zst"] = compressed_fuzzer_log_path
paths["server.log"] = not_compressed_server_log_path else:
not_compressed_fuzzer_log_path = workspace_path / "fuzzer.log"
if not_compressed_fuzzer_log_path.exists():
paths["fuzzer.log"] = not_compressed_fuzzer_log_path
# Try to get status message saved by the fuzzer # Try to get status message saved by the fuzzer
try: try:
@ -137,12 +142,16 @@ def main():
with open(workspace_path / "description.txt", "r", encoding="utf-8") as desc_f: with open(workspace_path / "description.txt", "r", encoding="utf-8") as desc_f:
description = desc_f.readline().rstrip("\n") description = desc_f.readline().rstrip("\n")
except: except:
status = "failure" status = FAILURE
description = "Task failed: $?=" + str(retcode) description = "Task failed: $?=" + str(retcode)
test_result = TestResult(description, OK)
if "fail" in status:
test_result.status = FAIL
JobReport( JobReport(
description=description, description=description,
test_results=[], test_results=[test_result],
status=status, status=status,
start_time=stopwatch.start_time_str, start_time=stopwatch.start_time_str,
duration=stopwatch.duration_seconds, duration=stopwatch.duration_seconds,
@ -151,7 +160,7 @@ def main():
).dump() ).dump()
logging.info("Result: '%s', '%s'", status, description) logging.info("Result: '%s', '%s'", status, description)
if status == "failure": if status != SUCCESS:
sys.exit(1) sys.exit(1)

Some files were not shown because too many files have changed in this diff Show More