Merge branch 'master' into Azure_backup

This commit is contained in:
Smita Kulkarni 2024-02-07 13:48:46 +01:00
commit e594608c42
479 changed files with 23737 additions and 4730 deletions

View File

@ -138,19 +138,26 @@ jobs:
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker server and keeper images
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################

View File

@ -35,7 +35,7 @@ jobs:
- name: PrepareRunConfig
id: runconfig
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --rebuild-all-binaries --outfile ${{ runner.temp }}/ci_run_data.json
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --outfile ${{ runner.temp }}/ci_run_data.json
echo "::group::CI configuration"
python3 -m json.tool ${{ runner.temp }}/ci_run_data.json
@ -242,20 +242,26 @@ jobs:
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker server and keeper images
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
# FIXME: avoid using 0 checkout
checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################
@ -313,22 +319,15 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
MarkReleaseReady:
needs: [RunConfig, BuilderBinDarwin, BuilderBinDarwinAarch64, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
- BuilderDebRelease
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Mark Commit Release Ready
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Mark Commit Release Ready
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 mark_release_ready.py
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################

View File

@ -104,7 +104,7 @@ jobs:
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Fast tests
test_name: Fast test
runner_type: builder
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
@ -273,19 +273,26 @@ jobs:
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker server and keeper images
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################
@ -484,11 +491,11 @@ jobs:
run_command: |
TEMP_PATH="${TEMP_PATH}/integration" \
python3 integration_test_check.py "Integration $CHECK_NAME" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
--validate-bugfix --post-commit-status=file || echo "ignore exit code"
TEMP_PATH="${TEMP_PATH}/stateless" \
python3 functional_test_check.py "Stateless $CHECK_NAME" "$KILL_TIMEOUT" \
--validate-bugfix --post-commit-status=file || echo 'ignore exit code'
--validate-bugfix --post-commit-status=file || echo "ignore exit code"
python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/functional_commit_status.tsv" "${TEMP_PATH}/integration/integration_commit_status.tsv"
##############################################################################################

View File

@ -41,7 +41,7 @@ jobs:
id: runconfig
run: |
echo "::group::configure CI run"
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --rebuild-all-binaries --outfile ${{ runner.temp }}/ci_run_data.json
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --outfile ${{ runner.temp }}/ci_run_data.json
echo "::endgroup::"
echo "::group::CI run configure results"
python3 -m json.tool ${{ runner.temp }}/ci_run_data.json
@ -153,19 +153,26 @@ jobs:
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker server and keeper images
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
checkout_depth: 0
run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################
@ -456,7 +463,8 @@ jobs:
FinishCheck:
if: ${{ !failure() && !cancelled() }}
needs:
- DockerServerImages
- DockerServerImage
- DockerKeeperImage
- BuilderReport
- BuilderSpecialReport
- MarkReleaseReady

View File

@ -55,7 +55,7 @@ jobs:
python3 ./utils/security-generator/generate_security.py > SECURITY.md
git diff HEAD
- name: Create Pull Request
uses: peter-evans/create-pull-request@v3
uses: peter-evans/create-pull-request@v6
with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}

2
.gitmodules vendored
View File

@ -99,7 +99,7 @@
url = https://github.com/awslabs/aws-c-event-stream
[submodule "aws-c-common"]
path = contrib/aws-c-common
url = https://github.com/ClickHouse/aws-c-common
url = https://github.com/awslabs/aws-c-common.git
[submodule "aws-checksums"]
path = contrib/aws-checksums
url = https://github.com/awslabs/aws-checksums

File diff suppressed because it is too large Load Diff

View File

@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s
| Version | Supported |
|:-|:-|
| 24.1 | ✔️ |
| 23.12 | ✔️ |
| 23.11 | ✔️ |
| 23.10 | ✔️ |
| 23.10 | ❌ |
| 23.9 | ❌ |
| 23.8 | ✔️ |
| 23.7 | ❌ |

View File

@ -64,19 +64,14 @@ using ComparatorWrapper = Comparator;
#include <miniselect/floyd_rivest_select.h>
template <typename RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
template <typename RandomIt, typename Compare>
void nth_element(RandomIt first, RandomIt nth, RandomIt last, Compare compare)
{
using value_type = typename std::iterator_traits<RandomIt>::value_type;
using comparator = std::less<value_type>;
comparator compare;
ComparatorWrapper<comparator> compare_wrapper = compare;
#ifndef NDEBUG
::shuffle(first, last);
#endif
ComparatorWrapper<Compare> compare_wrapper = compare;
::miniselect::floyd_rivest_select(first, nth, last, compare_wrapper);
#ifndef NDEBUG
@ -87,6 +82,15 @@ void nth_element(RandomIt first, RandomIt nth, RandomIt last)
#endif
}
/// Convenience overload of nth_element that uses the default ascending order.
/// It builds a std::less over the iterator's value type and forwards all
/// arguments to the comparator-taking ::nth_element overload.
template <typename RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
{
    using element_type = typename std::iterator_traits<RandomIt>::value_type;
    ::nth_element(first, nth, last, std::less<element_type>{});
}
template <typename RandomIt, typename Compare>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare)
{

View File

@ -904,13 +904,6 @@ public:
/// Returns a pointer to the Logger with the given name if it
/// exists, or a null pointer otherwise.
static bool destroy(const std::string & name);
/// Destroys the logger with the specified name. Does nothing
/// if the logger is not found.
///
/// After a logger has been destroyed, all references to it
/// become invalid.
static void shutdown();
/// Shuts down the logging framework and releases all
/// Loggers.
@ -940,8 +933,6 @@ public:
static const std::string ROOT; /// The name of the root logger ("").
protected:
typedef std::map<std::string, Logger *> LoggerMap;
Logger(const std::string & name, Channel * pChannel, int level);
~Logger();
@ -962,8 +953,6 @@ private:
std::string _name;
Channel * _pChannel;
std::atomic_int _level;
static LoggerMap * _pLoggerMap;
};

View File

@ -20,6 +20,7 @@
#include "Poco/NumberParser.h"
#include "Poco/String.h"
#include <cassert>
#include <mutex>
namespace
@ -37,12 +38,20 @@ std::mutex & getLoggerMutex()
return *logger_mutex;
}
/// Value stored in the logger map for one logger name.
struct LoggerEntry
{
    /// The logger registered under this name. Initialized to nullptr so that a
    /// default-constructed entry never carries an indeterminate pointer.
    Poco::Logger * logger = nullptr;

    /// True when the logger was created through Logger::getShared() or
    /// Logger::createShared(). Logger::get() resets it to false after taking
    /// an extra reference, and Logger::shutdown() does not release entries
    /// that still have it set.
    bool owned_by_shared_ptr = false;
};
using LoggerMap = std::unordered_map<std::string, LoggerEntry>;
LoggerMap * _pLoggerMap = nullptr;
}
namespace Poco {
Logger::LoggerMap* Logger::_pLoggerMap = 0;
const std::string Logger::ROOT;
@ -134,12 +143,12 @@ void Logger::setLevel(const std::string& name, int level)
if (_pLoggerMap)
{
std::string::size_type len = name.length();
for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it)
for (auto & it : *_pLoggerMap)
{
if (len == 0 ||
(it->first.compare(0, len, name) == 0 && (it->first.length() == len || it->first[len] == '.')))
(it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.')))
{
it->second->setLevel(level);
it.second.logger->setLevel(level);
}
}
}
@ -153,12 +162,12 @@ void Logger::setChannel(const std::string& name, Channel* pChannel)
if (_pLoggerMap)
{
std::string::size_type len = name.length();
for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it)
for (auto & it : *_pLoggerMap)
{
if (len == 0 ||
(it->first.compare(0, len, name) == 0 && (it->first.length() == len || it->first[len] == '.')))
(it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.')))
{
it->second->setChannel(pChannel);
it.second.logger->setChannel(pChannel);
}
}
}
@ -172,12 +181,12 @@ void Logger::setProperty(const std::string& loggerName, const std::string& prope
if (_pLoggerMap)
{
std::string::size_type len = loggerName.length();
for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it)
for (auto & it : *_pLoggerMap)
{
if (len == 0 ||
(it->first.compare(0, len, loggerName) == 0 && (it->first.length() == len || it->first[len] == '.')))
(it.first.compare(0, len, loggerName) == 0 && (it.first.length() == len || it.first[len] == '.')))
{
it->second->setProperty(propertyName, value);
it.second.logger->setProperty(propertyName, value);
}
}
}
@ -304,35 +313,84 @@ struct LoggerDeleter
{
void operator()(Poco::Logger * logger)
{
if (Logger::destroy(logger->name()))
return;
std::lock_guard<std::mutex> lock(getLoggerMutex());
logger->release();
/// If logger infrastructure is destroyed just decrement logger reference count
if (!_pLoggerMap)
{
logger->release();
return;
}
auto it = _pLoggerMap->find(logger->name());
assert(it != _pLoggerMap->end());
/** If reference count is 1, this means this shared pointer owns the logger
* and needs to destroy it.
*/
size_t reference_count_before_release = logger->release();
if (reference_count_before_release == 1)
{
assert(it->second.owned_by_shared_ptr);
_pLoggerMap->erase(it);
}
}
};
inline LoggerPtr makeLoggerPtr(Logger & logger)
{
logger.duplicate();
return std::shared_ptr<Logger>(&logger, LoggerDeleter());
}
}
Logger& Logger::get(const std::string& name)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
return unsafeGet(name);
Logger & logger = unsafeGet(name);
/** If a shared pointer has already been created for this logger,
* we need to increment the Logger reference count, because the logger
* is now owned by the logger infrastructure.
*/
auto it = _pLoggerMap->find(name);
if (it->second.owned_by_shared_ptr)
{
it->second.logger->duplicate();
it->second.owned_by_shared_ptr = false;
}
return logger;
}
LoggerPtr Logger::getShared(const std::string & name)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
bool logger_exists = _pLoggerMap && _pLoggerMap->contains(name);
return makeLoggerPtr(unsafeGet(name));
Logger & logger = unsafeGet(name);
/** If the logger already exists, then this shared pointer does not own it.
* If the logger does not exist, either the logger infrastructure has already been destroyed
* or the logger was just created.
*/
if (logger_exists)
{
logger.duplicate();
}
else if (_pLoggerMap)
{
_pLoggerMap->find(name)->second.owned_by_shared_ptr = true;
}
return makeLoggerPtr(logger);
}
Logger& Logger::unsafeGet(const std::string& name)
{
Logger* pLogger = find(name);
@ -364,7 +422,10 @@ LoggerPtr Logger::createShared(const std::string & name, Channel * pChannel, int
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
return makeLoggerPtr(unsafeCreate(name, pChannel, level));
Logger & logger = unsafeCreate(name, pChannel, level);
_pLoggerMap->find(name)->second.owned_by_shared_ptr = true;
return makeLoggerPtr(logger);
}
Logger& Logger::root()
@ -389,10 +450,14 @@ void Logger::shutdown()
if (_pLoggerMap)
{
for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it)
for (auto & it : *_pLoggerMap)
{
it->second->release();
if (it.second.owned_by_shared_ptr)
continue;
it.second.logger->release();
}
delete _pLoggerMap;
_pLoggerMap = 0;
}
@ -405,32 +470,12 @@ Logger* Logger::find(const std::string& name)
{
LoggerMap::iterator it = _pLoggerMap->find(name);
if (it != _pLoggerMap->end())
return it->second;
return it->second.logger;
}
return 0;
}
bool Logger::destroy(const std::string& name)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
if (_pLoggerMap)
{
LoggerMap::iterator it = _pLoggerMap->find(name);
if (it != _pLoggerMap->end())
{
if (it->second->release() == 1)
_pLoggerMap->erase(it);
return true;
}
}
return false;
}
void Logger::names(std::vector<std::string>& names)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
@ -538,7 +583,8 @@ void Logger::add(Logger* pLogger)
{
if (!_pLoggerMap)
_pLoggerMap = new LoggerMap;
_pLoggerMap->insert(LoggerMap::value_type(pLogger->name(), pLogger));
_pLoggerMap->emplace(pLogger->name(), LoggerEntry{pLogger, false /*owned_by_shared_ptr*/});
}

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54482)
SET(VERSION_REVISION 54483)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 1)
SET(VERSION_MINOR 2)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH a2faa65b080a587026c86844f3a20c74d23a86f8)
SET(VERSION_DESCRIBE v24.1.1.1-testing)
SET(VERSION_STRING 24.1.1.1)
SET(VERSION_GITHASH 5a024dfc0936e062770d0cfaad0805b57c1fba17)
SET(VERSION_DESCRIBE v24.2.1.1-testing)
SET(VERSION_STRING 24.2.1.1)
# end of autochange

View File

@ -79,7 +79,10 @@ if (SANITIZE_COVERAGE)
# But the actual coverage will be enabled on per-library basis: for ClickHouse code, but not for 3rd-party.
set (COVERAGE_FLAGS "-fsanitize-coverage=trace-pc-guard,pc-table")
endif()
set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table")
set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table)
set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table")
set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table)
else()
set (WITHOUT_COVERAGE_FLAGS "")
set (WITHOUT_COVERAGE_FLAGS_LIST "")
endif()

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit ca02358dcc7ce3ab733dd4cbcc32734eecfa4ee3
Subproject commit 4ec215f3607c2111bf2cc91ba842046a6b5eb0c4

2
contrib/aws-c-auth vendored

@ -1 +1 @@
Subproject commit 97133a2b5dbca1ccdf88cd6f44f39d0531d27d12
Subproject commit baeffa791d9d1cf61460662a6d9ac2186aaf05df

2
contrib/aws-c-cal vendored

@ -1 +1 @@
Subproject commit 85dd7664b786a389c6fb1a6f031ab4bb2282133d
Subproject commit 9453687ff5493ba94eaccf8851200565c4364c77

@ -1 +1 @@
Subproject commit 45dcb2849c891dba2100b270b4676765c92949ff
Subproject commit 80f21b3cac5ac51c6b8a62c7d2a5ef58a75195ee

@ -1 +1 @@
Subproject commit b517b7decd0dac30be2162f5186c250221c53aff
Subproject commit 99ec79ee2970f1a045d4ced1501b97ee521f2f85

@ -1 +1 @@
Subproject commit 2f9b60c42f90840ec11822acda3d8cdfa97a773d
Subproject commit 08f24e384e5be20bcffa42b49213d24dad7881ae

2
contrib/aws-c-http vendored

@ -1 +1 @@
Subproject commit dd34461987947672444d0bc872c5a733dfdb9711
Subproject commit a082f8a2067e4a31db73f1d4ffd702a8dc0f7089

2
contrib/aws-c-io vendored

@ -1 +1 @@
Subproject commit d58ed4f272b1cb4f89ac9196526ceebe5f2b0d89
Subproject commit 11ce3c750a1dac7b04069fc5bff89e97e91bad4d

2
contrib/aws-c-mqtt vendored

@ -1 +1 @@
Subproject commit 33c3455cec82b16feb940e12006cefd7b3ef4194
Subproject commit 6d36cd3726233cb757468d0ea26f6cd8dad151ec

2
contrib/aws-c-s3 vendored

@ -1 +1 @@
Subproject commit d7bfe602d6925948f1fff95784e3613cca6a3900
Subproject commit de36fee8fe7ab02f10987877ae94a805bf440c1f

@ -1 +1 @@
Subproject commit 208a701fa01e99c7c8cc3dcebc8317da71362972
Subproject commit fd8c0ba2e233997eaaefe82fb818b8b444b956d3

@ -1 +1 @@
Subproject commit ad53be196a25bbefa3700a01187fdce573a7d2d0
Subproject commit 321b805559c8e911be5bddba13fcbd222a3e2d3a

View File

@ -25,6 +25,7 @@ include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsFeatureTests.cmake")
include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsThreadAffinity.cmake")
include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsThreadName.cmake")
include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsSIMD.cmake")
include("${ClickHouse_SOURCE_DIR}/contrib/aws-crt-cpp/cmake/AwsGetVersion.cmake")
# Gather sources and options.
@ -35,6 +36,8 @@ set(AWS_PUBLIC_COMPILE_DEFS)
set(AWS_PRIVATE_COMPILE_DEFS)
set(AWS_PRIVATE_LIBS)
list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DINTEL_NO_ITTNOTIFY_API")
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DDEBUG_BUILD")
endif()
@ -85,14 +88,20 @@ file(GLOB AWS_SDK_CORE_SRC
"${AWS_SDK_CORE_DIR}/source/external/cjson/*.cpp"
"${AWS_SDK_CORE_DIR}/source/external/tinyxml2/*.cpp"
"${AWS_SDK_CORE_DIR}/source/http/*.cpp"
"${AWS_SDK_CORE_DIR}/source/http/crt/*.cpp"
"${AWS_SDK_CORE_DIR}/source/http/standard/*.cpp"
"${AWS_SDK_CORE_DIR}/source/internal/*.cpp"
"${AWS_SDK_CORE_DIR}/source/monitoring/*.cpp"
"${AWS_SDK_CORE_DIR}/source/net/*.cpp"
"${AWS_SDK_CORE_DIR}/source/net/linux-shared/*.cpp"
"${AWS_SDK_CORE_DIR}/source/platform/linux-shared/*.cpp"
"${AWS_SDK_CORE_DIR}/source/smithy/tracing/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/base64/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/component-registry/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/crypto/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/crypto/openssl/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/crypto/factory/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/crypto/openssl/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/event/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/json/*.cpp"
"${AWS_SDK_CORE_DIR}/source/utils/logging/*.cpp"
@ -115,9 +124,8 @@ OPTION(USE_AWS_MEMORY_MANAGEMENT "Aws memory management" OFF)
configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY)
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MAJOR=1")
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MINOR=10")
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_PATCH=36")
aws_get_version(AWS_CRT_CPP_VERSION_MAJOR AWS_CRT_CPP_VERSION_MINOR AWS_CRT_CPP_VERSION_PATCH FULL_VERSION GIT_HASH)
configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${AWS_CRT_DIR}/include/aws/crt/Config.h" @ONLY)
list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC})
@ -176,6 +184,7 @@ file(GLOB AWS_COMMON_SRC
"${AWS_COMMON_DIR}/source/*.c"
"${AWS_COMMON_DIR}/source/external/*.c"
"${AWS_COMMON_DIR}/source/posix/*.c"
"${AWS_COMMON_DIR}/source/linux/*.c"
)
file(GLOB AWS_COMMON_ARCH_SRC

2
contrib/aws-crt-cpp vendored

@ -1 +1 @@
Subproject commit 8a301b7e842f1daed478090c869207300972379f
Subproject commit f532d6abc0d2b0d8b5d6fe9e7c51eaedbe4afbd0

2
contrib/aws-s2n-tls vendored

@ -1 +1 @@
Subproject commit 71f4794b7580cf780eb4aca77d69eded5d3c7bb4
Subproject commit 9a1e75454023e952b366ce1eab9c54007250119f

View File

@ -16,29 +16,30 @@ message(STATUS "Checking Rust toolchain for current target")
# See https://doc.rust-lang.org/nightly/rustc/platform-support.html
if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le")
set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64")
set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64")
set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64"))
set(Rust_CARGO_TARGET "aarch64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-unknown-freebsd")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64")
set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu")
else()
message(FATAL_ERROR "Unsupported rust target")
endif()
message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}")
if(DEFINED CMAKE_TOOLCHAIN_FILE)
if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le")
set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64")
set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl"))
set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64")
set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64"))
set(Rust_CARGO_TARGET "aarch64-apple-darwin")
elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64"))
set(Rust_CARGO_TARGET "x86_64-unknown-freebsd")
elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64")
set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu")
else()
message(FATAL_ERROR "Unsupported rust target")
endif()
message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}")
endif ()
# FindRust.cmake
list(APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake")

2
contrib/curl vendored

@ -1 +1 @@
Subproject commit d755a5f7c009dd63a61b2c745180d8ba937cbfeb
Subproject commit 7161cb17c01dcff1dc5bf89a18437d9d729f1ecd

View File

@ -1,5 +1,6 @@
if (APPLE OR SANITIZE STREQUAL "undefined" OR SANITIZE STREQUAL "memory")
# llvm-tblgen, that is used during LLVM build, doesn't work with UBSan.
if (APPLE OR SANITIZE STREQUAL "memory")
# llvm-tblgen, that is used during LLVM build, will throw MSAN errors when running (breaking the build)
# TODO: Retest when upgrading LLVM or build only llvm-tblgen without sanitizers
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
set (ENABLE_DWARF_PARSER_DEFAULT OFF)
else()

2
contrib/simdjson vendored

@ -1 +1 @@
Subproject commit 1075e8609c4afa253162d441437af929c29e31bb
Subproject commit 6060be2fdf62edf4a8f51a8b0883d57d09397b30

View File

@ -24,7 +24,7 @@ git config --file .gitmodules --get-regexp '.*path' | sed 's/[^ ]* //' | xargs -
# We don't want to depend on any third-party CMake files.
# To check it, find and delete them.
grep -o -P '"contrib/[^"]+"' .gitmodules |
grep -v -P 'contrib/(llvm-project|google-protobuf|grpc|abseil-cpp|corrosion)' |
grep -v -P 'contrib/(llvm-project|google-protobuf|grpc|abseil-cpp|corrosion|aws-crt-cpp)' |
xargs -I@ find @ \
-'(' -name 'CMakeLists.txt' -or -name '*.cmake' -')' -and -not -name '*.h.cmake' \
-delete

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.12.2.59"
ARG VERSION="24.1.2.5"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.12.2.59"
ARG VERSION="24.1.2.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.12.2.59"
ARG VERSION="24.1.2.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -211,6 +211,17 @@ function build
echo "build_clickhouse_fasttest_binary: [ OK ] $BUILD_SECONDS_ELAPSED sec." \
| ts '%Y-%m-%d %H:%M:%S' \
| tee "$FASTTEST_OUTPUT/test_result.txt"
(
# This query should fail, and print stacktrace with proper symbol names (even on a stripped binary)
clickhouse_output=$(programs/clickhouse-stripped --stacktrace -q 'select' 2>&1 || :)
if [[ $clickhouse_output =~ DB::LocalServer::main ]]; then
echo "stripped_clickhouse_shows_symbols_names: [ OK ] 0 sec."
else
echo -e "stripped_clickhouse_shows_symbols_names: [ FAIL ] 0 sec. - clickhouse output:\n\n$clickhouse_output\n"
fi
) | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_result.txt"
if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then
mkdir -p "$FASTTEST_OUTPUT/binaries/"
cp programs/clickhouse "$FASTTEST_OUTPUT/binaries/clickhouse"

View File

@ -293,10 +293,10 @@ if [ $failed_to_save_logs -ne 0 ]; then
# for files >64MB, we want these files to be compressed explicitly
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
do
clickhouse-local "$data_path_config" --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
fi
done
fi

View File

@ -78,6 +78,8 @@ function configure()
randomize_config_boolean_value use_compression zookeeper
fi
randomize_config_boolean_value allow_experimental_block_number_column block_number
# for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
# for clickhouse-client

View File

@ -122,6 +122,7 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
rm /etc/clickhouse-server/config.d/block_number.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml

View File

@ -0,0 +1,31 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.11.5.29-stable (d83b108deca) FIXME as compared to v23.11.4.24-stable (e79d840d7fe)
#### Improvement
* Backported in [#58815](https://github.com/ClickHouse/ClickHouse/issues/58815): Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#59234](https://github.com/ClickHouse/ClickHouse/issues/59234): Allow ignoring schema evolution in the Iceberg table engine and reading all data using the schema specified by the user on table creation or the latest schema parsed from metadata on table creation. This is done under the setting `iceberg_engine_ignore_schema_evolution`, which is disabled by default. Note that enabling this setting can lead to incorrect results because, in the case of an evolved schema, all data files will be read using the same schema. [#59133](https://github.com/ClickHouse/ClickHouse/pull/59133) ([Kruglov Pavel](https://github.com/Avogar)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)).
* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)).
* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)).
* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix not-ready set for system.tables [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* refine error message [#57991](https://github.com/ClickHouse/ClickHouse/pull/57991) ([Han Fei](https://github.com/hanfei1991)).
* Fix rare race in external sort/aggregation with temporary data in cache [#58013](https://github.com/ClickHouse/ClickHouse/pull/58013) ([Anton Popov](https://github.com/CurtizJ)).
* Follow-up to [#58482](https://github.com/ClickHouse/ClickHouse/issues/58482) [#58574](https://github.com/ClickHouse/ClickHouse/pull/58574) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Decrease log level for one log message [#59168](https://github.com/ClickHouse/ClickHouse/pull/59168) ([Kseniia Sumarokova](https://github.com/kssenii)).

View File

@ -0,0 +1,36 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.12.3.40-stable (a594704ae75) FIXME as compared to v23.12.2.59-stable (17ab210e761)
#### Improvement
* Backported in [#58660](https://github.com/ClickHouse/ClickHouse/issues/58660): When executing some queries, which require a lot of streams for reading data, the error `"Paste JOIN requires sorted tables only"` was previously thrown. Now the number of streams is resized to 1 in that case. [#58608](https://github.com/ClickHouse/ClickHouse/pull/58608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Backported in [#58817](https://github.com/ClickHouse/ClickHouse/issues/58817): Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#59235](https://github.com/ClickHouse/ClickHouse/issues/59235): Allow ignoring schema evolution in the Iceberg table engine and reading all data using the schema specified by the user on table creation or the latest schema parsed from metadata on table creation. This is done under the setting `iceberg_engine_ignore_schema_evolution`, which is disabled by default. Note that enabling this setting can lead to incorrect results because, in the case of an evolved schema, all data files will be read using the same schema. [#59133](https://github.com/ClickHouse/ClickHouse/pull/59133) ([Kruglov Pavel](https://github.com/Avogar)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Delay reading from StorageKafka to allow multiple reads in materialized views [#58477](https://github.com/ClickHouse/ClickHouse/pull/58477) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable max_joined_block_rows in ConcurrentHashJoin [#58595](https://github.com/ClickHouse/ClickHouse/pull/58595) ([vdimir](https://github.com/vdimir)).
* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)).
* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)).
* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)).
* Multiple read file log storage in mv [#58877](https://github.com/ClickHouse/ClickHouse/pull/58877) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix not-ready set for system.tables [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Follow-up to [#58482](https://github.com/ClickHouse/ClickHouse/issues/58482) [#58574](https://github.com/ClickHouse/ClickHouse/pull/58574) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Change log level for super important message in Keeper [#59010](https://github.com/ClickHouse/ClickHouse/pull/59010) ([alesapin](https://github.com/alesapin)).
* Decrease log level for one log message [#59168](https://github.com/ClickHouse/ClickHouse/pull/59168) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix fasttest by pinning pip dependencies [#59256](https://github.com/ClickHouse/ClickHouse/pull/59256) ([Azat Khuzhin](https://github.com/azat)).
* No debug symbols in Rust [#59306](https://github.com/ClickHouse/ClickHouse/pull/59306) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -0,0 +1,438 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.1.1.2048-stable (5a024dfc093) FIXME as compared to v23.12.1.1368-stable (a2faa65b080)
#### Backward Incompatible Change
* The setting `print_pretty_type_names` is turned on by default. You can turn it off to keep the old behavior or `SET compatibility = '23.12'`. [#57726](https://github.com/ClickHouse/ClickHouse/pull/57726) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)).
* The function `reverseDNSQuery` is no longer available. This closes [#58368](https://github.com/ClickHouse/ClickHouse/issues/58368). [#58369](https://github.com/ClickHouse/ClickHouse/pull/58369) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Enable various changes to improve the access control in the configuration file. These changes affect the behavior, and you should check the `config.xml` in the `access_control_improvements` section. In case you are not confident, keep the values in the configuration file as they were in the previous version. [#58584](https://github.com/ClickHouse/ClickHouse/pull/58584) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow queries without aliases for subqueries for `PASTE JOIN`. [#58654](https://github.com/ClickHouse/ClickHouse/pull/58654) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix sumMapFiltered with NaN values. NaN values are now placed at the end (instead of randomly) and considered different from any values. `-0` is now also treated as equal to `0`; since 0 values are discarded, `-0` values are discarded too. [#58959](https://github.com/ClickHouse/ClickHouse/pull/58959) ([Raúl Marín](https://github.com/Algunenano)).
* The function `visibleWidth` will behave according to the docs. In previous versions, it simply counted code points after string serialization, like the `lengthUTF8` function, but didn't consider zero-width and combining characters, full-width characters, tabs, and deletes. Now the behavior is changed accordingly. If you want to keep the old behavior, set `function_visible_width_behavior` to `0`, or set `compatibility` to `23.12` or lower. [#59022](https://github.com/ClickHouse/ClickHouse/pull/59022) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Kusto dialect is disabled until these two bugs are fixed: [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037) and [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036). [#59305](https://github.com/ClickHouse/ClickHouse/pull/59305) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### New Feature
* Allow partitions from tables with different partition expressions to be attached when the destination table partition expression doesn't re-partition/ split the part. [#39507](https://github.com/ClickHouse/ClickHouse/pull/39507) ([Arthur Passos](https://github.com/arthurpassos)).
* Added statement `SYSTEM RELOAD ASYNCHRONOUS METRICS` which updates the asynchronous metrics. Mostly useful for testing and development. [#53710](https://github.com/ClickHouse/ClickHouse/pull/53710) ([Robert Schulze](https://github.com/rschu1ze)).
* Certain settings (currently `min_compress_block_size` and `max_compress_block_size`) can now be specified at column-level where they take precedence over the corresponding table-level setting. Example: `CREATE TABLE tab (col String SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840)) ENGINE = MergeTree ORDER BY tuple();`. [#55201](https://github.com/ClickHouse/ClickHouse/pull/55201) ([Duc Canh Le](https://github.com/canhld94)).
* Add `quantileDDSketch` aggregate function as well as the corresponding `quantilesDDSketch` and `medianDDSketch`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)).
* Added function `seriesDecomposeSTL()` which decomposes a time series into a season, a trend and a residual component. [#57078](https://github.com/ClickHouse/ClickHouse/pull/57078) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* Introduced MySQL Binlog Client for MaterializedMySQL: One binlog connection for many databases. [#57323](https://github.com/ClickHouse/ClickHouse/pull/57323) ([Val Doroshchuk](https://github.com/valbok)).
* Intel QuickAssist Technology (QAT) provides hardware-accelerated compression and cryptography. ClickHouse got a new compression codec `ZSTD_QAT` which utilizes QAT for zstd compression. The codec uses [Intel's QATlib](https://github.com/intel/qatlib) and [Intel's QAT ZSTD Plugin](https://github.com/intel/QAT-ZSTD-Plugin). Right now, only compression can be accelerated in hardware (a software fallback kicks in in case QAT could not be initialized), decompression always runs in software. [#57509](https://github.com/ClickHouse/ClickHouse/pull/57509) ([jasperzhu](https://github.com/jinjunzh)).
* Implemented a new way of generating object storage keys for s3 disks. Now the format can be defined in terms of `re2` regex syntax with the `key_template` option in the disk description. [#57663](https://github.com/ClickHouse/ClickHouse/pull/57663) ([Sema Checherinda](https://github.com/CheSema)).
* Table system.dropped_tables_parts contains parts of system.dropped_tables tables (dropped but not yet removed tables). [#58038](https://github.com/ClickHouse/ClickHouse/pull/58038) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Implement Variant data type that represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). Variant type is available under a setting `allow_experimental_variant_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#58047](https://github.com/ClickHouse/ClickHouse/pull/58047) ([Kruglov Pavel](https://github.com/Avogar)).
* Add setting `max_materialized_views_size_for_table` to limit the number of materialized views attached to a table. [#58068](https://github.com/ClickHouse/ClickHouse/pull/58068) ([zhongyuankai](https://github.com/zhongyuankai)).
* `clickhouse-format` improvements: * support INSERT queries with `VALUES` * support comments (use `--comments` to output them) * support `--max_line_length` option to format only long queries in multiline. [#58246](https://github.com/ClickHouse/ClickHouse/pull/58246) ([vdimir](https://github.com/vdimir)).
* Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add table `system.database_engines`. [#58390](https://github.com/ClickHouse/ClickHouse/pull/58390) ([Bharat Nallan](https://github.com/bharatnc)).
* Added `FROM <Replicas>` modifier for `SYSTEM SYNC REPLICA LIGHTWEIGHT` query. The `FROM` modifier ensures we wait for fetches and drop-ranges only for the specified source replicas, as well as any replica not in zookeeper or with an empty source_replica. [#58393](https://github.com/ClickHouse/ClickHouse/pull/58393) ([Jayme Bird](https://github.com/jaymebrd)).
* Add function `arrayShingles()` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)).
* Added functions `punycodeEncode()`, `punycodeDecode()`, `idnaEncode()` and `idnaDecode()` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)).
* Added string similarity functions `damerauLevenshteinDistance()`, `jaroSimilarity()` and `jaroWinklerSimilarity()`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)).
* Add two settings `output_format_compression_level` to change output compression level and `output_format_compression_zstd_window_log` to explicitly set compression window size and enable long-range mode for zstd compression if output compression method is `zstd`. Applied for `INTO OUTFILE` and when writing to table functions `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`. [#58539](https://github.com/ClickHouse/ClickHouse/pull/58539) ([Duc Canh Le](https://github.com/canhld94)).
* Automatically disable ANSI escape sequences in Pretty formats if the output is not a terminal. Add new `auto` mode to setting `output_format_pretty_color`. [#58614](https://github.com/ClickHouse/ClickHouse/pull/58614) ([Shaun Struwig](https://github.com/Blargian)).
* Added setting `update_insert_deduplication_token_in_dependent_materialized_views`. This setting allows to update insert deduplication token with table identifier during insert in dependent materialized views. Closes [#59165](https://github.com/ClickHouse/ClickHouse/issues/59165). [#59238](https://github.com/ClickHouse/ClickHouse/pull/59238) ([Maksim Kita](https://github.com/kitaisreal)).
#### Performance Improvement
* More cache-friendly final implementation. Note on the behaviour change: previously queries with `FINAL` modifier that read with a single stream (e.g. `max_threads=1`) produced sorted output without explicitly provided `ORDER BY` clause. This behaviour no longer exists when `enable_vertical_final = true` (and it is so by default). [#54366](https://github.com/ClickHouse/ClickHouse/pull/54366) ([Duc Canh Le](https://github.com/canhld94)).
* Optimize array element function when input is array(map)/array(array(num))/array(array(string))/array(bigint)/array(decimal). Current implementation causes too many reallocs. The optimization speeds up by ~6x especially when input type is array(map). [#56403](https://github.com/ClickHouse/ClickHouse/pull/56403) ([李扬](https://github.com/taiyang-li)).
* Bypass `Poco::BasicBufferedStreamBuf` abstraction when reading from S3 (namely `ReadBufferFromIStream`) to avoid extra copying of data. [#56961](https://github.com/ClickHouse/ClickHouse/pull/56961) ([Nikita Taranov](https://github.com/nickitat)).
* Read column once while reading more than one subcolumn from it in Compact parts. [#57631](https://github.com/ClickHouse/ClickHouse/pull/57631) ([Kruglov Pavel](https://github.com/Avogar)).
* Rewrite the AST of sum(column + literal) function. [#57853](https://github.com/ClickHouse/ClickHouse/pull/57853) ([Jiebin Sun](https://github.com/jiebinn)).
* The evaluation of function `match()` now utilizes skipping indices `ngrambf_v1` and `tokenbf_v1`. [#57882](https://github.com/ClickHouse/ClickHouse/pull/57882) ([凌涛](https://github.com/lingtaolf)).
* Default coordinator for parallel replicas is rewritten for better cache locality (same mark ranges are almost always assigned to the same replicas). Consistent hashing is used also during work stealing, so better tail latency is expected. It has been tested for linear scalability on a hundred replicas. [#57968](https://github.com/ClickHouse/ClickHouse/pull/57968) ([Nikita Taranov](https://github.com/nickitat)).
* MergeTree FINAL to not compare rows from same non-L0 part. [#58142](https://github.com/ClickHouse/ClickHouse/pull/58142) ([Duc Canh Le](https://github.com/canhld94)).
* Speed up iota calls (filling array with consecutive numbers). [#58271](https://github.com/ClickHouse/ClickHouse/pull/58271) ([Raúl Marín](https://github.com/Algunenano)).
* The evaluation of function `match()` now utilizes inverted indices. [#58284](https://github.com/ClickHouse/ClickHouse/pull/58284) ([凌涛](https://github.com/lingtaolf)).
* Speedup MIN/MAX for non numeric types. [#58334](https://github.com/ClickHouse/ClickHouse/pull/58334) ([Raúl Marín](https://github.com/Algunenano)).
* Enable JIT compilation for aggregation without a key. Closes [#41461](https://github.com/ClickHouse/ClickHouse/issues/41461). Originally [#53757](https://github.com/ClickHouse/ClickHouse/issues/53757). [#58440](https://github.com/ClickHouse/ClickHouse/pull/58440) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The performance experiments of **OnTime** on the Intel server with up to AVX2 (and BMI2) support show that this change could effectively improve the QPS of **Q2** and **Q3** by **5.0%** and **3.7%** through reducing the cycle ratio of the hotspot, **_DB::MergeTreeRangeReader::ReadResult::optimize_**, **from 11.48% to 1.09%** and **from 8.09% to 0.67%** respectively while having no impact on others. [#58800](https://github.com/ClickHouse/ClickHouse/pull/58800) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
* Use one thread less in `clickhouse-local`. [#58968](https://github.com/ClickHouse/ClickHouse/pull/58968) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Large aggregation states of `uniqExact` will be merged in parallel in distributed queries. [#59009](https://github.com/ClickHouse/ClickHouse/pull/59009) ([Nikita Taranov](https://github.com/nickitat)).
* Lower memory usage after reading from `MergeTree` tables. [#59290](https://github.com/ClickHouse/ClickHouse/pull/59290) ([Anton Popov](https://github.com/CurtizJ)).
* Lower memory usage in vertical merges. [#59340](https://github.com/ClickHouse/ClickHouse/pull/59340) ([Anton Popov](https://github.com/CurtizJ)).
#### Improvement
* Enable MySQL/MariaDB on macOS. This closes [#21191](https://github.com/ClickHouse/ClickHouse/issues/21191). [#46316](https://github.com/ClickHouse/ClickHouse/pull/46316) ([Robert Schulze](https://github.com/rschu1ze)).
* Do not interpret numbers with leading zeroes as octals. [#55575](https://github.com/ClickHouse/ClickHouse/pull/55575) ([Joanna Hulboj](https://github.com/jh0x)).
* Replace HTTP outgoing buffering based on std ostream with CH Buffer. Add bytes counting metrics for interfaces. [#56064](https://github.com/ClickHouse/ClickHouse/pull/56064) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Disable `max_rows_in_set_to_optimize_join` by default. [#56396](https://github.com/ClickHouse/ClickHouse/pull/56396) ([vdimir](https://github.com/vdimir)).
* Add `<host_name>` config parameter that allows avoiding resolving hostnames in DDLWorker. This mitigates the possibility of the queue being stuck in case of a change in cluster definition. Closes [#57573](https://github.com/ClickHouse/ClickHouse/issues/57573). [#57603](https://github.com/ClickHouse/ClickHouse/pull/57603) ([Nikolay Degterinsky](https://github.com/evillique)).
* Increase `load_metadata_threads` to 16 for the filesystem cache. It will make the server start up faster. [#57732](https://github.com/ClickHouse/ClickHouse/pull/57732) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve the `multiIf` function performance when the type is Nullable. [#57745](https://github.com/ClickHouse/ClickHouse/pull/57745) ([KevinyhZou](https://github.com/KevinyhZou)).
* Add ability to throttle merges/mutations (`max_mutations_bandwidth_for_server`/`max_merges_bandwidth_for_server`). [#57877](https://github.com/ClickHouse/ClickHouse/pull/57877) ([Azat Khuzhin](https://github.com/azat)).
* Replaced undocumented (boolean) column `is_hot_reloadable` in system table `system.server_settings` by (Enum8) column `changeable_without_restart` with possible values `No`, `Yes`, `IncreaseOnly` and `DecreaseOnly`. Also documented the column. [#58029](https://github.com/ClickHouse/ClickHouse/pull/58029) ([skyoct](https://github.com/skyoct)).
* ClusterDiscovery supports setting username and password, close [#58063](https://github.com/ClickHouse/ClickHouse/issues/58063). [#58123](https://github.com/ClickHouse/ClickHouse/pull/58123) ([vdimir](https://github.com/vdimir)).
* Support query parameters in ALTER TABLE ... PART. [#58297](https://github.com/ClickHouse/ClickHouse/pull/58297) ([Azat Khuzhin](https://github.com/azat)).
* Create consumers for Kafka tables on the fly (but keep them for some period - `kafka_consumers_pool_ttl_ms`, since last used). This should fix the problem with statistics for `system.kafka_consumers` (which are not consumed when nobody reads from the Kafka table, which leads to a live memory leak and slow table detach). This PR also enables stats for `system.kafka_consumers` by default again. [#58310](https://github.com/ClickHouse/ClickHouse/pull/58310) ([Azat Khuzhin](https://github.com/azat)).
* `sparkBar` as an alias to `sparkbar`. [#58335](https://github.com/ClickHouse/ClickHouse/pull/58335) ([凌涛](https://github.com/lingtaolf)).
* Avoid sending ComposeObject requests after upload to GCS. [#58343](https://github.com/ClickHouse/ClickHouse/pull/58343) ([Azat Khuzhin](https://github.com/azat)).
* Correctly handle keys with dot in the name in configurations XMLs. [#58354](https://github.com/ClickHouse/ClickHouse/pull/58354) ([Azat Khuzhin](https://github.com/azat)).
* Added comments (brief descriptions) to all columns of system tables. There are several reasons for this: - We use system tables a lot and sometimes it could be very difficult for a developer to understand the purpose and the meaning of a particular column. - We change (add new ones or modify existing) system tables a lot and the documentation for them is always outdated. For example take a look at the documentation page for [`system.parts`](https://clickhouse.com/docs/en/operations/system-tables/parts). It misses a lot of columns. - We would like to eventually generate documentation directly from ClickHouse. [#58356](https://github.com/ClickHouse/ClickHouse/pull/58356) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Make function `format` return constant on constant arguments. This closes [#58355](https://github.com/ClickHouse/ClickHouse/issues/58355). [#58358](https://github.com/ClickHouse/ClickHouse/pull/58358) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Attach all system tables in `clickhouse-local`, including `system.parts`. This closes [#58312](https://github.com/ClickHouse/ClickHouse/issues/58312). [#58359](https://github.com/ClickHouse/ClickHouse/pull/58359) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support for `Enum` data types in function `transform`. This closes [#58241](https://github.com/ClickHouse/ClickHouse/issues/58241). [#58360](https://github.com/ClickHouse/ClickHouse/pull/58360) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow registering database engines independently. [#58365](https://github.com/ClickHouse/ClickHouse/pull/58365) ([Bharat Nallan](https://github.com/bharatnc)).
* Adding a setting `max_estimated_execution_time` to separate `max_execution_time` and `max_estimated_execution_time`. [#58402](https://github.com/ClickHouse/ClickHouse/pull/58402) ([Zhang Yifan](https://github.com/zhangyifan27)).
* Allow registering interpreters independently. [#58443](https://github.com/ClickHouse/ClickHouse/pull/58443) ([Bharat Nallan](https://github.com/bharatnc)).
* Provide hint when an invalid database engine name is used. [#58444](https://github.com/ClickHouse/ClickHouse/pull/58444) ([Bharat Nallan](https://github.com/bharatnc)).
* Avoid huge memory consumption during Keeper startup for more cases. [#58455](https://github.com/ClickHouse/ClickHouse/pull/58455) ([Antonio Andelic](https://github.com/antonio2368)).
* Add settings for better control of indexes type in Arrow dictionary. Use signed integer type for indexes by default as Arrow recommends. Closes [#57401](https://github.com/ClickHouse/ClickHouse/issues/57401). [#58519](https://github.com/ClickHouse/ClickHouse/pull/58519) ([Kruglov Pavel](https://github.com/Avogar)).
* Added function `sqidDecode()` which decodes [Sqids](https://sqids.org/). [#58544](https://github.com/ClickHouse/ClickHouse/pull/58544) ([Robert Schulze](https://github.com/rschu1ze)).
* Allow to read Bool values into String in JSON input formats. It's done under a setting `input_format_json_read_bools_as_strings` that is enabled by default. [#58561](https://github.com/ClickHouse/ClickHouse/pull/58561) ([Kruglov Pavel](https://github.com/Avogar)).
* Implement [#58575](https://github.com/ClickHouse/ClickHouse/issues/58575) Support `CLICKHOUSE_PASSWORD_FILE` environment variable when running the docker image. [#58583](https://github.com/ClickHouse/ClickHouse/pull/58583) ([Eyal Halpern Shalev](https://github.com/Eyal-Shalev)).
* When executing some queries, which require a lot of streams for reading data, the error `"Paste JOIN requires sorted tables only"` was previously thrown. Now the numbers of streams resize to 1 in that case. [#58608](https://github.com/ClickHouse/ClickHouse/pull/58608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)).
* Better message for INVALID_IDENTIFIER error. [#58703](https://github.com/ClickHouse/ClickHouse/pull/58703) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Improved handling of signed numeric literals in normalizeQuery. [#58710](https://github.com/ClickHouse/ClickHouse/pull/58710) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Support Point data type for MySQL. [#58721](https://github.com/ClickHouse/ClickHouse/pull/58721) ([Kseniia Sumarokova](https://github.com/kssenii)).
* When comparing a Float32 column and a const string, read the string as Float32 (instead of Float64). [#58724](https://github.com/ClickHouse/ClickHouse/pull/58724) ([Raúl Marín](https://github.com/Algunenano)).
* Improve S3 compatible, add Ecloud EOS storage support. [#58786](https://github.com/ClickHouse/ClickHouse/pull/58786) ([xleoken](https://github.com/xleoken)).
* Allow `KILL QUERY` to cancel backups / restores. This PR also makes running backups and restores visible in `system.processes`. Also there is a new setting in the server configuration now - `shutdown_wait_backups_and_restores` (default=true) which makes the server either wait on shutdown for all running backups and restores to finish or just cancel them. [#58804](https://github.com/ClickHouse/ClickHouse/pull/58804) ([Vitaly Baranov](https://github.com/vitlibar)).
* Avro format support Zstd codec. Closes [#58735](https://github.com/ClickHouse/ClickHouse/issues/58735). [#58805](https://github.com/ClickHouse/ClickHouse/pull/58805) ([flynn](https://github.com/ucasfl)).
* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#58835](https://github.com/ClickHouse/ClickHouse/pull/58835) ([Serge Klochkov](https://github.com/slvrtrn)).
* Fixing a problem described in [#58719](https://github.com/ClickHouse/ClickHouse/issues/58719). [#58841](https://github.com/ClickHouse/ClickHouse/pull/58841) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Make sure that for custom (created from SQL) disks either `filesystem_caches_path` (a common directory prefix for all filesystem caches) or `custom_cached_disks_base_directory` (a common directory prefix for only filesystem caches created from custom disks) is specified in server config. `custom_cached_disks_base_directory` has higher priority for custom disks over `filesystem_caches_path`, which is used if the former one is absent. Filesystem cache setting `path` must lie inside that directory, otherwise an exception will be thrown, preventing the disk from being created. This will not affect disks that were created on an older version before the server was upgraded - in that case the exception will not be thrown, to allow the server to start successfully. `custom_cached_disks_base_directory` is added to default server config as `/var/lib/clickhouse/caches/`. Closes [#57825](https://github.com/ClickHouse/ClickHouse/issues/57825). [#58869](https://github.com/ClickHouse/ClickHouse/pull/58869) ([Kseniia Sumarokova](https://github.com/kssenii)).
* MySQL interface gained compatibility with `SHOW WARNINGS`/`SHOW COUNT(*) WARNINGS` queries, though the returned result is always an empty set. [#58929](https://github.com/ClickHouse/ClickHouse/pull/58929) ([Serge Klochkov](https://github.com/slvrtrn)).
* Skip unavailable replicas when executing parallel distributed `INSERT SELECT`. [#58931](https://github.com/ClickHouse/ClickHouse/pull/58931) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Display word-descriptive log level while enabling structured log formatting in json. [#58936](https://github.com/ClickHouse/ClickHouse/pull/58936) ([Tim Liou](https://github.com/wheatdog)).
* MySQL interface gained support for `CAST(x AS SIGNED)` and `CAST(x AS UNSIGNED)` statements via data type aliases: `SIGNED` for Int64, and `UNSIGNED` for UInt64. This improves compatibility with BI tools such as Looker Studio. [#58954](https://github.com/ClickHouse/ClickHouse/pull/58954) ([Serge Klochkov](https://github.com/slvrtrn)).
* Function `seriesDecomposeSTL()` now returns a baseline component as season + trend components. [#58961](https://github.com/ClickHouse/ClickHouse/pull/58961) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* Fix memory management in copyDataToS3File. [#58962](https://github.com/ClickHouse/ClickHouse/pull/58962) ([Vitaly Baranov](https://github.com/vitlibar)).
* Change working directory to data path in docker container. [#58975](https://github.com/ClickHouse/ClickHouse/pull/58975) ([cangyin](https://github.com/cangyin)).
* Added setting for Azure Blob Storage `azure_max_unexpected_write_error_retries`, can also be set from config under azure section. [#59001](https://github.com/ClickHouse/ClickHouse/pull/59001) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Keeper improvement: reduce Keeper's memory usage for stored nodes. [#59002](https://github.com/ClickHouse/ClickHouse/pull/59002) ([Antonio Andelic](https://github.com/antonio2368)).
* Allow server to start with broken data lake table. Closes [#58625](https://github.com/ClickHouse/ClickHouse/issues/58625). [#59080](https://github.com/ClickHouse/ClickHouse/pull/59080) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixes https://github.com/ClickHouse/ClickHouse/pull/59120#issuecomment-1906177350. [#59122](https://github.com/ClickHouse/ClickHouse/pull/59122) ([Arthur Passos](https://github.com/arthurpassos)).
* The state of URL's #hash in the dashboard is now compressed using [lz-string](https://github.com/pieroxy/lz-string). The default size of the state string is compressed from 6856B to 2823B. [#59124](https://github.com/ClickHouse/ClickHouse/pull/59124) ([Amos Bird](https://github.com/amosbird)).
* Allow to ignore schema evolution in Iceberg table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation. This is done under a setting `iceberg_engine_ignore_schema_evolution` that is disabled by default. Note that enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. [#59133](https://github.com/ClickHouse/ClickHouse/pull/59133) ([Kruglov Pavel](https://github.com/Avogar)).
* Prohibit mutable operations (`INSERT`/`ALTER`/`OPTIMIZE`/...) on read-only/write-once storages with a proper `TABLE_IS_READ_ONLY` error (to avoid leftovers). Avoid leaving left-overs on write-once disks (`format_version.txt`) on `CREATE`/`ATTACH`. Ignore `DROP` for `ReplicatedMergeTree` (so as for `MergeTree`). Fix iterating over `s3_plain` (`MetadataStorageFromPlainObjectStorage::iterateDirectory`). Note read-only is `web` disk, and write-once is `s3_plain`. [#59170](https://github.com/ClickHouse/ClickHouse/pull/59170) ([Azat Khuzhin](https://github.com/azat)).
* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#59293](https://github.com/ClickHouse/ClickHouse/pull/59293) ([Serge Klochkov](https://github.com/slvrtrn)).
* Fix bug in experimental `_block_number` column which could lead to logical error during complex combination of `ALTER`s and `merge`s. Fixes [#56202](https://github.com/ClickHouse/ClickHouse/issues/56202). Replaces [#58601](https://github.com/ClickHouse/ClickHouse/issues/58601). CC @SmitaRKulkarni. [#59295](https://github.com/ClickHouse/ClickHouse/pull/59295) ([alesapin](https://github.com/alesapin)).
* Play UI understands when an exception is returned inside JSON. Adjustment for [#52853](https://github.com/ClickHouse/ClickHouse/issues/52853). [#59303](https://github.com/ClickHouse/ClickHouse/pull/59303) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* `/binary` HTTP handler allows to specify user, host, and optionally, password in the query string. [#59311](https://github.com/ClickHouse/ClickHouse/pull/59311) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support backups for compressed in-memory tables. This closes [#57893](https://github.com/ClickHouse/ClickHouse/issues/57893). [#59315](https://github.com/ClickHouse/ClickHouse/pull/59315) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve exception message of function regexp_extract, close [#56393](https://github.com/ClickHouse/ClickHouse/issues/56393). [#59319](https://github.com/ClickHouse/ClickHouse/pull/59319) ([李扬](https://github.com/taiyang-li)).
* Support the FORMAT clause in BACKUP and RESTORE queries. [#59338](https://github.com/ClickHouse/ClickHouse/pull/59338) ([Vitaly Baranov](https://github.com/vitlibar)).
* Function `concatWithSeparator()` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). For example, `SELECT concatWithSeparator('.', 'number', 1)` now returns `number.1`. [#59341](https://github.com/ClickHouse/ClickHouse/pull/59341) ([Robert Schulze](https://github.com/rschu1ze)).
#### Build/Testing/Packaging Improvement
* Improve aliases for clickhouse binary (now `ch`/`clickhouse` is `clickhouse-local` or `clickhouse` depends on the arguments) and add bash completion for new aliases. [#58344](https://github.com/ClickHouse/ClickHouse/pull/58344) ([Azat Khuzhin](https://github.com/azat)).
* Add settings changes check to CI to check that all settings changes are reflected in settings changes history. [#58555](https://github.com/ClickHouse/ClickHouse/pull/58555) ([Kruglov Pavel](https://github.com/Avogar)).
* Use tables directly attached from S3 in stateful tests. [#58791](https://github.com/ClickHouse/ClickHouse/pull/58791) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Save the whole `fuzzer.log` as an archive instead of the last 100k lines. `tail -n 100000` often removes lines with table definitions. Example:. [#58821](https://github.com/ClickHouse/ClickHouse/pull/58821) ([Dmitry Novik](https://github.com/novikd)).
* Enable Rust on OSX ARM64 (this will add fuzzy search in client with skim and prql language, though I don't think there are people who host ClickHouse on darwin, so it is mostly for fuzzy search in client I would say). [#59272](https://github.com/ClickHouse/ClickHouse/pull/59272) ([Azat Khuzhin](https://github.com/azat)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Add join keys conversion for nested lowcardinality [#51550](https://github.com/ClickHouse/ClickHouse/pull/51550) ([vdimir](https://github.com/vdimir)).
* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a bug with projections and the aggregate_functions_null_for_empty setting during insertion. [#56944](https://github.com/ClickHouse/ClickHouse/pull/56944) ([Amos Bird](https://github.com/amosbird)).
* Fixed potential exception due to stale profile UUID [#57263](https://github.com/ClickHouse/ClickHouse/pull/57263) ([Vasily Nemkov](https://github.com/Enmk)).
* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
* Ignore MVs with dropped target table during pushing to views [#57520](https://github.com/ClickHouse/ClickHouse/pull/57520) ([Kruglov Pavel](https://github.com/Avogar)).
* [RFC] Eliminate possible race between ALTER_METADATA and MERGE_PARTS [#57755](https://github.com/ClickHouse/ClickHouse/pull/57755) ([Azat Khuzhin](https://github.com/azat)).
* Fix the exprs order bug in group by with rollup [#57786](https://github.com/ClickHouse/ClickHouse/pull/57786) ([Chen768959](https://github.com/Chen768959)).
* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Allow users to work with symlinks in user_files_path (again) [#58447](https://github.com/ClickHouse/ClickHouse/pull/58447) ([Duc Canh Le](https://github.com/canhld94)).
* Fix segfault when graphite table does not have agg function [#58453](https://github.com/ClickHouse/ClickHouse/pull/58453) ([Duc Canh Le](https://github.com/canhld94)).
* Delay reading from StorageKafka to allow multiple reads in materialized views [#58477](https://github.com/ClickHouse/ClickHouse/pull/58477) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)).
* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)).
* Enable ordinary databases during restoration [#58520](https://github.com/ClickHouse/ClickHouse/pull/58520) ([Jihyuk Bok](https://github.com/tomahawk28)).
* Fix hive threadpool read ORC/Parquet/... Failed [#58537](https://github.com/ClickHouse/ClickHouse/pull/58537) ([sunny](https://github.com/sunny19930321)).
* Hide credentials in system.backup_log base_backup_name column [#58550](https://github.com/ClickHouse/ClickHouse/pull/58550) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* toStartOfInterval for milli- microseconds values rounding [#58557](https://github.com/ClickHouse/ClickHouse/pull/58557) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Disable max_joined_block_rows in ConcurrentHashJoin [#58595](https://github.com/ClickHouse/ClickHouse/pull/58595) ([vdimir](https://github.com/vdimir)).
* Fix join using nullable in old analyzer [#58596](https://github.com/ClickHouse/ClickHouse/pull/58596) ([vdimir](https://github.com/vdimir)).
* `makeDateTime64()`: Allow non-const fraction argument [#58597](https://github.com/ClickHouse/ClickHouse/pull/58597) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix possible NULL dereference during symbolizing inline frames [#58607](https://github.com/ClickHouse/ClickHouse/pull/58607) ([Azat Khuzhin](https://github.com/azat)).
* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix broken partition key analysis when doing projection optimization [#58638](https://github.com/ClickHouse/ClickHouse/pull/58638) ([Amos Bird](https://github.com/amosbird)).
* Query cache: Fix per-user quota [#58731](https://github.com/ClickHouse/ClickHouse/pull/58731) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)).
* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)).
* Don't process requests in Keeper during shutdown [#58765](https://github.com/ClickHouse/ClickHouse/pull/58765) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix Segfault in `SlabsPolygonIndex::find` [#58771](https://github.com/ClickHouse/ClickHouse/pull/58771) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)).
* Table CREATE DROP Poco::Logger memory leak fix [#58831](https://github.com/ClickHouse/ClickHouse/pull/58831) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix HTTP compressors finalization [#58846](https://github.com/ClickHouse/ClickHouse/pull/58846) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Multiple read file log storage in mv [#58877](https://github.com/ClickHouse/ClickHouse/pull/58877) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Restriction for the access key id for s3. [#58900](https://github.com/ClickHouse/ClickHouse/pull/58900) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix possible crash in clickhouse-local during loading suggestions [#58907](https://github.com/ClickHouse/ClickHouse/pull/58907) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash when indexHint() is used [#58911](https://github.com/ClickHouse/ClickHouse/pull/58911) ([Dmitry Novik](https://github.com/novikd)).
* Fix StorageURL forgetting headers on server restart [#58933](https://github.com/ClickHouse/ClickHouse/pull/58933) ([Michael Kolupaev](https://github.com/al13n321)).
* Analyzer: fix storage replacement with insertion block [#58958](https://github.com/ClickHouse/ClickHouse/pull/58958) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix seek in ReadBufferFromZipArchive [#58966](https://github.com/ClickHouse/ClickHouse/pull/58966) ([Michael Kolupaev](https://github.com/al13n321)).
* `DROP INDEX` of inverted index now removes all relevant files from persistence [#59040](https://github.com/ClickHouse/ClickHouse/pull/59040) ([mochi](https://github.com/MochiXu)).
* Fix data race on query_factories_info [#59049](https://github.com/ClickHouse/ClickHouse/pull/59049) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Disable "Too many redirects" error retry [#59099](https://github.com/ClickHouse/ClickHouse/pull/59099) ([skyoct](https://github.com/skyoct)).
* Fix aggregation issue in mixed x86_64 and ARM clusters [#59132](https://github.com/ClickHouse/ClickHouse/pull/59132) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Fix not started database shutdown deadlock [#59137](https://github.com/ClickHouse/ClickHouse/pull/59137) ([Sergei Trifonov](https://github.com/serxa)).
* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix crash with nullable timezone for `toString` [#59190](https://github.com/ClickHouse/ClickHouse/pull/59190) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix abort in iceberg metadata on bad file paths [#59275](https://github.com/ClickHouse/ClickHouse/pull/59275) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix architecture name in select of Rust target [#59307](https://github.com/ClickHouse/ClickHouse/pull/59307) ([p1rattttt](https://github.com/p1rattttt)).
* Fix not-ready set for system.tables [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix lazy initialization in RabbitMQ [#59352](https://github.com/ClickHouse/ClickHouse/pull/59352) ([Kruglov Pavel](https://github.com/Avogar)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Refreshable materialized views (takeover)"'. [#58296](https://github.com/ClickHouse/ClickHouse/pull/58296) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Fix an error in the release script - it didn't allow to make 23.12."'. [#58381](https://github.com/ClickHouse/ClickHouse/pull/58381) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* NO CL ENTRY: 'Revert "Use CH Buffer for HTTP out stream, add metrics for interfaces"'. [#58450](https://github.com/ClickHouse/ClickHouse/pull/58450) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Second attempt: Use CH Buffer for HTTP out stream, add metrics for interfaces'. [#58475](https://github.com/ClickHouse/ClickHouse/pull/58475) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* NO CL ENTRY: 'Revert "Merging [#53757](https://github.com/ClickHouse/ClickHouse/issues/53757)"'. [#58542](https://github.com/ClickHouse/ClickHouse/pull/58542) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Add support for MySQL `net_write_timeout` and `net_read_timeout` settings"'. [#58872](https://github.com/ClickHouse/ClickHouse/pull/58872) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Extend performance test norm_dist.xml"'. [#58989](https://github.com/ClickHouse/ClickHouse/pull/58989) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892)"'. [#58990](https://github.com/ClickHouse/ClickHouse/pull/58990) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Allow parallel replicas for JOIN with analyzer [part 1]."'. [#59059](https://github.com/ClickHouse/ClickHouse/pull/59059) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Consume leading zeroes when parsing a number in ConstantExpressionTemplate"'. [#59070](https://github.com/ClickHouse/ClickHouse/pull/59070) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Revert "Allow parallel replicas for JOIN with analyzer [part 1].""'. [#59076](https://github.com/ClickHouse/ClickHouse/pull/59076) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* NO CL ENTRY: 'Revert "Allow to attach partition from table with different partition expression when destination partition expression doesn't re-partition"'. [#59120](https://github.com/ClickHouse/ClickHouse/pull/59120) ([Robert Schulze](https://github.com/rschu1ze)).
* NO CL ENTRY: 'DisksApp.cpp: fix typo (specifiged → specified)'. [#59140](https://github.com/ClickHouse/ClickHouse/pull/59140) ([Nikolay Edigaryev](https://github.com/edigaryev)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Analyzer: Fix resolving subcolumns in JOIN [#49703](https://github.com/ClickHouse/ClickHouse/pull/49703) ([vdimir](https://github.com/vdimir)).
* Analyzer: always qualify execution names [#53705](https://github.com/ClickHouse/ClickHouse/pull/53705) ([Dmitry Novik](https://github.com/novikd)).
* Insert quorum: check host node version in addition [#55528](https://github.com/ClickHouse/ClickHouse/pull/55528) ([Igor Nikonov](https://github.com/devcrafter)).
* Remove more old code of projection analysis [#55579](https://github.com/ClickHouse/ClickHouse/pull/55579) ([Anton Popov](https://github.com/CurtizJ)).
* Better exception messages in input formats [#57053](https://github.com/ClickHouse/ClickHouse/pull/57053) ([Kruglov Pavel](https://github.com/Avogar)).
* Parallel replicas custom key: skip unavailable replicas [#57235](https://github.com/ClickHouse/ClickHouse/pull/57235) ([Igor Nikonov](https://github.com/devcrafter)).
* Small change in log message in MergeTreeDataMergerMutator [#57550](https://github.com/ClickHouse/ClickHouse/pull/57550) ([Nikita Taranov](https://github.com/nickitat)).
* fs cache: small optimization [#57615](https://github.com/ClickHouse/ClickHouse/pull/57615) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Customizable dependency failure handling for AsyncLoader [#57697](https://github.com/ClickHouse/ClickHouse/pull/57697) ([Sergei Trifonov](https://github.com/serxa)).
* Bring test back [#57700](https://github.com/ClickHouse/ClickHouse/pull/57700) ([Nikita Taranov](https://github.com/nickitat)).
* Change default database name in clickhouse-local to 'default' [#57774](https://github.com/ClickHouse/ClickHouse/pull/57774) ([Kruglov Pavel](https://github.com/Avogar)).
* Add option `--show-whitespaces-in-diff` to clickhouse-test [#57870](https://github.com/ClickHouse/ClickHouse/pull/57870) ([vdimir](https://github.com/vdimir)).
* Update `query_masking_rules` when reloading the config, attempt 2 [#57993](https://github.com/ClickHouse/ClickHouse/pull/57993) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Remove unneeded parameter `use_external_buffer` from `AsynchronousReadBuffer*` [#58077](https://github.com/ClickHouse/ClickHouse/pull/58077) ([Nikita Taranov](https://github.com/nickitat)).
* Print another message in Bugfix check if internal check has failed [#58091](https://github.com/ClickHouse/ClickHouse/pull/58091) ([vdimir](https://github.com/vdimir)).
* Refactor StorageMerge virtual columns filtering. [#58255](https://github.com/ClickHouse/ClickHouse/pull/58255) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Analyzer: fix tuple comparison when result is always null [#58266](https://github.com/ClickHouse/ClickHouse/pull/58266) ([vdimir](https://github.com/vdimir)).
* Fix an error in the release script - it didn't allow to make 23.12. [#58288](https://github.com/ClickHouse/ClickHouse/pull/58288) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update version_date.tsv and changelogs after v23.12.1.1368-stable [#58290](https://github.com/ClickHouse/ClickHouse/pull/58290) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix test_storage_s3_queue/test.py::test_drop_table [#58293](https://github.com/ClickHouse/ClickHouse/pull/58293) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix timeout in 01732_race_condition_storage_join_long [#58298](https://github.com/ClickHouse/ClickHouse/pull/58298) ([vdimir](https://github.com/vdimir)).
* Handle another case for preprocessing in Keeper [#58308](https://github.com/ClickHouse/ClickHouse/pull/58308) ([Antonio Andelic](https://github.com/antonio2368)).
* Disable max_bytes_before_external* in 00172_hits_joins [#58309](https://github.com/ClickHouse/ClickHouse/pull/58309) ([vdimir](https://github.com/vdimir)).
* Analyzer: support functional arguments in USING clause [#58317](https://github.com/ClickHouse/ClickHouse/pull/58317) ([Dmitry Novik](https://github.com/novikd)).
* Fixed logical error in CheckSortedTransform [#58318](https://github.com/ClickHouse/ClickHouse/pull/58318) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Refreshable materialized views again [#58320](https://github.com/ClickHouse/ClickHouse/pull/58320) ([Michael Kolupaev](https://github.com/al13n321)).
* Organize symbols from src/* into DB namespace [#58336](https://github.com/ClickHouse/ClickHouse/pull/58336) ([Amos Bird](https://github.com/amosbird)).
* Add a style check against DOS and Windows [#58345](https://github.com/ClickHouse/ClickHouse/pull/58345) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Check what happens if we remove array joined columns from KeyCondition [#58346](https://github.com/ClickHouse/ClickHouse/pull/58346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Upload time of the perf tests into artifacts as test_duration_ms [#58348](https://github.com/ClickHouse/ClickHouse/pull/58348) ([Azat Khuzhin](https://github.com/azat)).
* Keep exception format string in retries ctl [#58351](https://github.com/ClickHouse/ClickHouse/pull/58351) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix replication.lib helper (system.mutations has database not current_database) [#58352](https://github.com/ClickHouse/ClickHouse/pull/58352) ([Azat Khuzhin](https://github.com/azat)).
* Refactor StorageHDFS and StorageFile virtual columns filtering [#58353](https://github.com/ClickHouse/ClickHouse/pull/58353) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix suspended workers for AsyncLoader [#58362](https://github.com/ClickHouse/ClickHouse/pull/58362) ([Sergei Trifonov](https://github.com/serxa)).
* Remove stale events from README [#58364](https://github.com/ClickHouse/ClickHouse/pull/58364) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Do not fail the CI on an expired token [#58384](https://github.com/ClickHouse/ClickHouse/pull/58384) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add a test for [#38534](https://github.com/ClickHouse/ClickHouse/issues/38534) [#58391](https://github.com/ClickHouse/ClickHouse/pull/58391) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* fix database engine validation inside database factory [#58395](https://github.com/ClickHouse/ClickHouse/pull/58395) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix bad formatting of the `timeDiff` compatibility alias [#58398](https://github.com/ClickHouse/ClickHouse/pull/58398) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix a comment; remove unused method; stop using pointers [#58399](https://github.com/ClickHouse/ClickHouse/pull/58399) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix test_user_valid_until [#58409](https://github.com/ClickHouse/ClickHouse/pull/58409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Make a test not depend on the lack of floating point associativity [#58439](https://github.com/ClickHouse/ClickHouse/pull/58439) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `02944_dynamically_change_filesystem_cache_size` [#58445](https://github.com/ClickHouse/ClickHouse/pull/58445) ([Nikolay Degterinsky](https://github.com/evillique)).
* Analyzer: Fix LOGICAL_ERROR with LowCardinality [#58457](https://github.com/ClickHouse/ClickHouse/pull/58457) ([Dmitry Novik](https://github.com/novikd)).
* Replace `std::regex` by re2 [#58458](https://github.com/ClickHouse/ClickHouse/pull/58458) ([Robert Schulze](https://github.com/rschu1ze)).
* Improve perf tests [#58478](https://github.com/ClickHouse/ClickHouse/pull/58478) ([Raúl Marín](https://github.com/Algunenano)).
* Check if I can remove KeyCondition analysis on AST. [#58480](https://github.com/ClickHouse/ClickHouse/pull/58480) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix some thread pool settings not updating at runtime [#58485](https://github.com/ClickHouse/ClickHouse/pull/58485) ([Michael Kolupaev](https://github.com/al13n321)).
* Lower log levels for some Raft logs to new test level [#58487](https://github.com/ClickHouse/ClickHouse/pull/58487) ([Antonio Andelic](https://github.com/antonio2368)).
* PartsSplitter small refactoring [#58506](https://github.com/ClickHouse/ClickHouse/pull/58506) ([Maksim Kita](https://github.com/kitaisreal)).
* Sync content of the docker test images [#58507](https://github.com/ClickHouse/ClickHouse/pull/58507) ([Max K.](https://github.com/maxknv)).
* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)).
* Minor fixups for `sqid()` [#58517](https://github.com/ClickHouse/ClickHouse/pull/58517) ([Robert Schulze](https://github.com/rschu1ze)).
* Update version_date.tsv and changelogs after v23.12.2.59-stable [#58545](https://github.com/ClickHouse/ClickHouse/pull/58545) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.11.4.24-stable [#58546](https://github.com/ClickHouse/ClickHouse/pull/58546) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.8.9.54-lts [#58547](https://github.com/ClickHouse/ClickHouse/pull/58547) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.10.6.60-stable [#58548](https://github.com/ClickHouse/ClickHouse/pull/58548) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.3.19.32-lts [#58549](https://github.com/ClickHouse/ClickHouse/pull/58549) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update CHANGELOG.md [#58559](https://github.com/ClickHouse/ClickHouse/pull/58559) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Fix test 02932_kill_query_sleep [#58560](https://github.com/ClickHouse/ClickHouse/pull/58560) ([Vitaly Baranov](https://github.com/vitlibar)).
* CI fix. Add packager script to build digest [#58571](https://github.com/ClickHouse/ClickHouse/pull/58571) ([Max K.](https://github.com/maxknv)).
* fix and test that S3Clients are reused [#58573](https://github.com/ClickHouse/ClickHouse/pull/58573) ([Sema Checherinda](https://github.com/CheSema)).
* Follow-up to [#58482](https://github.com/ClickHouse/ClickHouse/issues/58482) [#58574](https://github.com/ClickHouse/ClickHouse/pull/58574) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Do not load database engines in suggest [#58586](https://github.com/ClickHouse/ClickHouse/pull/58586) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix wrong message in Keeper [#58588](https://github.com/ClickHouse/ClickHouse/pull/58588) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add some missing LLVM includes [#58594](https://github.com/ClickHouse/ClickHouse/pull/58594) ([Raúl Marín](https://github.com/Algunenano)).
* Small fix in Keeper [#58598](https://github.com/ClickHouse/ClickHouse/pull/58598) ([Antonio Andelic](https://github.com/antonio2368)).
* Update analyzer_tech_debt.txt [#58599](https://github.com/ClickHouse/ClickHouse/pull/58599) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Simplify release.py script [#58600](https://github.com/ClickHouse/ClickHouse/pull/58600) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update analyzer_tech_debt.txt [#58602](https://github.com/ClickHouse/ClickHouse/pull/58602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Refactor stacktrace symbolizer to avoid copy-paste [#58610](https://github.com/ClickHouse/ClickHouse/pull/58610) ([Azat Khuzhin](https://github.com/azat)).
* Add intel AMX checking [#58617](https://github.com/ClickHouse/ClickHouse/pull/58617) ([Roman Glinskikh](https://github.com/omgronny)).
* Optional `client` argument for `S3Helper` [#58619](https://github.com/ClickHouse/ClickHouse/pull/58619) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add sorting to 02366_kql_summarize.sql [#58621](https://github.com/ClickHouse/ClickHouse/pull/58621) ([Raúl Marín](https://github.com/Algunenano)).
* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Remove more projections code [#58628](https://github.com/ClickHouse/ClickHouse/pull/58628) ([Anton Popov](https://github.com/CurtizJ)).
* Remove finalize() from ~WriteBufferFromEncryptedFile [#58629](https://github.com/ClickHouse/ClickHouse/pull/58629) ([Vitaly Baranov](https://github.com/vitlibar)).
* Update test_replicated_database/test.py [#58647](https://github.com/ClickHouse/ClickHouse/pull/58647) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Try disabling `muzzy_decay_ms` in jemalloc [#58648](https://github.com/ClickHouse/ClickHouse/pull/58648) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix test_replicated_database::test_startup_without_zk flakiness [#58649](https://github.com/ClickHouse/ClickHouse/pull/58649) ([Azat Khuzhin](https://github.com/azat)).
* Fix 01600_remerge_sort_lowered_memory_bytes_ratio flakiness (due to settings randomization) [#58650](https://github.com/ClickHouse/ClickHouse/pull/58650) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer: Fix assertion in HashJoin with duplicate columns [#58652](https://github.com/ClickHouse/ClickHouse/pull/58652) ([vdimir](https://github.com/vdimir)).
* Document that `match()` can use `ngrambf_v1` and `tokenbf_v1` indexes [#58655](https://github.com/ClickHouse/ClickHouse/pull/58655) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix perf tests duration (checks.test_duration_ms) [#58656](https://github.com/ClickHouse/ClickHouse/pull/58656) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer: Correctly handle constant set in index [#58657](https://github.com/ClickHouse/ClickHouse/pull/58657) ([Dmitry Novik](https://github.com/novikd)).
* fix a typo in stress randomization setting [#58658](https://github.com/ClickHouse/ClickHouse/pull/58658) ([Sema Checherinda](https://github.com/CheSema)).
* Small follow-up to `std::regex` --> `re2` conversion ([#58458](https://github.com/ClickHouse/ClickHouse/issues/58458)) [#58678](https://github.com/ClickHouse/ClickHouse/pull/58678) ([Robert Schulze](https://github.com/rschu1ze)).
* Remove `<regex>` from libcxx [#58681](https://github.com/ClickHouse/ClickHouse/pull/58681) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix bad log message [#58698](https://github.com/ClickHouse/ClickHouse/pull/58698) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Some small improvements to version_helper from [#57203](https://github.com/ClickHouse/ClickHouse/issues/57203) [#58712](https://github.com/ClickHouse/ClickHouse/pull/58712) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Small fixes in different helpers [#58717](https://github.com/ClickHouse/ClickHouse/pull/58717) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix bug in new (not released yet) parallel replicas coordinator [#58722](https://github.com/ClickHouse/ClickHouse/pull/58722) ([Nikita Taranov](https://github.com/nickitat)).
* Analyzer: Fix LOGICAL_ERROR in CountDistinctPass [#58723](https://github.com/ClickHouse/ClickHouse/pull/58723) ([Dmitry Novik](https://github.com/novikd)).
* Fix reading of offsets subcolumn (`size0`) from `Nested` [#58729](https://github.com/ClickHouse/ClickHouse/pull/58729) ([Anton Popov](https://github.com/CurtizJ)).
* Fix Mac OS X [#58733](https://github.com/ClickHouse/ClickHouse/pull/58733) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* fix stress with generate-template-key [#58740](https://github.com/ClickHouse/ClickHouse/pull/58740) ([Sema Checherinda](https://github.com/CheSema)).
* more relaxed check [#58751](https://github.com/ClickHouse/ClickHouse/pull/58751) ([Sema Checherinda](https://github.com/CheSema)).
* Fix usage of small buffers for remote reading [#58768](https://github.com/ClickHouse/ClickHouse/pull/58768) ([Nikita Taranov](https://github.com/nickitat)).
* Add missing includes when _LIBCPP_REMOVE_TRANSITIVE_INCLUDES enabled [#58770](https://github.com/ClickHouse/ClickHouse/pull/58770) ([Artem Alperin](https://github.com/hdnpth)).
* Remove some code [#58772](https://github.com/ClickHouse/ClickHouse/pull/58772) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove some code [#58790](https://github.com/ClickHouse/ClickHouse/pull/58790) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix trash in performance tests [#58794](https://github.com/ClickHouse/ClickHouse/pull/58794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix data race in Keeper [#58806](https://github.com/ClickHouse/ClickHouse/pull/58806) ([Antonio Andelic](https://github.com/antonio2368)).
* Increase log level to trace to help debug `00993_system_parts_race_condition_drop_zookeeper` [#58809](https://github.com/ClickHouse/ClickHouse/pull/58809) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* DatabaseCatalog background tasks add log names [#58832](https://github.com/ClickHouse/ClickHouse/pull/58832) ([Maksim Kita](https://github.com/kitaisreal)).
* Analyzer: Resolve GROUPING function on shards [#58833](https://github.com/ClickHouse/ClickHouse/pull/58833) ([Dmitry Novik](https://github.com/novikd)).
* Allow parallel replicas for JOIN with analyzer [part 1]. [#58838](https://github.com/ClickHouse/ClickHouse/pull/58838) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix `isRetry` method [#58839](https://github.com/ClickHouse/ClickHouse/pull/58839) ([alesapin](https://github.com/alesapin)).
* fs cache: fix data race in slru [#58842](https://github.com/ClickHouse/ClickHouse/pull/58842) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix reading from an invisible part in new (not released yet) parallel replicas coordinator [#58844](https://github.com/ClickHouse/ClickHouse/pull/58844) ([Nikita Taranov](https://github.com/nickitat)).
* Fix bad log message [#58849](https://github.com/ClickHouse/ClickHouse/pull/58849) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Set max_bytes_before_external_group_by in 01961_roaring_memory_tracking [#58863](https://github.com/ClickHouse/ClickHouse/pull/58863) ([vdimir](https://github.com/vdimir)).
* Fix `00089_group_by_arrays_of_fixed` with external aggregation [#58873](https://github.com/ClickHouse/ClickHouse/pull/58873) ([Antonio Andelic](https://github.com/antonio2368)).
* DiskWeb minor improvement in loading [#58874](https://github.com/ClickHouse/ClickHouse/pull/58874) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix RPN construction for indexHint [#58875](https://github.com/ClickHouse/ClickHouse/pull/58875) ([Dmitry Novik](https://github.com/novikd)).
* Analyzer: add test with GROUP BY on shards [#58876](https://github.com/ClickHouse/ClickHouse/pull/58876) ([Dmitry Novik](https://github.com/novikd)).
* Jepsen job to reuse builds [#58881](https://github.com/ClickHouse/ClickHouse/pull/58881) ([Max K.](https://github.com/maxknv)).
* Fix ambiguity in the setting description [#58883](https://github.com/ClickHouse/ClickHouse/pull/58883) ([Denny Crane](https://github.com/den-crane)).
* Less error prone interface of read buffers [#58886](https://github.com/ClickHouse/ClickHouse/pull/58886) ([Anton Popov](https://github.com/CurtizJ)).
* Add metric for keeper memory soft limit [#58890](https://github.com/ClickHouse/ClickHouse/pull/58890) ([Pradeep Chhetri](https://github.com/chhetripradeep)).
* Add a test for [#47988](https://github.com/ClickHouse/ClickHouse/issues/47988) [#58893](https://github.com/ClickHouse/ClickHouse/pull/58893) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Whitespaces [#58894](https://github.com/ClickHouse/ClickHouse/pull/58894) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix data race in `AggregatingTransform` [#58896](https://github.com/ClickHouse/ClickHouse/pull/58896) ([Antonio Andelic](https://github.com/antonio2368)).
* Update SLRUFileCachePriority.cpp [#58898](https://github.com/ClickHouse/ClickHouse/pull/58898) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add tests for [#57193](https://github.com/ClickHouse/ClickHouse/issues/57193) [#58899](https://github.com/ClickHouse/ClickHouse/pull/58899) ([Raúl Marín](https://github.com/Algunenano)).
* Add log for already downloaded binary in Jepsen [#58901](https://github.com/ClickHouse/ClickHouse/pull/58901) ([Antonio Andelic](https://github.com/antonio2368)).
* fs cache: minor refactoring [#58902](https://github.com/ClickHouse/ClickHouse/pull/58902) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Checking on flaky test_parallel_replicas_custom_key_failover [#58909](https://github.com/ClickHouse/ClickHouse/pull/58909) ([Igor Nikonov](https://github.com/devcrafter)).
* Style fix [#58913](https://github.com/ClickHouse/ClickHouse/pull/58913) ([Dmitry Novik](https://github.com/novikd)).
* Opentelemetry spans to analyze CPU and S3 bottlenecks on inserts [#58914](https://github.com/ClickHouse/ClickHouse/pull/58914) ([Alexander Gololobov](https://github.com/davenger)).
* Fix fault handler in case of thread (for fault handler) cannot be spawned [#58917](https://github.com/ClickHouse/ClickHouse/pull/58917) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer: Support GROUP BY injective function elimination [#58919](https://github.com/ClickHouse/ClickHouse/pull/58919) ([Dmitry Novik](https://github.com/novikd)).
* Cancel MasterCI in PRs [#58920](https://github.com/ClickHouse/ClickHouse/pull/58920) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix and test for azure [#58697](https://github.com/ClickHouse/ClickHouse/issues/58697) [#58921](https://github.com/ClickHouse/ClickHouse/pull/58921) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Extend performance test norm_dist.xml [#58922](https://github.com/ClickHouse/ClickHouse/pull/58922) ([Robert Schulze](https://github.com/rschu1ze)).
* Add regression test for parallel replicas (follow up [#58722](https://github.com/ClickHouse/ClickHouse/issues/58722), [#58844](https://github.com/ClickHouse/ClickHouse/issues/58844)) [#58923](https://github.com/ClickHouse/ClickHouse/pull/58923) ([Nikita Taranov](https://github.com/nickitat)).
* Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892) [#58927](https://github.com/ClickHouse/ClickHouse/pull/58927) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `FunctionToSubcolumnsPass` in debug build [#58930](https://github.com/ClickHouse/ClickHouse/pull/58930) ([Anton Popov](https://github.com/CurtizJ)).
* Call `getMaxFileDescriptorCount` once in Keeper [#58938](https://github.com/ClickHouse/ClickHouse/pull/58938) ([Antonio Andelic](https://github.com/antonio2368)).
* Add missing files to digests [#58942](https://github.com/ClickHouse/ClickHouse/pull/58942) ([Raúl Marín](https://github.com/Algunenano)).
* Analyzer: fix join column not found with compound identifiers [#58943](https://github.com/ClickHouse/ClickHouse/pull/58943) ([vdimir](https://github.com/vdimir)).
* CI: pr_info to provide event_type for job scripts [#58947](https://github.com/ClickHouse/ClickHouse/pull/58947) ([Max K.](https://github.com/maxknv)).
* Using the destination object for paths generation in S3copy. [#58949](https://github.com/ClickHouse/ClickHouse/pull/58949) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix data race in slru (2) [#58950](https://github.com/ClickHouse/ClickHouse/pull/58950) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix flaky test_postgresql_replica_database_engine_2/test.py::test_dependent_loading [#58951](https://github.com/ClickHouse/ClickHouse/pull/58951) ([Kseniia Sumarokova](https://github.com/kssenii)).
* More safe way to dump system logs in tests [#58955](https://github.com/ClickHouse/ClickHouse/pull/58955) ([alesapin](https://github.com/alesapin)).
* Add a comment about sparse checkout [#58960](https://github.com/ClickHouse/ClickHouse/pull/58960) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Follow up to [#58357](https://github.com/ClickHouse/ClickHouse/issues/58357) [#58963](https://github.com/ClickHouse/ClickHouse/pull/58963) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Better error message about tuples [#58971](https://github.com/ClickHouse/ClickHouse/pull/58971) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix timeout for prometheus exporter for HTTP/1.1 (due to keep-alive) [#58981](https://github.com/ClickHouse/ClickHouse/pull/58981) ([Azat Khuzhin](https://github.com/azat)).
* Fix 02891_array_shingles with analyzer [#58982](https://github.com/ClickHouse/ClickHouse/pull/58982) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix script name in SQL example in executable.md [#58984](https://github.com/ClickHouse/ClickHouse/pull/58984) ([Lino Uruñuela](https://github.com/Wachynaky)).
* Fix typo [#58986](https://github.com/ClickHouse/ClickHouse/pull/58986) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Revert flaky [#58992](https://github.com/ClickHouse/ClickHouse/pull/58992) ([Raúl Marín](https://github.com/Algunenano)).
* Revive: Parallel replicas custom key: skip unavailable replicas [#58993](https://github.com/ClickHouse/ClickHouse/pull/58993) ([Igor Nikonov](https://github.com/devcrafter)).
* Make performance test `test norm_dist.xml` more realistic [#58995](https://github.com/ClickHouse/ClickHouse/pull/58995) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix 02404_memory_bound_merging with analyzer (follow up [#56419](https://github.com/ClickHouse/ClickHouse/issues/56419)) [#58996](https://github.com/ClickHouse/ClickHouse/pull/58996) ([Nikita Taranov](https://github.com/nickitat)).
* Add test for [#58930](https://github.com/ClickHouse/ClickHouse/issues/58930) [#58999](https://github.com/ClickHouse/ClickHouse/pull/58999) ([Anton Popov](https://github.com/CurtizJ)).
* initialization ConnectionTimeouts [#59000](https://github.com/ClickHouse/ClickHouse/pull/59000) ([Sema Checherinda](https://github.com/CheSema)).
* DiskWeb fix loading [#59006](https://github.com/ClickHouse/ClickHouse/pull/59006) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Update log level for http buffer [#59008](https://github.com/ClickHouse/ClickHouse/pull/59008) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Change log level for super imporant message in Keeper [#59010](https://github.com/ClickHouse/ClickHouse/pull/59010) ([alesapin](https://github.com/alesapin)).
* Fix async loader stress test [#59011](https://github.com/ClickHouse/ClickHouse/pull/59011) ([Sergei Trifonov](https://github.com/serxa)).
* Remove `StaticResourceManager` [#59013](https://github.com/ClickHouse/ClickHouse/pull/59013) ([Sergei Trifonov](https://github.com/serxa)).
* preserve 'amz-sdk-invocation-id' and 'amz-sdk-request' headers with gcp [#59015](https://github.com/ClickHouse/ClickHouse/pull/59015) ([Sema Checherinda](https://github.com/CheSema)).
* Update rename.md [#59017](https://github.com/ClickHouse/ClickHouse/pull/59017) ([filimonov](https://github.com/filimonov)).
* очепятка [#59024](https://github.com/ClickHouse/ClickHouse/pull/59024) ([edpyt](https://github.com/edpyt)).
* Split resource scheduler off `IO/` into `Common/Scheduler/` [#59025](https://github.com/ClickHouse/ClickHouse/pull/59025) ([Sergei Trifonov](https://github.com/serxa)).
* Add a parameter for testing purposes [#59027](https://github.com/ClickHouse/ClickHouse/pull/59027) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix test 02932_kill_query_sleep when running with query cache [#59041](https://github.com/ClickHouse/ClickHouse/pull/59041) ([Vitaly Baranov](https://github.com/vitlibar)).
* CI: Jepsen: fix sanity check in ci.py [#59043](https://github.com/ClickHouse/ClickHouse/pull/59043) ([Max K.](https://github.com/maxknv)).
* CI: add ci_config classes for job and build names [#59046](https://github.com/ClickHouse/ClickHouse/pull/59046) ([Max K.](https://github.com/maxknv)).
* remove flaky test [#59066](https://github.com/ClickHouse/ClickHouse/pull/59066) ([Sema Checherinda](https://github.com/CheSema)).
* Followup to 57853 [#59068](https://github.com/ClickHouse/ClickHouse/pull/59068) ([Dmitry Novik](https://github.com/novikd)).
* Follow-up to [#59027](https://github.com/ClickHouse/ClickHouse/issues/59027) [#59075](https://github.com/ClickHouse/ClickHouse/pull/59075) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `test_parallel_replicas_invisible_parts` [#59077](https://github.com/ClickHouse/ClickHouse/pull/59077) ([Nikita Taranov](https://github.com/nickitat)).
* Increase max_bytes_before_external_group_by for 00165_jit_aggregate_functions [#59078](https://github.com/ClickHouse/ClickHouse/pull/59078) ([Raúl Marín](https://github.com/Algunenano)).
* Fix stateless/run.sh [#59079](https://github.com/ClickHouse/ClickHouse/pull/59079) ([Kseniia Sumarokova](https://github.com/kssenii)).
* CI: hot fix for reuse [#59081](https://github.com/ClickHouse/ClickHouse/pull/59081) ([Max K.](https://github.com/maxknv)).
* Fix server shutdown due to exception while loading metadata [#59083](https://github.com/ClickHouse/ClickHouse/pull/59083) ([Sergei Trifonov](https://github.com/serxa)).
* Coordinator returns ranges for reading in sorted order [#59089](https://github.com/ClickHouse/ClickHouse/pull/59089) ([Nikita Taranov](https://github.com/nickitat)).
* Raise timeout in 02294_decimal_second_errors [#59090](https://github.com/ClickHouse/ClickHouse/pull/59090) ([Raúl Marín](https://github.com/Algunenano)).
* Add `[[nodiscard]]` to a couple of methods [#59093](https://github.com/ClickHouse/ClickHouse/pull/59093) ([Nikita Taranov](https://github.com/nickitat)).
* Docs: Update integer and float aliases [#59100](https://github.com/ClickHouse/ClickHouse/pull/59100) ([Robert Schulze](https://github.com/rschu1ze)).
* Avoid election timeouts during startup in Keeper [#59102](https://github.com/ClickHouse/ClickHouse/pull/59102) ([Antonio Andelic](https://github.com/antonio2368)).
* Add missing setting max_estimated_execution_time in SettingsChangesHistory [#59104](https://github.com/ClickHouse/ClickHouse/pull/59104) ([Kruglov Pavel](https://github.com/Avogar)).
* Rename some inverted index test files [#59106](https://github.com/ClickHouse/ClickHouse/pull/59106) ([Robert Schulze](https://github.com/rschu1ze)).
* Further reduce runtime of `norm_distance.xml` [#59108](https://github.com/ClickHouse/ClickHouse/pull/59108) ([Robert Schulze](https://github.com/rschu1ze)).
* Minor follow-up to [#53710](https://github.com/ClickHouse/ClickHouse/issues/53710) [#59109](https://github.com/ClickHouse/ClickHouse/pull/59109) ([Robert Schulze](https://github.com/rschu1ze)).
* Update stateless/run.sh [#59116](https://github.com/ClickHouse/ClickHouse/pull/59116) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Followup 57875 [#59117](https://github.com/ClickHouse/ClickHouse/pull/59117) ([Dmitry Novik](https://github.com/novikd)).
* Fixing build [#59130](https://github.com/ClickHouse/ClickHouse/pull/59130) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Capability check for `s3_plain` [#59145](https://github.com/ClickHouse/ClickHouse/pull/59145) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix `02015_async_inserts_stress_long` [#59146](https://github.com/ClickHouse/ClickHouse/pull/59146) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix AggregateFunctionNothing result type issues introducing it with different names [#59147](https://github.com/ClickHouse/ClickHouse/pull/59147) ([vdimir](https://github.com/vdimir)).
* Fix url encoding issue [#59162](https://github.com/ClickHouse/ClickHouse/pull/59162) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Upgrade simdjson to v3.6.3 [#59166](https://github.com/ClickHouse/ClickHouse/pull/59166) ([Robert Schulze](https://github.com/rschu1ze)).
* Decrease log level for one log message [#59168](https://github.com/ClickHouse/ClickHouse/pull/59168) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix broken cache for non-existing temp_path [#59172](https://github.com/ClickHouse/ClickHouse/pull/59172) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Move some headers [#59175](https://github.com/ClickHouse/ClickHouse/pull/59175) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Analyzer: Fix CTE name clash resolution [#59177](https://github.com/ClickHouse/ClickHouse/pull/59177) ([Dmitry Novik](https://github.com/novikd)).
* Fix another place with special symbols in the URL [#59184](https://github.com/ClickHouse/ClickHouse/pull/59184) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Actions dag build filter actions refactoring [#59228](https://github.com/ClickHouse/ClickHouse/pull/59228) ([Maksim Kita](https://github.com/kitaisreal)).
* Minor cleanup of msan usage [#59229](https://github.com/ClickHouse/ClickHouse/pull/59229) ([Robert Schulze](https://github.com/rschu1ze)).
* Load server configs in clickhouse local [#59231](https://github.com/ClickHouse/ClickHouse/pull/59231) ([pufit](https://github.com/pufit)).
* Make libssh build dependent on `-DENABLE_LIBRARIES` [#59242](https://github.com/ClickHouse/ClickHouse/pull/59242) ([Robert Schulze](https://github.com/rschu1ze)).
* Disable copy constructor for MultiVersion [#59244](https://github.com/ClickHouse/ClickHouse/pull/59244) ([Vitaly Baranov](https://github.com/vitlibar)).
* CI: fix ci configuration for nightly job [#59252](https://github.com/ClickHouse/ClickHouse/pull/59252) ([Max K.](https://github.com/maxknv)).
* Fix 02475_bson_each_row_format flakiness (due to small parsing block) [#59253](https://github.com/ClickHouse/ClickHouse/pull/59253) ([Azat Khuzhin](https://github.com/azat)).
* Improve pytest --pdb experience by preserving dockerd on SIGINT (v2) [#59255](https://github.com/ClickHouse/ClickHouse/pull/59255) ([Azat Khuzhin](https://github.com/azat)).
* Fix fasttest by pinning pip dependencies [#59256](https://github.com/ClickHouse/ClickHouse/pull/59256) ([Azat Khuzhin](https://github.com/azat)).
* Added AtomicLogger [#59273](https://github.com/ClickHouse/ClickHouse/pull/59273) ([Maksim Kita](https://github.com/kitaisreal)).
* Update test_reload_after_fail_in_cache_dictionary for analyzer [#59274](https://github.com/ClickHouse/ClickHouse/pull/59274) ([vdimir](https://github.com/vdimir)).
* Update run.sh [#59280](https://github.com/ClickHouse/ClickHouse/pull/59280) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add missing setting optimize_injective_functions_in_group_by to SettingsChangesHistory [#59283](https://github.com/ClickHouse/ClickHouse/pull/59283) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix perf tests (after sumMap starts to filter out -0.) [#59287](https://github.com/ClickHouse/ClickHouse/pull/59287) ([Azat Khuzhin](https://github.com/azat)).
* Use fresh ZooKeeper client on DROP (to have higher chances on success) [#59288](https://github.com/ClickHouse/ClickHouse/pull/59288) ([Azat Khuzhin](https://github.com/azat)).
* Additional check [#59292](https://github.com/ClickHouse/ClickHouse/pull/59292) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* No debug symbols in Rust [#59306](https://github.com/ClickHouse/ClickHouse/pull/59306) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix deadlock in `AsyncLoader::stop()` [#59308](https://github.com/ClickHouse/ClickHouse/pull/59308) ([Sergei Trifonov](https://github.com/serxa)).
* Speed up `00165_jit_aggregate_functions` [#59312](https://github.com/ClickHouse/ClickHouse/pull/59312) ([Nikita Taranov](https://github.com/nickitat)).
* CI: WA for issue with perf test with artifact reuse [#59325](https://github.com/ClickHouse/ClickHouse/pull/59325) ([Max K.](https://github.com/maxknv)).
* Fix typo [#59329](https://github.com/ClickHouse/ClickHouse/pull/59329) ([Raúl Marín](https://github.com/Algunenano)).
* Simplify query_run_metric_arrays in perf tests [#59333](https://github.com/ClickHouse/ClickHouse/pull/59333) ([Raúl Marín](https://github.com/Algunenano)).
* IVolume constructor improve exception message [#59335](https://github.com/ClickHouse/ClickHouse/pull/59335) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix upgrade check for new setting [#59343](https://github.com/ClickHouse/ClickHouse/pull/59343) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix sccache when building without coverage [#59345](https://github.com/ClickHouse/ClickHouse/pull/59345) ([Raúl Marín](https://github.com/Algunenano)).
* Loggers initialization fix [#59347](https://github.com/ClickHouse/ClickHouse/pull/59347) ([Maksim Kita](https://github.com/kitaisreal)).
* Add setting update_insert_deduplication_token_in_dependent_materialized_views to settings changes history [#59349](https://github.com/ClickHouse/ClickHouse/pull/59349) ([Maksim Kita](https://github.com/kitaisreal)).
* Slightly better memory usage in `AsynchronousBoundedReadBuffer` [#59354](https://github.com/ClickHouse/ClickHouse/pull/59354) ([Anton Popov](https://github.com/CurtizJ)).
* Try to make variant tests a bit faster [#59355](https://github.com/ClickHouse/ClickHouse/pull/59355) ([Kruglov Pavel](https://github.com/Avogar)).
* Minor typos in Settings.h [#59371](https://github.com/ClickHouse/ClickHouse/pull/59371) ([Jordi Villar](https://github.com/jrdi)).
* Rename `quantileDDSketch` to `quantileDD` [#59372](https://github.com/ClickHouse/ClickHouse/pull/59372) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -0,0 +1,14 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.1.2.5-stable (b2605dd4a5a) FIXME as compared to v24.1.1.2048-stable (5a024dfc093)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
* Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)).

View File

@ -109,6 +109,9 @@ Do not check for a particular wording of error message, it may change in the fut
If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is run in CI in correct configurations, where the server is configured to support distributed queries.
### Working with Temporary Files
Sometimes in a shell test you may need to create a file on the fly to work with. Keep in mind that some CI checks run tests in parallel, so if you are creating or removing a temporary file in your script without a unique name, this can cause some of the CI checks, such as Flaky, to fail. To get around this, you should use the environment variable `$CLICKHOUSE_TEST_UNIQUE_NAME` to give temporary files a name unique to the test that is running. That way you can be sure that the file you are creating during setup or removing during cleanup is used only by that test and not by some other test which is running in parallel.
## Known Bugs {#known-bugs}

View File

@ -0,0 +1,342 @@
---
slug: /en/getting-started/example-datasets/noaa
sidebar_label: NOAA Global Historical Climatology Network
sidebar_position: 1
description: 2.5 billion rows of climate data for the last 120 yrs
---
# NOAA Global Historical Climatology Network
This dataset contains weather measurements for the last 120 years. Each row is a measurement for a point in time and station.
More precisely and according to the [origin of this data](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn):
> GHCN-Daily is a dataset that contains daily observations over global land areas. It contains station-based measurements from land-based stations worldwide, about two-thirds of which are for precipitation measurements only (Menne et al., 2012). GHCN-Daily is a composite of climate records from numerous sources that were merged together and subjected to a common suite of quality assurance reviews (Durre et al., 2010). The archive includes the following meteorological elements:
- Daily maximum temperature
- Daily minimum temperature
- Temperature at the time of observation
- Precipitation (i.e., rain, melted snow)
- Snowfall
- Snow depth
- Other elements where available
## Downloading the data
- A [pre-prepared version](#pre-prepared-data) of the data for ClickHouse, which has been cleansed, re-structured, and enriched. This data covers the years 1900 to 2022.
- [Download the original data](#original-data) and convert to the format required by ClickHouse. Users wanting to add their own columns may wish to explore this approach.
### Pre-prepared data
More specifically, rows that failed any quality assurance checks by NOAA have been removed. The data has also been restructured from a measurement per line to a row per station id and date, i.e.
```csv
"station_id","date","tempAvg","tempMax","tempMin","precipitation","snowfall","snowDepth","percentDailySun","averageWindSpeed","maxWindSpeed","weatherType"
"AEM00041194","2022-07-30",347,0,308,0,0,0,0,0,0,0
"AEM00041194","2022-07-31",371,413,329,0,0,0,0,0,0,0
"AEM00041194","2022-08-01",384,427,357,0,0,0,0,0,0,0
"AEM00041194","2022-08-02",381,424,352,0,0,0,0,0,0,0
```
This is simpler to query and ensures the resulting table is less sparse. Finally, the data has also been enriched with latitude and longitude.
This data is available in the following S3 location. Either download the data to your local filesystem (and insert using the ClickHouse client) or insert directly into ClickHouse (see [Inserting from S3](#inserting-from-s3)).
To download:
```bash
wget https://datasets-documentation.s3.eu-west-3.amazonaws.com/noaa/noaa_enriched.parquet
```
### Original data
The following details the steps to download and transform the original data in preparation for loading into ClickHouse.
#### Download
To download the original data:
```bash
for i in {1900..2023}; do wget https://noaa-ghcn-pds.s3.amazonaws.com/csv.gz/${i}.csv.gz; done
```
#### Sampling the data
```bash
$ clickhouse-local --query "SELECT * FROM '2021.csv.gz' LIMIT 10" --format PrettyCompact
┌─c1──────────┬───────c2─┬─c3───┬──c4─┬─c5───┬─c6───┬─c7─┬───c8─┐
│ AE000041196 │ 20210101 │ TMAX │ 278 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AE000041196 │ 20210101 │ PRCP │ 0 │ D │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AE000041196 │ 20210101 │ TAVG │ 214 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ TMAX │ 266 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ TMIN │ 178 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ PRCP │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041194 │ 20210101 │ TAVG │ 217 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041217 │ 20210101 │ TMAX │ 262 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041217 │ 20210101 │ TMIN │ 155 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
│ AEM00041217 │ 20210101 │ TAVG │ 202 │ H │ ᴺᵁᴸᴸ │ S │ ᴺᵁᴸᴸ │
└─────────────┴──────────┴──────┴─────┴──────┴──────┴────┴──────┘
```
Summarizing the [format documentation](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn) and the columns in order:
- An 11 character station identification code. This itself encodes some useful information
- YEAR/MONTH/DAY = 8 character date in YYYYMMDD format (e.g. 19860529 = May 29, 1986)
- ELEMENT = 4 character indicator of element type. Effectively the measurement type. While there are many measurements available, we select the following:
- PRCP - Precipitation (tenths of mm)
- SNOW - Snowfall (mm)
- SNWD - Snow depth (mm)
- TMAX - Maximum temperature (tenths of degrees C)
- TAVG - Average temperature (tenths of a degree C)
- TMIN - Minimum temperature (tenths of degrees C)
- PSUN - Daily percent of possible sunshine (percent)
- AWND - Average daily wind speed (tenths of meters per second)
- WSFG - Peak gust wind speed (tenths of meters per second)
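- `WT**` = Weather Type, where `**` defines the weather type. The full list of weather types is given in the [format documentation](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn).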
- DATA VALUE = 5 character data value for ELEMENT i.e. the value of the measurement.
- M-FLAG = 1 character Measurement Flag. This has 10 possible values. Some of these values indicate questionable data accuracy. We accept data where this is set to “P” - identified as missing presumed zero, as this is only relevant to the PRCP, SNOW and SNWD measurements.
- Q-FLAG is the measurement quality flag with 14 possible values. We are only interested in data with an empty value i.e. it did not fail any quality assurance checks.
- S-FLAG is the source flag for the observation. Not useful for our analysis and ignored.
- OBS-TIME = 4-character time of observation in hour-minute format (e.g. 0700 = 7:00 am). Typically not present in older data. We ignore this for our purposes.
A measurement per line would result in a sparse table structure in ClickHouse. We should transform to a row per time and station, with measurements as columns. First, we limit the dataset to those rows without issues i.e. where `qFlag` is equal to an empty string.
#### Clean the data
Using [ClickHouse local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local) we can filter rows that represent measurements of interest and pass our quality requirements:
```bash
clickhouse local --query "SELECT count()
FROM file('*.csv.gz', CSV, 'station_id String, date String, measurement String, value Int64, mFlag String, qFlag String, sFlag String, obsTime String') WHERE qFlag = '' AND (measurement IN ('PRCP', 'SNOW', 'SNWD', 'TMAX', 'TAVG', 'TMIN', 'PSUN', 'AWND', 'WSFG') OR startsWith(measurement, 'WT'))"
2679264563
```
With over 2.6 billion rows, this isn't a fast query since it involves parsing all the files. On our 8 core machine, this takes around 160 seconds.
### Pivot data
While the measurement per line structure can be used with ClickHouse, it will unnecessarily complicate future queries. Ideally, we need a row per station id and date, where each measurement type and associated value are a column i.e.
```csv
"station_id","date","tempAvg","tempMax","tempMin","precipitation","snowfall","snowDepth","percentDailySun","averageWindSpeed","maxWindSpeed","weatherType"
"AEM00041194","2022-07-30",347,0,308,0,0,0,0,0,0,0
"AEM00041194","2022-07-31",371,413,329,0,0,0,0,0,0,0
"AEM00041194","2022-08-01",384,427,357,0,0,0,0,0,0,0
"AEM00041194","2022-08-02",381,424,352,0,0,0,0,0,0,0
```
Using ClickHouse local and a simple `GROUP BY`, we can repivot our data to this structure. To limit memory overhead, we do this one file at a time.
```bash
for i in {1900..2022}
do
clickhouse-local --query "SELECT station_id,
toDate32(date) as date,
anyIf(value, measurement = 'TAVG') as tempAvg,
anyIf(value, measurement = 'TMAX') as tempMax,
anyIf(value, measurement = 'TMIN') as tempMin,
anyIf(value, measurement = 'PRCP') as precipitation,
anyIf(value, measurement = 'SNOW') as snowfall,
anyIf(value, measurement = 'SNWD') as snowDepth,
anyIf(value, measurement = 'PSUN') as percentDailySun,
anyIf(value, measurement = 'AWND') as averageWindSpeed,
anyIf(value, measurement = 'WSFG') as maxWindSpeed,
toUInt8OrZero(replaceOne(anyIf(measurement, startsWith(measurement, 'WT') AND value = 1), 'WT', '')) as weatherType
FROM file('$i.csv.gz', CSV, 'station_id String, date String, measurement String, value Int64, mFlag String, qFlag String, sFlag String, obsTime String')
WHERE qFlag = '' AND (measurement IN ('PRCP', 'SNOW', 'SNWD', 'TMAX', 'TAVG', 'TMIN', 'PSUN', 'AWND', 'WSFG') OR startsWith(measurement, 'WT'))
GROUP BY station_id, date
ORDER BY station_id, date FORMAT CSV" >> "noaa.csv";
done
```
This query produces a single 50GB file `noaa.csv`.
### Enriching the data
The data has no indication of location aside from a station id, which includes a prefix country code. Ideally, each station would have a latitude and longitude associated with it. To achieve this, NOAA conveniently provides the details of each station as a separate [ghcnd-stations.txt](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn#format-of-ghcnd-stationstxt-file). This file has [several columns](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn#format-of-ghcnd-stationstxt-file), of which five are useful to our future analysis: id, latitude, longitude, elevation, and name.
```bash
wget http://noaa-ghcn-pds.s3.amazonaws.com/ghcnd-stations.txt
```
```bash
clickhouse local --query "WITH stations AS (SELECT id, lat, lon, elevation, splitByString(' GSN ',name)[1] as name FROM file('ghcnd-stations.txt', Regexp, 'id String, lat Float64, lon Float64, elevation Float32, name String'))
SELECT station_id,
date,
tempAvg,
tempMax,
tempMin,
precipitation,
snowfall,
snowDepth,
percentDailySun,
averageWindSpeed,
maxWindSpeed,
weatherType,
tuple(lon, lat) as location,
elevation,
name
FROM file('noaa.csv', CSV,
'station_id String, date Date32, tempAvg Int32, tempMax Int32, tempMin Int32, precipitation Int32, snowfall Int32, snowDepth Int32, percentDailySun Int8, averageWindSpeed Int32, maxWindSpeed Int32, weatherType UInt8') as noaa LEFT OUTER
JOIN stations ON noaa.station_id = stations.id INTO OUTFILE 'noaa_enriched.parquet' FORMAT Parquet SETTINGS format_regexp='^(.{11})\s+(\-?\d{1,2}\.\d{4})\s+(\-?\d{1,3}\.\d{1,4})\s+(\-?\d*\.\d*)\s+(.*)\s+(?:[\d]*)'"
```
This query takes a few minutes to run and produces a 6.4 GB file, `noaa_enriched.parquet`.
## Create table
Create a MergeTree table in ClickHouse (from the ClickHouse client).
```sql
CREATE TABLE noaa
(
`station_id` LowCardinality(String),
`date` Date32,
`tempAvg` Int32 COMMENT 'Average temperature (tenths of a degrees C)',
`tempMax` Int32 COMMENT 'Maximum temperature (tenths of degrees C)',
`tempMin` Int32 COMMENT 'Minimum temperature (tenths of degrees C)',
`precipitation` UInt32 COMMENT 'Precipitation (tenths of mm)',
`snowfall` UInt32 COMMENT 'Snowfall (mm)',
`snowDepth` UInt32 COMMENT 'Snow depth (mm)',
`percentDailySun` UInt8 COMMENT 'Daily percent of possible sunshine (percent)',
`averageWindSpeed` UInt32 COMMENT 'Average daily wind speed (tenths of meters per second)',
`maxWindSpeed` UInt32 COMMENT 'Peak gust wind speed (tenths of meters per second)',
`weatherType` Enum8('Normal' = 0, 'Fog' = 1, 'Heavy Fog' = 2, 'Thunder' = 3, 'Small Hail' = 4, 'Hail' = 5, 'Glaze' = 6, 'Dust/Ash' = 7, 'Smoke/Haze' = 8, 'Blowing/Drifting Snow' = 9, 'Tornado' = 10, 'High Winds' = 11, 'Blowing Spray' = 12, 'Mist' = 13, 'Drizzle' = 14, 'Freezing Drizzle' = 15, 'Rain' = 16, 'Freezing Rain' = 17, 'Snow' = 18, 'Unknown Precipitation' = 19, 'Ground Fog' = 21, 'Freezing Fog' = 22),
`location` Point,
`elevation` Float32,
`name` LowCardinality(String)
) ENGINE = MergeTree() ORDER BY (station_id, date);
```
## Inserting into ClickHouse
### Inserting from local file
Data can be inserted from a local file as follows (from the ClickHouse client):
```sql
INSERT INTO noaa FROM INFILE '<path>/noaa_enriched.parquet'
```
where `<path>` represents the full path to the local file on disk.
See [here](https://clickhouse.com/blog/real-world-data-noaa-climate-data#load-the-data) for how to speed this load up.
### Inserting from S3
```sql
INSERT INTO noaa SELECT *
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/noaa/noaa_enriched.parquet')
```
For how to speed this up, see our blog post on [tuning large data loads](https://clickhouse.com/blog/supercharge-your-clickhouse-data-loads-part2).
## Sample queries
### Highest temperature ever
```sql
SELECT
tempMax / 10 AS maxTemp,
location,
name,
date
FROM noaa
WHERE tempMax > 500
ORDER BY
tempMax DESC,
date ASC
LIMIT 5
┌─maxTemp─┬─location──────────┬─name───────────────────────────────────────────┬───────date─┐
│ 56.7 │ (-116.8667,36.45) │ CA GREENLAND RCH │ 1913-07-10 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1949-08-20 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1949-09-18 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1952-07-17 │
│ 56.7 │ (-115.4667,32.55) │ MEXICALI (SMN) │ 1952-09-04 │
└─────────┴───────────────────┴────────────────────────────────────────────────┴────────────┘
5 rows in set. Elapsed: 0.514 sec. Processed 1.06 billion rows, 4.27 GB (2.06 billion rows/s., 8.29 GB/s.)
```
Reassuringly consistent with the [documented record](https://en.wikipedia.org/wiki/List_of_weather_records#Highest_temperatures_ever_recorded) at [Furnace Creek](https://www.google.com/maps/place/36%C2%B027'00.0%22N+116%C2%B052'00.1%22W/@36.1329666,-116.1104099,8.95z/data=!4m5!3m4!1s0x0:0xf2ed901b860f4446!8m2!3d36.45!4d-116.8667) as of 2023.
### Best ski resorts
Using a [list of ski resorts](https://gist.githubusercontent.com/gingerwizard/dd022f754fd128fdaf270e58fa052e35/raw/622e03c37460f17ef72907afe554cb1c07f91f23/ski_resort_stats.csv) in the United States and their respective locations, we join these against the top 1000 weather stations with the most snow in any month in the last 5 years. Sorting this join by [geoDistance](https://clickhouse.com/docs/en/sql-reference/functions/geo/coordinates/#geodistance) and restricting the results to those where the distance is less than 20km, we select the top result per resort and sort this by total snow. Note we also restrict resorts to those above 1800m, as a broad indicator of good skiing conditions.
```sql
SELECT
resort_name,
total_snow / 1000 AS total_snow_m,
resort_location,
month_year
FROM
(
WITH resorts AS
(
SELECT
resort_name,
state,
(lon, lat) AS resort_location,
'US' AS code
FROM url('https://gist.githubusercontent.com/gingerwizard/dd022f754fd128fdaf270e58fa052e35/raw/622e03c37460f17ef72907afe554cb1c07f91f23/ski_resort_stats.csv', CSVWithNames)
)
SELECT
resort_name,
highest_snow.station_id,
geoDistance(resort_location.1, resort_location.2, station_location.1, station_location.2) / 1000 AS distance_km,
highest_snow.total_snow,
resort_location,
station_location,
month_year
FROM
(
SELECT
sum(snowfall) AS total_snow,
station_id,
any(location) AS station_location,
month_year,
substring(station_id, 1, 2) AS code
FROM noaa
WHERE (date > '2017-01-01') AND (code = 'US') AND (elevation > 1800)
GROUP BY
station_id,
toYYYYMM(date) AS month_year
ORDER BY total_snow DESC
LIMIT 1000
) AS highest_snow
INNER JOIN resorts ON highest_snow.code = resorts.code
WHERE distance_km < 20
ORDER BY
resort_name ASC,
total_snow DESC
LIMIT 1 BY
resort_name,
station_id
)
ORDER BY total_snow DESC
LIMIT 5
┌─resort_name──────────┬─total_snow_m─┬─resort_location─┬─month_year─┐
│ Sugar Bowl, CA │ 7.799 │ (-120.3,39.27) │ 201902 │
│ Donner Ski Ranch, CA │ 7.799 │ (-120.34,39.31) │ 201902 │
│ Boreal, CA │ 7.799 │ (-120.35,39.33) │ 201902 │
│ Homewood, CA │ 4.926 │ (-120.17,39.08) │ 201902 │
│ Alpine Meadows, CA │ 4.926 │ (-120.22,39.17) │ 201902 │
└──────────────────────┴──────────────┴─────────────────┴────────────┘
5 rows in set. Elapsed: 0.750 sec. Processed 689.10 million rows, 3.20 GB (918.20 million rows/s., 4.26 GB/s.)
Peak memory usage: 67.66 MiB.
```
## Credits
We would like to acknowledge the efforts of the Global Historical Climatology Network for preparing, cleansing, and distributing this data. We appreciate your efforts.
Menne, M.J., I. Durre, B. Korzeniewski, S. McNeal, K. Thomas, X. Yin, S. Anthony, R. Ray, R.S. Vose, B.E.Gleason, and T.G. Houston, 2012: Global Historical Climatology Network - Daily (GHCN-Daily), Version 3. [indicate subset used following decimal, e.g. Version 3.25]. NOAA National Centers for Environmental Information. http://doi.org/10.7289/V5D21VHZ [17/08/2020]

View File

@ -197,6 +197,29 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section).
## Aliases {#cli_aliases}
- `\l` - SHOW DATABASES
- `\d` - SHOW TABLES
- `\c <DATABASE>` - USE DATABASE
- `.` - repeat the last query
## Shortkeys {#shortkeys_aliases}
- `Alt (Option) + Shift + e` - open editor with current query. It is possible to set up an environment variable - `EDITOR`, by default vim is used.
- `Alt (Option) + #` - comment line.
- `Ctrl + r` - fuzzy history search.
:::tip
To make the meta key (Option) work correctly on macOS:
iTerm2: Go to Preferences -> Profile -> Keys -> Left Option key and click Esc+
:::
The full list with all available shortkeys - [replxx](https://github.com/AmokHuginnsson/replxx/blob/1f149bf/src/replxx_impl.cxx#L262).
## Connection string {#connection_string}
clickhouse-client alternatively supports connecting to clickhouse server using a connection string similar to [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/), [PostgreSQL](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), [MySQL](https://dev.mysql.com/doc/refman/8.0/en/connecting-using-uri-or-key-value-pairs.html#connecting-using-uri). It has the following syntax:

View File

@ -163,7 +163,7 @@ key: value
Corresponding XML:
``` xml
<key>value</value>
<key>value</key>
```
A nested XML node is represented by a YAML map:

View File

@ -1922,7 +1922,7 @@ Possible values:
- Positive integer.
- 0 — Asynchronous insertions are disabled.
Default value: `100000`.
Default value: `1000000`.
### async_insert_max_query_number {#async-insert-max-query-number}
@ -1935,7 +1935,7 @@ Possible values:
Default value: `450`.
### async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms}
### async_insert_busy_timeout_max_ms {#async-insert-busy-timeout-max-ms}
The maximum timeout in milliseconds since the first `INSERT` query before inserting collected data.
@ -1946,6 +1946,61 @@ Possible values:
Default value: `200`.
### async_insert_poll_timeout_ms {#async-insert-poll-timeout-ms}
Timeout in milliseconds for polling data from asynchronous insert queue.
Possible values:
- Positive integer.
Default value: `10`.
### async_insert_use_adaptive_busy_timeout {#allow-experimental-async-insert-adaptive-busy-timeout}
Use adaptive asynchronous insert timeout.
Possible values:
- 0 - Disabled.
- 1 - Enabled.
Default value: `0`.
### async_insert_busy_timeout_min_ms {#async-insert-busy-timeout-min-ms}
If adaptive asynchronous insert timeout is allowed through [async_insert_use_adaptive_busy_timeout](#allow-experimental-async-insert-adaptive-busy-timeout), the setting specifies the minimum value of the asynchronous insert timeout in milliseconds. It also serves as the initial value, which may be increased later by the adaptive algorithm, up to the [async_insert_busy_timeout_max_ms](#async-insert-busy-timeout-max-ms).
Possible values:
- Positive integer.
Default value: `50`.
### async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms}
Alias for [`async_insert_busy_timeout_max_ms`](#async-insert-busy-timeout-max-ms).
### async_insert_busy_timeout_increase_rate {#async-insert-busy-timeout-increase-rate}
If adaptive asynchronous insert timeout is allowed through [async_insert_use_adaptive_busy_timeout](#allow-experimental-async-insert-adaptive-busy-timeout), the setting specifies the exponential growth rate at which the adaptive asynchronous insert timeout increases.
Possible values:
- A positive floating-point number.
Default value: `0.2`.
### async_insert_busy_timeout_decrease_rate {#async-insert-busy-timeout-decrease-rate}
If adaptive asynchronous insert timeout is allowed through [async_insert_use_adaptive_busy_timeout](#allow-experimental-async-insert-adaptive-busy-timeout), the setting specifies the exponential decay rate at which the adaptive asynchronous insert timeout decreases.
Possible values:
- A positive floating-point number.
Default value: `0.2`.
### async_insert_stale_timeout_ms {#async-insert-stale-timeout-ms}
The maximum timeout in milliseconds since the last `INSERT` query before dumping collected data. If enabled, the setting prolongs the [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) with every `INSERT` query as long as [async_insert_max_data_size](#async-insert-max-data-size) is not exceeded.
@ -2040,6 +2095,32 @@ SELECT * FROM test_table
└───┘
```
## update_insert_deduplication_token_in_dependent_materialized_views {#update-insert-deduplication-token-in-dependent-materialized-views}
Allows to update `insert_deduplication_token` with table identifier during insert in dependent materialized views, if setting `deduplicate_blocks_in_dependent_materialized_views` is enabled and `insert_deduplication_token` is set.
Possible values:
0 — Disabled.
1 — Enabled.
Default value: 0.
Usage:
If setting `deduplicate_blocks_in_dependent_materialized_views` is enabled, `insert_deduplication_token` is passed to dependent materialized views. But in complex INSERT flows it is possible that we want to avoid deduplication for dependent materialized views.
Example:
```
landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1
| |
└--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘
```
In this example we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will be inserted into `ds_2_1`. Without `update_insert_deduplication_token_in_dependent_materialized_views` setting enabled, those two different blocks will be deduplicated, because different blocks from `mv_2_1` and `mv_2_2` will have the same `insert_deduplication_token`.
If setting `update_insert_deduplication_token_in_dependent_materialized_views` is enabled, during each insert into dependent materialized views `insert_deduplication_token` is updated with table identifier, so block from `mv_2_1` and block from `mv_2_2` will have different `insert_deduplication_token` and will not be deduplicated.
## insert_keeper_max_retries
The setting sets the maximum number of retries for ClickHouse Keeper (or ZooKeeper) requests during insert into replicated MergeTree. Only Keeper requests which failed due to network error, Keeper session timeout, or request timeout are considered for retries.
@ -5165,7 +5246,7 @@ SETTINGS(dictionary_use_async_executor=1, max_threads=8);
## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key}
When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY` format version. With that format full object storage key names are written to the metadata files.
When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section.
When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are written to the metadata files. The prefix for all object storage key names is set in the configuration files, in the `storage_configuration.disks` section.
Default value: `false`.
@ -5176,6 +5257,95 @@ When set to `false` than all attempts are made with identical timeouts.
Default value: `true`.
## allow_experimental_variant_type {#allow_experimental_variant_type}
Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md).
Default value: `false`.
## use_variant_as_common_type {#use_variant_as_common_type}
Allows to use `Variant` type as a result type for [if](../../sql-reference/functions/conditional-functions.md/#if)/[multiIf](../../sql-reference/functions/conditional-functions.md/#multiif)/[array](../../sql-reference/functions/array-functions.md)/[map](../../sql-reference/functions/tuple-map-functions.md) functions when there is no common type for argument types.
Example:
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(if(number % 2, number, range(number))) as variant_type FROM numbers(1);
SELECT if(number % 2, number, range(number)) as variant FROM numbers(5);
```
```text
┌─variant_type───────────────────┐
│ Variant(Array(UInt64), UInt64) │
└────────────────────────────────┘
┌─variant───┐
│ [] │
│ 1 │
│ [0,1] │
│ 3 │
│ [0,1,2,3] │
└───────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL)) AS variant_type FROM numbers(1);
SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4);
```
```text
┌─variant_type─────────────────────────┐
│ Variant(Array(UInt8), String, UInt8) │
└──────────────────────────────────────┘
┌─variant───────┐
│ 42 │
│ [1,2,3] │
│ Hello, World! │
│ ᴺᵁᴸᴸ │
└───────────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(array(range(number), number, 'str_' || toString(number))) as array_of_variants_type from numbers(1);
SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3);
```
```text
┌─array_of_variants_type────────────────────────┐
│ Array(Variant(Array(UInt64), String, UInt64)) │
└───────────────────────────────────────────────┘
┌─array_of_variants─┐
│ [[],0,'str_0'] │
│ [[0],1,'str_1'] │
│ [[0,1],2,'str_2'] │
└───────────────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(map('a', range(number), 'b', number, 'c', 'str_' || toString(number))) as map_of_variants_type from numbers(1);
SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3);
```
```text
┌─map_of_variants_type────────────────────────────────┐
│ Map(String, Variant(Array(UInt64), String, UInt64)) │
└─────────────────────────────────────────────────────┘
┌─map_of_variants───────────────┐
│ {'a':[],'b':0,'c':'str_0'} │
│ {'a':[0],'b':1,'c':'str_1'} │
│ {'a':[0,1],'b':2,'c':'str_2'} │
└───────────────────────────────┘
```
Default value: `false`.
## max_partition_size_to_drop
Restriction on dropping partitions in query time.
@ -5206,4 +5376,4 @@ Allow to ignore schema evolution in Iceberg table engine and read all data using
Enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema.
:::
Default value: 'false'.
Default value: 'false'.

View File

@ -287,7 +287,7 @@ Number of threads in the HashedDictionary thread pool running a task.
### IOPrefetchThreads
Number of threads in the IO prefertch thread pool.
Number of threads in the IO prefetch thread pool.
### IOPrefetchThreadsActive

View File

@ -25,6 +25,8 @@ Columns:
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions.
- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total count of sequential authentication failures. If the user enters the correct password before exceeding the `failed_sequential_authentications` threshold, the counter is reset.
- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum count of sequential authentication failures.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time).
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time.

View File

@ -28,8 +28,10 @@ Columns:
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time).
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time.
- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total count of sequential authentication failures. If the user enters the correct password before exceeding the `failed_sequential_authentications` threshold, the counter is reset.
- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum count of sequential authentication failures.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time).
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time.
## See Also {#see-also}

View File

@ -45,11 +45,11 @@ clickhouse-benchmark [keys] < queries_file;
- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1.
- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys.
- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys.
- `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever).
- `-r`, `--randomize` — Random order of queries execution if there is more than one input query.
- `-s`, `--secure` — Using `TLS` connection.
- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled).
- `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `--port` keys.
- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren't different with the selected level of confidence.
- `--cumulative` — Printing cumulative data instead of data per interval.
- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`.

View File

@ -0,0 +1,48 @@
---
toc_priority: 112
---
# groupArraySorted {#groupArraySorted}
Returns an array with the first N items in ascending order.
``` sql
groupArraySorted(N)(column)
```
**Arguments**
- `N` — The number of elements to return.
If the parameter is omitted, the default value is the size of the input.
- `column` — The value (Integer, String, Float and other Generic types).
**Example**
Gets the first 10 numbers:
``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9] │
└──────────────────────────────┘
```
Gets the string representations of all numbers in the column:
``` sql
SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5));
```
``` text
┌─groupArraySorted(str)────────┐
│ ['0','1','2','3','4'] │
└──────────────────────────────┘
```

View File

@ -54,6 +54,7 @@ ClickHouse-specific aggregate functions:
- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupArraySample](./grouparraysample.md)
- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md)
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)
@ -88,7 +89,7 @@ ClickHouse-specific aggregate functions:
- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
- [quantileDDSketch](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch)
- [quantileDD](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch)
- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md)
- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
@ -105,4 +106,3 @@ ClickHouse-specific aggregate functions:
- [sparkBar](./sparkbar.md)
- [sumCount](./sumcount.md)
- [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md)

View File

@ -18,7 +18,7 @@ Functions:
- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest).
- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted).
- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16).
- `medianDDSketch` — Alias for [quantileDDSketch](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch).
- `medianDD` — Alias for [quantileDD](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch).
**Example**

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch
sidebar_position: 211
title: quantileDDSketch
title: quantileDD
---
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DDSketch](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf).
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DDSketch](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf).
**Syntax**
@ -44,13 +44,13 @@ Input table has an integer and a float columns:
Query to calculate 0.75-quantile (third quartile):
``` sql
SELECT quantileDDSketch(0.01, 0.75)(a), quantileDDSketch(0.01, 0.75)(b) FROM example_table;
SELECT quantileDD(0.01, 0.75)(a), quantileDD(0.01, 0.75)(b) FROM example_table;
```
Result:
``` text
┌─quantileDDSketch(0.01, 0.75)(a)─┬─quantileDDSketch(0.01, 0.75)(b)─┐
┌─quantileDD(0.01, 0.75)(a)─┬─quantileDD(0.01, 0.75)(b)─┐
│ 2.974233423476717 │ 1.01 │
└─────────────────────────────────┴─────────────────────────────────┘
```

View File

@ -9,7 +9,7 @@ sidebar_position: 201
Syntax: `quantiles(level1, level2, …)(x)`
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDDSketch`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
## quantilesExactExclusive

View File

@ -7,7 +7,7 @@ sidebar_label: JSON
# JSON
:::note
This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead.
This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead.
:::
Stores JavaScript Object Notation (JSON) documents in a single column.
@ -15,7 +15,8 @@ Stores JavaScript Object Notation (JSON) documents in a single column.
`JSON` is an alias for `Object('json')`.
:::note
The JSON data type is an experimental feature. To use it, set `allow_experimental_object_type = 1`.
The JSON data type is an obsolete feature. Do not use it.
If you want to use it, set `allow_experimental_object_type = 1`.
:::
## Example

View File

@ -0,0 +1,274 @@
---
slug: /en/sql-reference/data-types/variant
sidebar_position: 55
sidebar_label: Variant
---
# Variant(T1, T2, T3, ...)
This type represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type
has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value).
The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1).
Nested types can be arbitrary types except Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types.
:::note
The Variant data type is an experimental feature. To use it, set `allow_experimental_variant_type = 1`.
:::
## Creating Variant
Using `Variant` type in table column definition:
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT v FROM test;
```
```text
┌─v─────────────┐
│ ᴺᵁᴸᴸ │
│ 42 │
│ Hello, World! │
│ [1,2,3] │
└───────────────┘
```
Using CAST from ordinary columns:
```sql
SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant;
```
```text
┌─type_name──────────────────────────────┬─variant───────┐
│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │
└────────────────────────────────────────┴───────────────┘
```
Using functions `if/multiIf` when arguments don't have common type (setting `use_variant_as_common_type` should be enabled for it):
```sql
SET use_variant_as_common_type = 1;
SELECT if(number % 2, number, range(number)) as variant FROM numbers(5);
```
```text
┌─variant───┐
│ [] │
│ 1 │
│ [0,1] │
│ 3 │
│ [0,1,2,3] │
└───────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4);
```
```text
┌─variant───────┐
│ 42 │
│ [1,2,3] │
│ Hello, World! │
│ ᴺᵁᴸᴸ │
└───────────────┘
```
Using functions `array/map` if array elements/map values don't have common type (setting `use_variant_as_common_type` should be enabled for it):
```sql
SET use_variant_as_common_type = 1;
SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3);
```
```text
┌─array_of_variants─┐
│ [[],0,'str_0'] │
│ [[0],1,'str_1'] │
│ [[0,1],2,'str_2'] │
└───────────────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3);
```
```text
┌─map_of_variants───────────────┐
│ {'a':[],'b':0,'c':'str_0'} │
│ {'a':[0],'b':1,'c':'str_1'} │
│ {'a':[0,1],'b':2,'c':'str_2'} │
└───────────────────────────────┘
```
## Reading Variant nested types as subcolumns
Variant type supports reading a single nested type from a Variant column using the type name as a subcolumn.
So, if you have column `variant Variant(T1, T2, T3)` you can read a subcolumn of type `T2` using syntax `variant.T2`,
this subcolumn will have type `Nullable(T2)` if `T2` can be inside `Nullable` and `T2` otherwise. This subcolumn will
be the same size as original `Variant` column and will contain `NULL` values (or empty values if `T2` cannot be inside `Nullable`)
in all rows in which original `Variant` column doesn't have type `T2`.
Variant subcolumns can be also read using function `variantElement(variant_column, type_name)`.
Examples:
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT v, v.String, v.UInt64, v.`Array(UInt64)` FROM test;
```
```text
┌─v─────────────┬─v.String──────┬─v.UInt64─┬─v.Array(UInt64)─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└───────────────┴───────────────┴──────────┴─────────────────┘
```
```sql
SELECT toTypeName(v.String), toTypeName(v.UInt64), toTypeName(v.`Array(UInt64)`) FROM test LIMIT 1;
```
```text
┌─toTypeName(v.String)─┬─toTypeName(v.UInt64)─┬─toTypeName(v.Array(UInt64))─┐
│ Nullable(String) │ Nullable(UInt64) │ Array(UInt64) │
└──────────────────────┴──────────────────────┴─────────────────────────────┘
```
```sql
SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test;
```
```text
┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
```
To find out which variant is stored in each row, the function `variantType(variant_column)` can be used. It returns `Enum` with variant type name for each row (or `'None'` if row is `NULL`).
Example:
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT variantType(v) from test;
```
```text
┌─variantType(v)─┐
│ None │
│ UInt64 │
│ String │
│ Array(UInt64) │
└────────────────┘
```
```sql
SELECT toTypeName(variantType(v)) FROM test LIMIT 1;
```
```text
┌─toTypeName(variantType(v))──────────────────────────────────────────┐
│ Enum8('None' = -1, 'Array(UInt64)' = 0, 'String' = 1, 'UInt64' = 2) │
└─────────────────────────────────────────────────────────────────────┘
```
## Conversion between Variant column and other columns
There are 3 possible conversions that can be performed with Variant column.
### Converting an ordinary column to a Variant column
It is possible to convert ordinary column with type `T` to a `Variant` column containing this type:
```sql
SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant;
```
```text
┌─type_name──────────────────────────────┬─variant───────┐
│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │
└────────────────────────────────────────┴───────────────┘
```
### Converting a Variant column to an ordinary column
It is possible to convert a `Variant` column to an ordinary column. In this case all nested variants will be converted to a destination type:
```sql
CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('42.42');
SELECT v::Nullable(Float64) FROM test;
```
```text
┌─CAST(v, 'Nullable(Float64)')─┐
│ ᴺᵁᴸᴸ │
│ 42 │
│ 42.42 │
└──────────────────────────────┘
```
### Converting a Variant to another Variant
It is possible to convert a `Variant` column to another `Variant` column, but only if the destination `Variant` column contains all nested types from the original `Variant`:
```sql
CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('String');
SELECT v::Variant(UInt64, String, Array(UInt64)) FROM test;
```
```text
┌─CAST(v, 'Variant(UInt64, String, Array(UInt64))')─┐
│ ᴺᵁᴸᴸ │
│ 42 │
│ String │
└───────────────────────────────────────────────────┘
```
## Reading Variant type from the data
All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc) support reading `Variant` type. During data parsing ClickHouse tries to insert value into the most appropriate variant type.
Example:
```sql
SELECT
v,
variantElement(v, 'String') AS str,
variantElement(v, 'UInt64') AS num,
variantElement(v, 'Float64') AS float,
variantElement(v, 'DateTime') AS date,
variantElement(v, 'Array(UInt64)') AS arr
FROM format(JSONEachRow, 'v Variant(String, UInt64, Float64, DateTime, Array(UInt64))', $$
{"v" : "Hello, World!"},
{"v" : 42},
{"v" : 42.42},
{"v" : "2020-01-01 00:00:00"},
{"v" : [1, 2, 3]}
$$)
```
```text
┌─v───────────────────┬─str───────────┬──num─┬─float─┬────────────────date─┬─arr─────┐
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42.42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │
│ 2020-01-01 00:00:00 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-01 00:00:00 │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└─────────────────────┴───────────────┴──────┴───────┴─────────────────────┴─────────┘
```

View File

@ -2832,6 +2832,88 @@ Result:
└─────────────────────────────────────────────────────────────────────────┘
```
## variantElement
Extracts a column with specified type from a `Variant` column.
**Syntax**
```sql
variantElement(variant, type_name[, default_value])
```
**Arguments**
- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md).
- `type_name` — The name of the variant type to extract. [String](../../sql-reference/data-types/string.md).
- `default_value` — The default value that will be used if the variant doesn't have a variant with the specified type. Can be any type. Optional.
**Returned value**
- Subcolumn of a `Variant` column with specified type.
**Example**
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test;
```
```text
┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
```
## variantType
Returns the variant type name for each row of a `Variant` column. If a row contains NULL, it returns `'None'` for that row.
**Syntax**
```sql
variantType(variant)
```
**Arguments**
- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md).
**Returned value**
- Enum8 column with variant type name for each row.
**Example**
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT variantType(v) FROM test;
```
```text
┌─variantType(v)─┐
│ None │
│ UInt64 │
│ String │
│ Array(UInt64) │
└────────────────┘
```
```sql
SELECT toTypeName(variantType(v)) FROM test LIMIT 1;
```
```text
┌─toTypeName(variantType(v))──────────────────────────────────────────┐
│ Enum8('None' = -1, 'Array(UInt64)' = 0, 'String' = 1, 'UInt64' = 2) │
└─────────────────────────────────────────────────────────────────────┘
```
## minSampleSizeConversion
Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples.

View File

@ -515,7 +515,7 @@ Alias: `concat_ws`
**Arguments**
- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- exprN — expression to be concatenated. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- exprN — expression to be concatenated. Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments.
**Returned values**

View File

@ -77,8 +77,8 @@ The number of data points in `series` should be at least twice the value of `per
**Returned value**
- An array of three arrays where the first array include seasonal components, the second array - trend,
and the third array - residue component.
- An array of four arrays where the first array includes seasonal components, the second array - trend,
the third array - residue component, and the fourth array - baseline (seasonal + trend) component.
Type: [Array](../../sql-reference/data-types/array.md).
@ -107,6 +107,10 @@ Result:
[
0, 0.0000019073486, -0.0000019073486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0000019073486, 0,
0
],
[
10.1, 20.449999, 40.340004, 10.100001, 20.45, 40.34, 10.100001, 20.45, 40.34, 10.1, 20.45, 40.34,
10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.100002, 20.45, 40.34
]] │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

View File

@ -112,7 +112,7 @@ Note that:
For the query to run successfully, the following conditions must be met:
- Both tables must have the same structure.
- Both tables must have the same partition key, the same order by key and the same primary key.
- Both tables must have the same order by key and the same primary key.
- Both tables must have the same indices and projections.
- Both tables must have the same storage policy.

View File

@ -21,7 +21,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).

View File

@ -26,8 +26,11 @@ slug: /ru/operations/system-tables/quota_usage
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество неудачных попыток подряд ввести пароль. Если пользователь ввел верный пароль до преодоления порогового значения `max_failed_sequential_authentications`, то счетчик неудачных попыток будет сброшен.
- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество неудачных попыток подряд ввести пароль.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also}

View File

@ -29,9 +29,10 @@ slug: /ru/operations/system-tables/quotas_usage
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Общее количество неудачных попыток подряд ввести пароль. Если пользователь ввел верный пароль до преодоления порогового значения `max_failed_sequential_authentications`, то счетчик неудачных попыток будет сброшен.
- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Максимальное количество неудачных попыток подряд ввести пароль.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -22,7 +22,7 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).

View File

@ -20,7 +20,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
```
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).

View File

@ -70,6 +70,19 @@
if (params.has('password')) { password = params.get('password'); }
}
let url = `${host}?allow_introspection_functions=1`;
if (add_http_cors_header) {
url += '&add_http_cors_header=1';
}
if (user) {
url += `&user=${encodeURIComponent(user)}`;
}
if (password) {
url += `&password=${encodeURIComponent(password)}`;
}
let map = L.map('space', {
crs: L.CRS.Simple,
center: [-512, 512],
@ -103,24 +116,11 @@
const key = `${coords.z}-${coords.x}-${coords.y}`;
let buf = cached_tiles[key];
if (!buf) {
let url = `${host}?default_format=RowBinary&allow_introspection_functions=1`;
let request_url = `${url}&default_format=RowBinary` +
`&param_z=${coords.z}&param_x=${coords.x}&param_y=${coords.y}` +
`&enable_http_compression=1&network_compression_method=zstd&network_zstd_compression_level=6`;
if (add_http_cors_header) {
// For debug purposes, you may set add_http_cors_header from a browser console
url += '&add_http_cors_header=1';
}
if (user) {
url += `&user=${encodeURIComponent(user)}`;
}
if (password) {
url += `&password=${encodeURIComponent(password)}`;
}
url += `&param_z=${coords.z}&param_x=${coords.x}&param_y=${coords.y}`;
url += `&enable_http_compression=1&network_compression_method=zstd&network_zstd_compression_level=6`;
const response = await fetch(url, { method: 'POST', body: sql });
const response = await fetch(request_url, { method: 'POST', body: sql });
if (!response.ok) {
const text = await response.text();
@ -238,7 +238,7 @@
const addr_hex = '0x' + addr_int.toString(16);
const response = fetch(
`http://localhost:8123/?default_format=JSON`,
`${url}&default_format=JSON`,
{
method: 'POST',
body: `SELECT encodeXMLComponent(demangle(addressToSymbol(${addr_int}::UInt64))) AS name,

View File

@ -5,6 +5,7 @@
<title>ClickHouse Dashboard</title>
<link rel="icon" href="">
<script src="https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.iife.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/lz-string@1.5.0/libs/lz-string.min.js"></script>
<style>
:root {
--color: black;
@ -484,7 +485,6 @@
*
* TODO:
* - zoom on the graphs should work on touch devices;
* - compress the state for URL's #hash;
* - footer with "about" or a link to source code;
* - allow to configure a table on a server to save the dashboards;
* - if a query returned one value, display this value instead of a diagram;
@ -1384,7 +1384,7 @@ document.getElementById('params').onsubmit = function(event) {
function saveState() {
const state = { host, user, queries, params, search_query, customized };
history.pushState(state, '',
window.location.pathname + (window.location.search || '') + '#' + btoa(JSON.stringify(state)));
window.location.pathname + (window.location.search || '') + '#' + LZString.compressToEncodedURIComponent(JSON.stringify(state)));
}
async function searchQueries() {
@ -1450,7 +1450,13 @@ window.onpopstate = function(event) {
if (window.location.hash) {
try {
let search_query_, customized_;
({host, user, queries, params, search_query_, customized_} = JSON.parse(atob(window.location.hash.substring(1))));
try {
({host, user, queries, params, search_query_, customized_} = JSON.parse(LZString.decompressFromEncodedURIComponent(window.location.hash.substring(1))));
} catch {
// For compatibility with uncompressed state
({host, user, queries, params, search_query_, customized_} = JSON.parse(atob(window.location.hash.substring(1))));
}
// For compatibility with old URLs' hashes
search_query = search_query_ !== undefined ? search_query_ : search_query;
customized = customized_ !== undefined ? customized_ : true;

View File

@ -0,0 +1 @@
var LZString=function(){var r=String.fromCharCode,o="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",n="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-$",e={};function t(r,o){if(!e[r]){e[r]={};for(var n=0;n<r.length;n++)e[r][r.charAt(n)]=n}return e[r][o]}var i={compressToBase64:function(r){if(null==r)return"";var n=i._compress(r,6,function(r){return o.charAt(r)});switch(n.length%4){default:case 0:return n;case 1:return n+"===";case 2:return n+"==";case 3:return n+"="}},decompressFromBase64:function(r){return null==r?"":""==r?null:i._decompress(r.length,32,function(n){return t(o,r.charAt(n))})},compressToUTF16:function(o){return null==o?"":i._compress(o,15,function(o){return r(o+32)})+" "},decompressFromUTF16:function(r){return null==r?"":""==r?null:i._decompress(r.length,16384,function(o){return r.charCodeAt(o)-32})},compressToUint8Array:function(r){for(var o=i.compress(r),n=new Uint8Array(2*o.length),e=0,t=o.length;e<t;e++){var s=o.charCodeAt(e);n[2*e]=s>>>8,n[2*e+1]=s%256}return n},decompressFromUint8Array:function(o){if(null==o)return i.decompress(o);for(var n=new Array(o.length/2),e=0,t=n.length;e<t;e++)n[e]=256*o[2*e]+o[2*e+1];var s=[];return n.forEach(function(o){s.push(r(o))}),i.decompress(s.join(""))},compressToEncodedURIComponent:function(r){return null==r?"":i._compress(r,6,function(r){return n.charAt(r)})},decompressFromEncodedURIComponent:function(r){return null==r?"":""==r?null:(r=r.replace(/ /g,"+"),i._decompress(r.length,32,function(o){return t(n,r.charAt(o))}))},compress:function(o){return i._compress(o,16,function(o){return r(o)})},_compress:function(r,o,n){if(null==r)return"";var 
e,t,i,s={},u={},a="",p="",c="",l=2,f=3,h=2,d=[],m=0,v=0;for(i=0;i<r.length;i+=1)if(a=r.charAt(i),Object.prototype.hasOwnProperty.call(s,a)||(s[a]=f++,u[a]=!0),p=c+a,Object.prototype.hasOwnProperty.call(s,p))c=p;else{if(Object.prototype.hasOwnProperty.call(u,c)){if(c.charCodeAt(0)<256){for(e=0;e<h;e++)m<<=1,v==o-1?(v=0,d.push(n(m)),m=0):v++;for(t=c.charCodeAt(0),e=0;e<8;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1}else{for(t=1,e=0;e<h;e++)m=m<<1|t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t=0;for(t=c.charCodeAt(0),e=0;e<16;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1}0==--l&&(l=Math.pow(2,h),h++),delete u[c]}else for(t=s[c],e=0;e<h;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1;0==--l&&(l=Math.pow(2,h),h++),s[p]=f++,c=String(a)}if(""!==c){if(Object.prototype.hasOwnProperty.call(u,c)){if(c.charCodeAt(0)<256){for(e=0;e<h;e++)m<<=1,v==o-1?(v=0,d.push(n(m)),m=0):v++;for(t=c.charCodeAt(0),e=0;e<8;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1}else{for(t=1,e=0;e<h;e++)m=m<<1|t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t=0;for(t=c.charCodeAt(0),e=0;e<16;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1}0==--l&&(l=Math.pow(2,h),h++),delete u[c]}else for(t=s[c],e=0;e<h;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1;0==--l&&(l=Math.pow(2,h),h++)}for(t=2,e=0;e<h;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1;for(;;){if(m<<=1,v==o-1){d.push(n(m));break}v++}return d.join("")},decompress:function(r){return null==r?"":""==r?null:i._decompress(r.length,32768,function(o){return r.charCodeAt(o)})},_decompress:function(o,n,e){var t,i,s,u,a,p,c,l=[],f=4,h=4,d=3,m="",v=[],g={val:e(0),position:n,index:1};for(t=0;t<3;t+=1)l[t]=t;for(s=0,a=Math.pow(2,2),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;switch(s){case 0:for(s=0,a=Math.pow(2,8),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;c=r(s);break;case 
1:for(s=0,a=Math.pow(2,16),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;c=r(s);break;case 2:return""}for(l[3]=c,i=c,v.push(c);;){if(g.index>o)return"";for(s=0,a=Math.pow(2,d),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;switch(c=s){case 0:for(s=0,a=Math.pow(2,8),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;l[h++]=r(s),c=h-1,f--;break;case 1:for(s=0,a=Math.pow(2,16),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;l[h++]=r(s),c=h-1,f--;break;case 2:return v.join("")}if(0==f&&(f=Math.pow(2,d),d++),l[c])m=l[c];else{if(c!==h)return null;m=i+i.charAt(0)}v.push(m),l[h++]=i+m.charAt(0),i=m,0==--f&&(f=Math.pow(2,d),d++)}}};return i}();"function"==typeof define&&define.amd?define(function(){return LZString}):"undefined"!=typeof module&&null!=module?module.exports=LZString:"undefined"!=typeof angular&&null!=angular&&angular.module("LZString",[]).factory("LZString",function(){return LZString});

View File

@ -2,6 +2,7 @@ use prql_compiler::sql::Dialect;
use prql_compiler::{Options, Target};
use std::ffi::{c_char, CString};
use std::slice;
use std::panic;
fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
assert!(!out_size.is_null());
@ -13,8 +14,7 @@ fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
*out_ptr = CString::new(result).unwrap().into_raw() as *mut u8;
}
#[no_mangle]
pub unsafe extern "C" fn prql_to_sql(
pub unsafe extern "C" fn prql_to_sql_impl(
query: *const u8,
size: u64,
out: *mut *mut u8,
@ -50,6 +50,23 @@ pub unsafe extern "C" fn prql_to_sql(
}
}
/// C ABI entry point: forwards its arguments to `prql_to_sql_impl` while
/// guarding the call with `panic::catch_unwind`.
///
/// Returns whatever status `prql_to_sql_impl` returned, or `1` when the
/// implementation panicked. In the panic case this wrapper itself does not
/// write anything to `out` / `out_size`.
#[no_mangle]
pub unsafe extern "C" fn prql_to_sql(
    query: *const u8,
    size: u64,
    out: *mut *mut u8,
    out_size: *mut u64,
) -> i64 {
    // NOTE: using cxxbridge we can return proper Result<> type.
    // A caught panic (the `Err` side of `catch_unwind`) is mapped to status 1;
    // the panic payload is dropped.
    panic::catch_unwind(|| prql_to_sql_impl(query, size, out, out_size)).unwrap_or(1)
}
#[no_mangle]
pub unsafe extern "C" fn prql_free_pointer(ptr_to_free: *mut u8) {
std::mem::drop(CString::from_raw(ptr_to_free as *mut c_char));

View File

@ -1,6 +1,7 @@
use skim::prelude::*;
use term::terminfo::TermInfo;
use cxx::{CxxString, CxxVector};
use std::panic;
#[cxx::bridge]
mod ffi {
@ -36,7 +37,7 @@ impl SkimItem for Item {
}
}
fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, String> {
fn skim_impl(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, String> {
// Let's check is terminal available. To avoid panic.
if let Err(err) = TermInfo::from_env() {
return Err(format!("{}", err));
@ -89,3 +90,22 @@ fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, Stri
}
return Ok(output.selected_items[0].output().to_string());
}
/// Runs `skim_impl` under `panic::catch_unwind` and folds a caught panic into
/// the `Err` side of the returned `Result`.
///
/// On success the result of `skim_impl` is returned unchanged. On panic the
/// payload is inspected: a `String` or `&str` payload is used as the message,
/// anything else is reported via its type id. The message is then wrapped as
/// `Rust panic: <message>` (formatted with `{:?}`).
fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, String> {
    match panic::catch_unwind(|| skim_impl(prefix, words)) {
        Ok(outcome) => outcome,
        Err(payload) => {
            // Try the payload types in the same order as before: owned String
            // first, then a string slice, then the generic fallback.
            let message = payload
                .downcast_ref::<String>()
                .map(|s| format!("{}", s))
                .or_else(|| payload.downcast_ref::<&str>().map(|s| format!("{}", s)))
                .unwrap_or_else(|| format!("Unknown panic type: {:?}", payload.type_id()));
            Err(format!("Rust panic: {:?}", message))
        }
    }
}

View File

@ -559,12 +559,33 @@ AccessChangesNotifier & AccessControl::getChangesNotifier()
}
AuthResult AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const
AuthResult AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const String & forwarded_address) const
{
// NOTE: In the case where the user has never been logged in using LDAP,
// Then user_id is not generated, and the authentication quota will always be nullptr.
auto authentication_quota = getAuthenticationQuota(credentials.getUserName(), address, forwarded_address);
if (authentication_quota)
{
/// Reserve a single try from the quota to check whether we have another authentication try.
/// This is required for correct behavior in this situation:
/// User has 1 login failures quota.
/// * At the first login with an invalid password: Increase the quota counter. 1 (used) > 1 (max) is false.
///   Then try to authenticate the user and throw an AUTHENTICATION_FAILED error.
/// * In case of the second try: increase quota counter, 2 (used) > 1 (max), then throw QUOTA_EXCEED
///   and don't let the user authenticate.
///
/// The authentication failures counter will be reset after successful authentication.
authentication_quota->used(QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS, 1);
}
try
{
return MultipleAccessStorage::authenticate(credentials, address, *external_authenticators, allow_no_password,
allow_plaintext_password);
const auto auth_result = MultipleAccessStorage::authenticate(credentials, address, *external_authenticators, allow_no_password,
allow_plaintext_password);
if (authentication_quota)
authentication_quota->reset(QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS);
return auth_result;
}
catch (...)
{
@ -763,7 +784,34 @@ std::shared_ptr<const EnabledQuota> AccessControl::getEnabledQuota(
const String & forwarded_address,
const String & custom_quota_key) const
{
return quota_cache->getEnabledQuota(user_id, user_name, enabled_roles, address, forwarded_address, custom_quota_key);
return quota_cache->getEnabledQuota(user_id, user_name, enabled_roles, address, forwarded_address, custom_quota_key, true);
}
/// Looks up the enabled quota to be used while authenticating `user_name`.
///
/// Returns nullptr when no user with that name is found or when the user
/// entity cannot be read. Otherwise the quota is resolved from the quota cache
/// for the user's granted default roles, the client `address` and
/// `forwarded_address`, using an empty client key.
std::shared_ptr<const EnabledQuota> AccessControl::getAuthenticationQuota(
    const String & user_name, const Poco::Net::IPAddress & address, const std::string & forwarded_address) const
{
    const auto user_id = find<User>(user_name);
    if (!user_id)
        return nullptr;

    const UserPtr user = tryRead<User>(*user_id);
    if (!user)
        return nullptr;

    const auto default_granted_roles = user->granted_roles.findGranted(user->default_roles);
    const auto enabled_roles_info = getEnabledRolesInfo(default_granted_roles, {});

    // client_key is not received at the moment of authentication during TCP connection
    // if key type is set to QuotaKeyType::CLIENT_KEY
    // QuotaCache::QuotaInfo::calculateKey will throw exception without throw_if_client_key_empty = false
    const String empty_client_key;
    constexpr bool throw_if_client_key_empty = false;

    return quota_cache->getEnabledQuota(
        *user_id,
        user->getName(),
        enabled_roles_info->enabled_roles,
        address,
        forwarded_address,
        empty_client_key,
        throw_if_client_key_empty);
}

View File

@ -118,7 +118,7 @@ public:
scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const;
scope_guard subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const;
AuthResult authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
AuthResult authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const String & forwarded_address) const;
/// Makes a backup of access entities.
void restoreFromBackup(RestorerFromBackup & restorer) override;
@ -206,6 +206,11 @@ public:
const String & forwarded_address,
const String & custom_quota_key) const;
std::shared_ptr<const EnabledQuota> getAuthenticationQuota(
const String & user_name,
const Poco::Net::IPAddress & address,
const std::string & forwarded_address) const;
std::vector<QuotaUsage> getAllQuotasUsage() const;
std::shared_ptr<const EnabledSettings> getEnabledSettings(

View File

@ -111,6 +111,11 @@ const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type)
static const auto info = make_info("WRITTEN_BYTES", 1);
return info;
}
case QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS:
{
static const auto info = make_info("FAILED_SEQUENTIAL_AUTHENTICATIONS", 1);
return info;
}
case QuotaType::MAX: break;
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected quota type: {}", static_cast<int>(type));

View File

@ -11,16 +11,17 @@ using QuotaValue = UInt64;
/// Kinds of resource what we wish to quota.
enum class QuotaType
{
QUERIES, /// Number of queries.
QUERY_SELECTS, /// Number of select queries.
QUERY_INSERTS, /// Number of insert queries.
ERRORS, /// Number of queries with exceptions.
RESULT_ROWS, /// Number of rows returned as result.
RESULT_BYTES, /// Number of bytes returned as result.
READ_ROWS, /// Number of rows read from tables.
READ_BYTES, /// Number of bytes read from tables.
EXECUTION_TIME, /// Total amount of query execution time in nanoseconds.
WRITTEN_BYTES, /// Number of bytes written to tables.
QUERIES, /// Number of queries.
QUERY_SELECTS, /// Number of select queries.
QUERY_INSERTS, /// Number of insert queries.
ERRORS, /// Number of queries with exceptions.
RESULT_ROWS, /// Number of rows returned as result.
RESULT_BYTES, /// Number of bytes returned as result.
READ_ROWS, /// Number of rows read from tables.
READ_BYTES, /// Number of bytes read from tables.
EXECUTION_TIME, /// Total amount of query execution time in nanoseconds.
WRITTEN_BYTES, /// Number of bytes written to tables.
FAILED_SEQUENTIAL_AUTHENTICATIONS, /// Number of recent failed authentications.
MAX
};

View File

@ -105,6 +105,16 @@ struct EnabledQuota::Impl
std::uniform_int_distribution<Int64> distribution{0, count - 1};
return std::chrono::system_clock::duration(distribution(thread_local_rng));
}
/// Overwrites the usage counter of `quota_type` with `value` in every interval
/// of `intervals`, then calls `getEndOfInterval(current_time)` on that interval.
/// The result of `getEndOfInterval` is intentionally unused here.
/// NOTE(review): presumably it is called for its side effect on the interval's
/// end time - confirm against Interval::getEndOfInterval.
static void resetQuotaValue(const Intervals & intervals, QuotaType quota_type, QuotaValue value, std::chrono::system_clock::time_point current_time)
{
    const size_t counter_index = static_cast<size_t>(quota_type);

    for (const auto & entry : intervals.intervals)
    {
        entry.used[counter_index] = value;
        entry.getEndOfInterval(current_time);
    }
}
};
@ -285,6 +295,12 @@ void EnabledQuota::checkExceeded(QuotaType quota_type) const
}
/// Resets the usage counter of `quota_type` to zero in all currently loaded
/// intervals, using the current system time as the reference point.
void EnabledQuota::reset(QuotaType quota_type) const
{
    const auto current_intervals = intervals.load();
    const auto now = std::chrono::system_clock::now();
    constexpr QuotaValue zero_usage = 0;

    Impl::resetQuotaValue(*current_intervals, quota_type, zero_usage, now);
}
std::optional<QuotaUsage> EnabledQuota::getUsage() const
{
auto loaded = intervals.load();

View File

@ -52,6 +52,8 @@ public:
void checkExceeded() const;
void checkExceeded(QuotaType quota_type) const;
void reset(QuotaType quota_type) const;
/// Returns the information about quota consumption.
std::optional<QuotaUsage> getUsage() const;

View File

@ -30,7 +30,7 @@ void QuotaCache::QuotaInfo::setQuota(const QuotaPtr & quota_, const UUID & quota
}
String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled) const
String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled, bool throw_if_client_key_empty) const
{
const auto & params = enabled.params;
switch (quota->key_type)
@ -55,8 +55,15 @@ String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled) const
{
if (!params.client_key.empty())
return params.client_key;
throw Exception(ErrorCodes::QUOTA_REQUIRES_CLIENT_KEY, "Quota {} (for user {}) requires a client supplied key.",
quota->getName(), params.user_name);
if (throw_if_client_key_empty)
throw Exception(
ErrorCodes::QUOTA_REQUIRES_CLIENT_KEY,
"Quota {} (for user {}) requires a client supplied key.",
quota->getName(),
params.user_name);
else
return ""; // Authentication quota has no client key at time of authentication.
}
case QuotaKeyType::CLIENT_KEY_OR_USER_NAME:
{
@ -165,7 +172,14 @@ QuotaCache::QuotaCache(const AccessControl & access_control_)
QuotaCache::~QuotaCache() = default;
std::shared_ptr<const EnabledQuota> QuotaCache::getEnabledQuota(const UUID & user_id, const String & user_name, const boost::container::flat_set<UUID> & enabled_roles, const Poco::Net::IPAddress & client_address, const String & forwarded_address, const String & client_key)
std::shared_ptr<const EnabledQuota> QuotaCache::getEnabledQuota(
const UUID & user_id,
const String & user_name,
const boost::container::flat_set<UUID> & enabled_roles,
const Poco::Net::IPAddress & client_address,
const String & forwarded_address,
const String & client_key,
bool throw_if_client_key_empty)
{
std::lock_guard lock{mutex};
ensureAllQuotasRead();
@ -188,11 +202,10 @@ std::shared_ptr<const EnabledQuota> QuotaCache::getEnabledQuota(const UUID & use
auto res = std::shared_ptr<EnabledQuota>(new EnabledQuota(params));
enabled_quotas.emplace(std::move(params), res);
chooseQuotaToConsumeFor(*res);
chooseQuotaToConsumeFor(*res, throw_if_client_key_empty);
return res;
}
void QuotaCache::ensureAllQuotasRead()
{
/// `mutex` is already locked.
@ -257,13 +270,13 @@ void QuotaCache::chooseQuotaToConsume()
i = enabled_quotas.erase(i);
else
{
chooseQuotaToConsumeFor(*elem);
chooseQuotaToConsumeFor(*elem, true);
++i;
}
}
}
void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled)
void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled, bool throw_if_client_key_empty)
{
/// `mutex` is already locked.
boost::shared_ptr<const Intervals> intervals;
@ -271,7 +284,7 @@ void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled)
{
if (info.roles->match(enabled.params.user_id, enabled.params.enabled_roles))
{
String key = info.calculateKey(enabled);
String key = info.calculateKey(enabled, throw_if_client_key_empty);
intervals = info.getOrBuildIntervals(key);
break;
}

View File

@ -28,7 +28,8 @@ public:
const boost::container::flat_set<UUID> & enabled_roles,
const Poco::Net::IPAddress & address,
const String & forwarded_address,
const String & client_key);
const String & client_key,
bool throw_if_client_key_empty);
std::vector<QuotaUsage> getAllQuotasUsage() const;
@ -41,7 +42,7 @@ private:
QuotaInfo(const QuotaPtr & quota_, const UUID & quota_id_) { setQuota(quota_, quota_id_); }
void setQuota(const QuotaPtr & quota_, const UUID & quota_id_);
String calculateKey(const EnabledQuota & enabled_quota) const;
String calculateKey(const EnabledQuota & enabled_quota, bool throw_if_client_key_empty) const;
boost::shared_ptr<const Intervals> getOrBuildIntervals(const String & key);
boost::shared_ptr<const Intervals> rebuildIntervals(const String & key, std::chrono::system_clock::time_point current_time);
void rebuildAllIntervals();
@ -56,7 +57,7 @@ private:
void quotaAddedOrChanged(const UUID & quota_id, const std::shared_ptr<const Quota> & new_quota);
void quotaRemoved(const UUID & quota_id);
void chooseQuotaToConsume();
void chooseQuotaToConsumeFor(EnabledQuota & enabled_quota);
void chooseQuotaToConsumeFor(EnabledQuota & enabled_quota, bool throw_if_client_key_empty);
const AccessControl & access_control;
mutable std::mutex mutex;

View File

@ -291,8 +291,17 @@ public:
const UInt64 size = value.size();
checkArraySize(size, max_elems);
writeVarUInt(size, buf);
for (const auto & element : value)
writeBinaryLittleEndian(element, buf);
if constexpr (std::endian::native == std::endian::little)
{
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
}
else
{
for (const auto & element : value)
writeBinaryLittleEndian(element, buf);
}
if constexpr (Trait::last)
writeBinaryLittleEndian(this->data(place).total_values, buf);
@ -315,8 +324,16 @@ public:
auto & value = this->data(place).value;
value.resize_exact(size, arena);
for (auto & element : value)
readBinaryLittleEndian(element, buf);
if constexpr (std::endian::native == std::endian::little)
{
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
}
else
{
for (auto & element : value)
readBinaryLittleEndian(element, buf);
}
if constexpr (Trait::last)
readBinaryLittleEndian(this->data(place).total_values, buf);

View File

@ -0,0 +1,414 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <base/sort.h>
#include <algorithm>
#include <type_traits>
#include <utility>
#include <Common/RadixSort.h>
#include <Common/Exception.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnConst.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
extern const int TOO_LARGE_ARRAY_SIZE;
}
namespace
{
/// Selects how GroupArraySortedData::addElement keeps the smallest `max_elements` values.
enum class GroupArraySortedStrategy
{
/// Keep `values` as a heap; once it holds `max_elements` items a new element only replaces the top if it is smaller.
heap,
/// Append every element and trim the array with nth_element once it has grown enough.
sort
};
/// The factory function picks the `sort` strategy when the limit parameter is strictly greater than this value and `heap` otherwise.
constexpr size_t group_array_sorted_sort_strategy_max_elements_threshold = 1000000;
/// Aggregation state of groupArraySorted: an arena-backed array that collects values and is
/// reduced to the `max_elements` smallest ones, either incrementally (heap) or lazily (sort).
/// T is either a numeric-like value type or the generic Field.
template <typename T, GroupArraySortedStrategy strategy>
struct GroupArraySortedData
{
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
/// With the sort strategy the array is trimmed only after it reaches max_elements * this factor.
static constexpr size_t partial_sort_max_elements_factor = 2;
static constexpr bool is_value_generic_field = std::is_same_v<T, Field>;
Array values;
/// Strict "less than" ordering used by every sort/heap operation of this state.
static bool compare(const T & lhs, const T & rhs)
{
if constexpr (is_value_generic_field)
{
return lhs < rhs;
}
else
{
/// NOTE(review): the third argument looks like a NaN direction hint - confirm against CompareHelper.
return CompareHelper<T>::less(lhs, rhs, -1);
}
}
/// Function object wrapper around compare() for standard algorithms.
struct Comparator
{
bool operator()(const T & lhs, const T & rhs)
{
return compare(lhs, rhs);
}
};
/// Restores the heap property after values[0] has been overwritten, by sifting the new top down.
/// The heap is ordered by compare(), so values[0] is the largest retained element.
ALWAYS_INLINE void heapReplaceTop()
{
size_t size = values.size();
if (size < 2)
return;
/// Pick the larger of the (up to two) children of the root.
size_t child_index = 1;
if (values.size() > 2 && compare(values[1], values[2]))
++child_index;
/// Check if we are in order
if (compare(values[child_index], values[0]))
return;
size_t current_index = 0;
/// Keep the sifted value aside; it is written once, into its final position, after the loop.
auto current = values[current_index];
do
{
/// We are not in heap-order, swap the parent with its largest child.
values[current_index] = values[child_index];
current_index = child_index;
// Recompute the child based off of the updated parent
child_index = 2 * child_index + 1;
if (child_index >= size)
break;
if ((child_index + 1) < size && compare(values[child_index], values[child_index + 1]))
{
/// Right child exists and is greater than left child.
++child_index;
}
/// Check if we are in order.
} while (!compare(values[child_index], current));
values[current_index] = current;
}
/// Fully sorts `values` in ascending compare() order and truncates it to at most `max_elements` items.
ALWAYS_INLINE void sortAndLimit(size_t max_elements, Arena * arena)
{
if constexpr (is_value_generic_field)
{
::sort(values.begin(), values.end(), Comparator());
}
else
{
/// Fall back to an LSD radix sort when trySort reports that it did not sort the range.
bool try_sort = trySort(values.begin(), values.end(), Comparator());
if (!try_sort)
RadixSort<RadixSortNumTraits<T>>::executeLSD(values.data(), values.size());
}
if (values.size() > max_elements)
values.resize(max_elements, arena);
}
/// Sort strategy only: once the array holds max_elements * partial_sort_max_elements_factor items,
/// moves the `max_elements` smallest ones to the front (unordered) and drops the rest.
ALWAYS_INLINE void partialSortAndLimitIfNeeded(size_t max_elements, Arena * arena)
{
if (values.size() < max_elements * partial_sort_max_elements_factor)
return;
::nth_element(values.begin(), values.begin() + max_elements, values.end(), Comparator());
values.resize(max_elements, arena);
}
/// Adds one value to the state according to `strategy`.
/// The heap branch reads values[0] once values.size() >= max_elements, so it relies on max_elements >= 1.
ALWAYS_INLINE void addElement(T && element, size_t max_elements, Arena * arena)
{
if constexpr (strategy == GroupArraySortedStrategy::heap)
{
if (values.size() >= max_elements)
{
/// Element is greater or equal than current max element, it cannot be in k min elements
if (!compare(element, values[0]))
return;
values[0] = std::move(element);
heapReplaceTop();
return;
}
values.push_back(std::move(element), arena);
std::push_heap(values.begin(), values.end(), Comparator());
}
else
{
values.push_back(std::move(element), arena);
partialSortAndLimitIfNeeded(max_elements, arena);
}
}
/// Sorts and limits the state, then appends it as one array row to the ColumnArray `to`.
ALWAYS_INLINE void insertResultInto(IColumn & to, size_t max_elements, Arena * arena)
{
auto & result_array = assert_cast<ColumnArray &>(to);
auto & result_array_offsets = result_array.getOffsets();
sortAndLimit(max_elements, arena);
/// The offset is pushed even for an empty state, so that an empty array row is produced.
result_array_offsets.push_back(result_array_offsets.back() + values.size());
if (values.empty())
return;
if constexpr (is_value_generic_field)
{
auto & result_array_data = result_array.getData();
for (auto & value : values)
result_array_data.insert(value);
}
else
{
/// Numeric case: grow the nested column's data once and copy the values directly.
auto & result_array_data = assert_cast<ColumnVector<T> &>(result_array.getData()).getData();
size_t result_array_data_insert_begin = result_array_data.size();
result_array_data.resize(result_array_data_insert_begin + values.size());
for (size_t i = 0; i < values.size(); ++i)
result_array_data[result_array_data_insert_begin + i] = values[i];
}
}
};
/// Aggregation state that uses the heap strategy.
template <typename T>
using GroupArraySortedDataHeap = GroupArraySortedData<T, GroupArraySortedStrategy::heap>;
/// Aggregation state that uses the sort strategy.
template <typename T>
using GroupArraySortedDataSort = GroupArraySortedData<T, GroupArraySortedStrategy::sort>;
/// Largest accepted limit parameter; the GroupArraySorted constructor throws BAD_ARGUMENTS above it.
constexpr UInt64 aggregate_function_group_array_sorted_max_element_size = 0xFFFFFF;
/** Aggregate function groupArraySorted(N)(x): returns an array with at most N smallest values of x in ascending order.
  *
  * Data is the aggregation state (GroupArraySortedData with the heap or the sort strategy),
  * T is the element type stored in the state: a numeric-like type or the generic Field.
  */
template <typename Data, typename T>
class GroupArraySorted final
    : public IAggregateFunctionDataHelper<Data, GroupArraySorted<Data, T>>
{
public:
    /// Throws BAD_ARGUMENTS if `max_elements_` exceeds aggregate_function_group_array_sorted_max_element_size.
    explicit GroupArraySorted(
        const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elements_)
        : IAggregateFunctionDataHelper<Data, GroupArraySorted<Data, T>>(
            {data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
        , max_elements(max_elements_)
        , serialization(data_type_->getDefaultSerialization())
    {
        if (max_elements > aggregate_function_group_array_sorted_max_element_size)
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Too large limit parameter for groupArraySorted aggregate function, it should not exceed {}",
                aggregate_function_group_array_sorted_max_element_size);
    }

    String getName() const override { return "groupArraySorted"; }

    /// Adds the value at `row_num` of the single argument column to the state.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        if constexpr (std::is_same_v<T, Field>)
        {
            auto row_value = (*columns[0])[row_num];
            this->data(place).addElement(std::move(row_value), max_elements, arena);
        }
        else
        {
            auto row_value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
            this->data(place).addElement(std::move(row_value), max_elements, arena);
        }
    }

    /// Feeds every element of `rhs` into `place`; `rhs` itself is not modified (elements are copied first).
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        auto & rhs_values = this->data(rhs).values;
        for (auto rhs_element : rhs_values)
            this->data(place).addElement(std::move(rhs_element), max_elements, arena);
    }

    /// Format: varuint element count, then the elements.
    /// Field elements are each preceded by a one-byte flag: `true` means a value follows, `false` means NULL.
    /// Other element types are written as raw little-endian values.
    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        auto & values = this->data(place).values;
        size_t size = values.size();
        writeVarUInt(size, buf);

        if constexpr (std::is_same_v<T, Field>)
        {
            for (const Field & element : values)
            {
                if (element.isNull())
                {
                    writeBinary(false, buf);
                }
                else
                {
                    writeBinary(true, buf);
                    serialization->serializeBinary(element, buf, {});
                }
            }
        }
        else
        {
            if constexpr (std::endian::native == std::endian::little)
            {
                /// The in-memory layout already matches the wire format, so write the whole array at once.
                buf.write(reinterpret_cast<const char *>(values.data()), size * sizeof(values[0]));
            }
            else
            {
                for (const auto & element : values)
                    writeBinaryLittleEndian(element, buf);
            }
        }
    }

    /// Reads the format produced by serialize().
    /// Throws TOO_LARGE_ARRAY_SIZE if the stored element count exceeds `max_elements`.
    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        size_t size = 0;
        readVarUInt(size, buf);

        if (unlikely(size > max_elements))
            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size, it should not exceed {}", max_elements);

        auto & values = this->data(place).values;
        values.resize_exact(size, arena);

        if constexpr (std::is_same_v<T, Field>)
        {
            for (Field & element : values)
            {
                /// The array was only resized, so construct a (NULL) Field in place before anything assigns to it.
                /// NOTE(review): assumes PODArray::resize_exact does not run element constructors - confirm.
                new (&element) Field;

                /// The flag mirrors serialize(): non-zero means a value follows, zero means the element is NULL.
                UInt8 has_value = 0;
                readBinary(has_value, buf);
                if (has_value)
                    serialization->deserializeBinary(element, buf, {});
            }
        }
        else
        {
            if constexpr (std::endian::native == std::endian::little)
            {
                buf.readStrict(reinterpret_cast<char *>(values.data()), size * sizeof(values[0]));
            }
            else
            {
                for (auto & element : values)
                    readBinaryLittleEndian(element, buf);
            }
        }
    }

    /// Sorts and limits the state and appends it as one array row to `to`.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
    {
        this->data(place).insertResultInto(to, max_elements, arena);
    }

    bool allocatesMemoryInArena() const override { return true; }

private:
    /// Maximum number of elements in the result array (the function's limit parameter).
    UInt64 max_elements;
    /// Default serialization of the argument type; used for Field elements only.
    SerializationPtr serialization;
};
/// groupArraySorted implementation whose state uses the heap strategy.
template <typename T>
using GroupArraySortedHeap = GroupArraySorted<GroupArraySortedDataHeap<T>, T>;
/// groupArraySorted implementation whose state uses the sort strategy.
template <typename T>
using GroupArraySortedSort = GroupArraySorted<GroupArraySortedDataSort<T>, T>;
/// Instantiates AggregateFunctionTemplate for the storage type of `argument_type`:
/// Date -> UInt16, DateTime -> UInt32, IPv4 -> IPv4, everything else is delegated to createWithNumericType.
/// The result is whatever createWithNumericType returned (wrapped into AggregateFunctionPtr) in the delegated case.
template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
AggregateFunctionPtr createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
{
    const auto type_index = WhichDataType(argument_type).idx;

    if (type_index == TypeIndex::Date)
    {
        using Function = AggregateFunctionTemplate<UInt16>;
        return std::make_shared<Function>(std::forward<TArgs>(args)...);
    }
    else if (type_index == TypeIndex::DateTime)
    {
        using Function = AggregateFunctionTemplate<UInt32>;
        return std::make_shared<Function>(std::forward<TArgs>(args)...);
    }
    else if (type_index == TypeIndex::IPv4)
    {
        using Function = AggregateFunctionTemplate<IPv4>;
        return std::make_shared<Function>(std::forward<TArgs>(args)...);
    }

    auto * numeric_function = createWithNumericType<AggregateFunctionTemplate, TArgs...>(argument_type, std::forward<TArgs>(args)...);
    return AggregateFunctionPtr(numeric_function);
}
/// Creates the aggregate function for `argument_type`: a typed implementation when
/// createWithNumericOrTimeType can provide one, otherwise the generic Field-based implementation.
template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
inline AggregateFunctionPtr createAggregateFunctionGroupArraySortedImpl(const DataTypePtr & argument_type, const Array & parameters, TArgs ... args)
{
    /// Pass `args` as lvalues here: if no typed implementation is created they are still needed
    /// by the fallback below, so they must not be moved from on the first attempt.
    if (auto res = createWithNumericOrTimeType<AggregateFunctionTemplate>(*argument_type, argument_type, parameters, args...))
        return res;

    /// Last use of `args`, so forwarding (moving) them is safe now.
    return std::make_shared<AggregateFunctionTemplate<Field>>(argument_type, parameters, std::forward<TArgs>(args)...);
}
/// Factory callback for groupArraySorted(limit)(x).
/// Requires exactly one argument and exactly one parameter: a strictly positive integer limit.
/// Throws BAD_ARGUMENTS for a missing, non-integer or non-positive limit and
/// NUMBER_OF_ARGUMENTS_DOESNT_MATCH for more than one parameter.
AggregateFunctionPtr createAggregateFunctionGroupArray(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertUnary(name, argument_types);

    if (parameters.empty())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should have limit argument", name);

    if (parameters.size() != 1)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Function {} does not support this number of arguments", name);

    const auto type = parameters[0].getType();
    if (type != Field::Types::Int64 && type != Field::Types::UInt64)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);

    /// Zero must be rejected for both signednesses: with a zero limit the heap strategy
    /// would read the top of an empty heap when the first element is added.
    if ((type == Field::Types::Int64 && parameters[0].get<Int64>() <= 0) ||
        (type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);

    const UInt64 max_elems = parameters[0].get<UInt64>();

    /// Large limits use the sort strategy, the rest use the heap strategy.
    if (max_elems > group_array_sorted_sort_strategy_max_elements_threshold)
        return createAggregateFunctionGroupArraySortedImpl<GroupArraySortedSort>(argument_types[0], parameters, max_elems);

    return createAggregateFunctionGroupArraySortedImpl<GroupArraySortedHeap>(argument_types[0], parameters, max_elems);
}
}
/// Registers the "groupArraySorted" aggregate function in `factory`.
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
    /// The result is sorted by the function itself, hence it is declared as not order dependent.
    const AggregateFunctionProperties function_properties
        = { .returns_default_when_only_null = false, .is_order_dependent = false };

    factory.registerFunction("groupArraySorted", { createAggregateFunctionGroupArray, function_properties });
}
}

View File

@ -31,7 +31,7 @@ namespace ErrorCodes
template <typename> class QuantileTiming;
template <typename> class QuantileGK;
template <typename> class QuantileDDSketch;
template <typename> class QuantileDD;
/** Generic aggregate function for calculation of quantiles.
* It depends on quantile calculation data structure. Look at Quantile*.h for various implementations.
@ -64,7 +64,7 @@ private:
using ColVecType = ColumnVectorOrDecimal<Value>;
static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
static constexpr bool is_quantile_ddsketch = std::is_same_v<Data, QuantileDDSketch<Value>>;
static constexpr bool is_quantile_ddsketch = std::is_same_v<Data, QuantileDD<Value>>;
static_assert(!is_decimal<Value> || !returns_float);
QuantileLevels<Float64> levels;
@ -334,7 +334,7 @@ struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBF
struct NameQuantileGK { static constexpr auto name = "quantileGK"; };
struct NameQuantilesGK { static constexpr auto name = "quantilesGK"; };
struct NameQuantileDDSketch { static constexpr auto name = "quantileDDSketch"; };
struct NameQuantilesDDSketch { static constexpr auto name = "quantilesDDSketch"; };
struct NameQuantileDD { static constexpr auto name = "quantileDD"; };
struct NameQuantilesDD { static constexpr auto name = "quantilesDD"; };
}

View File

@ -1,5 +1,5 @@
#include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/QuantileDDSketch.h>
#include <AggregateFunctions/QuantileDD.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
@ -19,8 +19,8 @@ namespace ErrorCodes
namespace
{
template <typename Value, bool float_return> using FuncQuantileDDSketch = AggregateFunctionQuantile<Value, QuantileDDSketch<Value>, NameQuantileDDSketch, false, std::conditional_t<float_return, Float64, void>, false, true>;
template <typename Value, bool float_return> using FuncQuantilesDDSketch = AggregateFunctionQuantile<Value, QuantileDDSketch<Value>, NameQuantilesDDSketch, false, std::conditional_t<float_return, Float64, void>, true, true>;
template <typename Value, bool float_return> using FuncQuantileDD = AggregateFunctionQuantile<Value, QuantileDD<Value>, NameQuantileDD, false, std::conditional_t<float_return, Float64, void>, false, true>;
template <typename Value, bool float_return> using FuncQuantilesDD = AggregateFunctionQuantile<Value, QuantileDD<Value>, NameQuantilesDD, false, std::conditional_t<float_return, Float64, void>, true, true>;
template <template <typename, bool> class Function>
@ -46,16 +46,16 @@ AggregateFunctionPtr createAggregateFunctionQuantile(
}
void registerAggregateFunctionsQuantileDDSketch(AggregateFunctionFactory & factory)
void registerAggregateFunctionsQuantileDD(AggregateFunctionFactory & factory)
{
/// For aggregate functions returning array we cannot return NULL on empty set.
AggregateFunctionProperties properties = { .returns_default_when_only_null = true };
factory.registerFunction(NameQuantileDDSketch::name, createAggregateFunctionQuantile<FuncQuantileDDSketch>);
factory.registerFunction(NameQuantilesDDSketch::name, { createAggregateFunctionQuantile<FuncQuantilesDDSketch>, properties });
factory.registerFunction(NameQuantileDD::name, createAggregateFunctionQuantile<FuncQuantileDD>);
factory.registerFunction(NameQuantilesDD::name, { createAggregateFunctionQuantile<FuncQuantilesDD>, properties });
/// 'median' is an alias for 'quantile'
factory.registerAlias("medianDDSketch", NameQuantileDDSketch::name);
factory.registerAlias("medianDD", NameQuantileDD::name);
}
}

View File

@ -17,6 +17,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
}
@ -30,12 +31,12 @@ class ApproxSampler
public:
struct Stats
{
T value; // the sampled value
Int64 g; // the minimum rank jump from the previous value's minimum rank
Int64 delta; // the maximum span of the rank
T value; // The sampled value
Int64 g; // The minimum rank jump from the previous value's minimum rank
Int64 delta; // The maximum span of the rank
Stats() = default;
Stats(T value_, Int64 g_, Int64 delta_) : value(value_), g(g_), delta(delta_) {}
Stats(T value_, Int64 g_, Int64 delta_) : value(value_), g(g_), delta(delta_) { }
};
struct QueryResult
@ -49,20 +50,20 @@ public:
ApproxSampler() = default;
explicit ApproxSampler(
double relative_error_,
size_t compress_threshold_ = default_compress_threshold,
size_t count_ = 0,
bool compressed_ = false)
: relative_error(relative_error_)
, compress_threshold(compress_threshold_)
, count(count_)
, compressed(compressed_)
ApproxSampler(const ApproxSampler & other)
: relative_error(other.relative_error)
, compress_threshold(other.compress_threshold)
, count(other.count)
, compressed(other.compressed)
, sampled(other.sampled.begin(), other.sampled.end())
, backup_sampled(other.backup_sampled.begin(), other.backup_sampled.end())
, head_sampled(other.head_sampled.begin(), other.head_sampled.end())
{
sampled.reserve(compress_threshold);
backup_sampled.reserve(compress_threshold);
}
head_sampled.reserve(default_head_size);
explicit ApproxSampler(double relative_error_)
: relative_error(relative_error_), compress_threshold(default_compress_threshold), count(0), compressed(false)
{
}
bool isCompressed() const { return compressed; }
@ -95,9 +96,9 @@ public:
Int64 current_max = std::numeric_limits<Int64>::min();
for (const auto & stats : sampled)
current_max = std::max(stats.delta + stats.g, current_max);
Int64 target_error = current_max/2;
Int64 target_error = current_max / 2;
size_t index= 0;
size_t index = 0;
auto min_rank = sampled[0].g;
for (size_t i = 0; i < size; ++i)
{
@ -118,7 +119,6 @@ public:
result[indices[i]] = res.value;
}
}
}
void compress()
@ -256,16 +256,27 @@ public:
void read(ReadBuffer & buf)
{
readBinaryLittleEndian(compress_threshold, buf);
if (compress_threshold != default_compress_threshold)
throw Exception(
ErrorCodes::INCORRECT_DATA,
"The compress threshold {} isn't the expected one {}",
compress_threshold,
default_compress_threshold);
readBinaryLittleEndian(relative_error, buf);
readBinaryLittleEndian(count, buf);
size_t sampled_len = 0;
readBinaryLittleEndian(sampled_len, buf);
if (sampled_len > compress_threshold)
throw Exception(
ErrorCodes::INCORRECT_DATA, "The number of elements {} for quantileGK exceeds {}", sampled_len, compress_threshold);
sampled.resize(sampled_len);
for (size_t i = 0; i < sampled_len; ++i)
{
auto stats = sampled[i];
auto & stats = sampled[i];
readBinaryLittleEndian(stats.value, buf);
readBinaryLittleEndian(stats.g, buf);
readBinaryLittleEndian(stats.delta, buf);
@ -291,7 +302,7 @@ private:
min_rank += curr_sample.g;
}
}
return {sampled.size()-1, 0, sampled.back().value};
return {sampled.size() - 1, 0, sampled.back().value};
}
void withHeadBufferInserted()
@ -389,12 +400,11 @@ private:
double relative_error;
size_t compress_threshold;
size_t count = 0;
size_t count;
bool compressed;
PaddedPODArray<Stats> sampled;
PaddedPODArray<Stats> backup_sampled;
PaddedPODArray<T> head_sampled;
static constexpr size_t default_compress_threshold = 10000;
@ -406,17 +416,14 @@ class QuantileGK
{
private:
using Data = ApproxSampler<Value>;
mutable Data data;
Data data;
public:
QuantileGK() = default;
explicit QuantileGK(size_t accuracy) : data(1.0 / static_cast<double>(accuracy)) { }
void add(const Value & x)
{
data.insert(x);
}
void add(const Value & x) { data.insert(x); }
template <typename Weight>
void add(const Value &, const Weight &)
@ -429,22 +436,34 @@ public:
if (!data.isCompressed())
data.compress();
data.merge(rhs.data);
if (rhs.data.isCompressed())
data.merge(rhs.data);
else
{
/// We can't modify rhs, so copy it and compress
Data rhs_data_copy(rhs.data);
rhs_data_copy.compress();
data.merge(rhs_data_copy);
}
}
void serialize(WriteBuffer & buf) const
{
/// Always compress before serialization
if (!data.isCompressed())
data.compress();
data.write(buf);
if (data.isCompressed())
data.write(buf);
else
{
/// This method is const, so we can't compress `data` in place: copy it and compress the copy
Data data_copy(data);
data_copy.compress();
data_copy.write(buf);
}
}
void deserialize(ReadBuffer & buf)
{
data.read(buf);
/// Serialized data is always compressed
data.setCompressed();
}
@ -481,7 +500,6 @@ public:
}
};
template <typename Value, bool _> using FuncQuantileGK = AggregateFunctionQuantile<Value, QuantileGK<Value>, NameQuantileGK, false, void, false, true>;
template <typename Value, bool _> using FuncQuantilesGK = AggregateFunctionQuantile<Value, QuantileGK<Value>, NameQuantilesGK, false, void, true, true>;

View File

@ -33,14 +33,14 @@ namespace DB
*/
template <typename Value>
class QuantileDDSketch
class QuantileDD
{
public:
using Weight = UInt64;
QuantileDDSketch() = default;
QuantileDD() = default;
explicit QuantileDDSketch(Float64 relative_accuracy) : data(relative_accuracy) { }
explicit QuantileDD(Float64 relative_accuracy) : data(relative_accuracy) { }
void add(const Value & x)
{
@ -53,7 +53,7 @@ public:
data.add(x, w);
}
void merge(const QuantileDDSketch &other)
void merge(const QuantileDD &other)
{
data.merge(other.data);
}

View File

@ -1,2 +1,5 @@
clickhouse_add_executable (quantile-t-digest quantile-t-digest.cpp)
target_link_libraries (quantile-t-digest PRIVATE dbms clickhouse_aggregate_functions)
clickhouse_add_executable (group_array_sorted group_array_sorted.cpp)
target_link_libraries (group_array_sorted PRIVATE dbms clickhouse_aggregate_functions)

View File

@ -0,0 +1,205 @@
#include <algorithm>
#include <type_traits>
#include <utility>
#include <iostream>
#include "pcg_random.hpp"
#include <Columns/ColumnVector.h>
#include <Common/ArenaAllocator.h>
#include <Common/RadixSort.h>
#include <Columns/ColumnArray.h>
using namespace DB;
/// Benchmark variant of the heap strategy: keeps the `max_elements` smallest values in a heap
/// ordered by operator<, so values[0] is the largest retained value.
template <typename T>
struct GroupArraySortedDataHeap
{
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
Array values;
/// Strict "less than" ordering used by all heap operations.
static bool compare(const T & lhs, const T & rhs)
{
return lhs < rhs;
}
/// Function object wrapper around compare() for standard algorithms.
struct Comparator
{
bool operator()(const T & lhs, const T & rhs)
{
return compare(lhs, rhs);
}
};
/// Restores the heap property after values[0] has been overwritten, by sifting the new top down.
ALWAYS_INLINE void replaceTop()
{
size_t size = values.size();
if (size < 2)
return;
/// Pick the larger of the (up to two) children of the root.
size_t child_index = 1;
if (values.size() > 2 && compare(values[1], values[2]))
++child_index;
/// Check if we are in order
if (compare(values[child_index], values[0]))
return;
size_t current_index = 0;
/// Keep the sifted value aside; it is written once, into its final position, after the loop.
auto current = values[current_index];
do
{
/// We are not in heap-order, swap the parent with its largest child.
values[current_index] = values[child_index];
current_index = child_index;
// Recompute the child based off of the updated parent
child_index = 2 * child_index + 1;
if (child_index >= size)
break;
if ((child_index + 1) < size && compare(values[child_index], values[child_index + 1]))
{
/// Right child exists and is greater than left child.
++child_index;
}
/// Check if we are in order.
} while (!compare(values[child_index], current));
values[current_index] = current;
}
/// Adds one value, keeping at most `max_elements` smallest values.
/// `max_elements` must be at least 1: with 0 the first branch reads values[0] of an empty array.
ALWAYS_INLINE void addElement(const T & element, size_t max_elements, Arena * arena)
{
if (values.size() >= max_elements)
{
/// Element is greater or equal than current max element, it cannot be in k min elements
if (!compare(element, values[0]))
return;
values[0] = element;
replaceTop();
return;
}
values.push_back(element, arena);
std::push_heap(values.begin(), values.end(), Comparator());
}
/// Prints the retained values to stderr from largest to smallest. Destructive: `values` is empty afterwards.
ALWAYS_INLINE void dump()
{
while (!values.empty())
{
std::pop_heap(values.begin(), values.end(), Comparator());
std::cerr << values.back() << ' ';
values.pop_back();
}
std::cerr << '\n';
}
};
/// Benchmark variant of the sort strategy: appends every value and periodically trims
/// the array down to the `max_elements` smallest values with nth_element.
template <typename T>
struct GroupArraySortedDataSort
{
    using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
    using Array = PODArray<T, 32, Allocator>;

    Array values;

    /// Strict "less than" ordering used by nth_element.
    static bool compare(const T & lhs, const T & rhs)
    {
        return lhs < rhs;
    }

    /// Function object wrapper around compare() for standard algorithms.
    struct Comparator
    {
        bool operator()(const T & lhs, const T & rhs)
        {
            return compare(lhs, rhs);
        }
    };

    /// Fully sorts `values` and truncates it to at most `max_elements` items.
    ALWAYS_INLINE void sortAndLimit(size_t max_elements, Arena * arena)
    {
        RadixSort<RadixSortNumTraits<T>>::executeLSD(values.data(), values.size());

        /// Only ever shrink: resizing up would append uninitialized elements to the result.
        if (values.size() > max_elements)
            values.resize(max_elements, arena);
    }

    /// Once the array holds 4 * max_elements items, moves the `max_elements` smallest ones
    /// to the front (unordered) and drops the rest.
    ALWAYS_INLINE void partialSortAndLimitIfNeeded(size_t max_elements, Arena * arena)
    {
        if (values.size() < max_elements * 4)
            return;

        std::nth_element(values.begin(), values.begin() + max_elements, values.end(), Comparator());
        values.resize(max_elements, arena);
    }

    /// Appends one value and trims the array if it has grown enough.
    ALWAYS_INLINE void addElement(const T & element, size_t max_elements, Arena * arena)
    {
        values.push_back(element, arena);
        partialSortAndLimitIfNeeded(max_elements, arena);
    }
};
/// Feeds `elements` pseudo-random 64-bit values into a fresh SortedData limited to `max_elements`
/// and prints the elapsed wall time in milliseconds to stderr.
template <typename SortedData>
NO_INLINE void benchmark(size_t elements, size_t max_elements)
{
    Stopwatch stopwatch;
    stopwatch.start();

    SortedData sorted_data;
    pcg64_fast generator;
    Arena memory_arena;

    for (size_t remaining = elements; remaining != 0; --remaining)
    {
        const uint64_t random_value = generator();
        sorted_data.addElement(random_value, max_elements, &memory_arena);
    }

    stopwatch.stop();

    const auto elapsed_milliseconds = stopwatch.elapsedMilliseconds();
    std::cerr << "Elapsed " << elapsed_milliseconds << " milliseconds" << '\n';
}
/// Usage: ./group_array_sorted method elements max_elements
///   method       - "heap" or "sort"
///   elements     - number of random values to feed (non-negative integer)
///   max_elements - how many smallest values to keep (positive integer)
/// Returns 1 on invalid arguments, 0 otherwise.
int main(int argc, char ** argv)
{
    if (argc != 4)
    {
        std::cerr << "./group_array_sorted method elements max_elements" << '\n';
        return 1;
    }

    /// Parses a plain non-negative decimal number. Rejects empty strings, signs, leading
    /// whitespace and trailing garbage, which strtoull alone would silently accept.
    auto parse_count = [](const char * text, uint64_t & result)
    {
        if (*text < '0' || *text > '9')
            return false;

        char * end = nullptr;
        result = std::strtoull(text, &end, 10);
        return *end == '\0';
    };

    const std::string method = argv[1];
    uint64_t elements = 0;
    uint64_t max_elements = 0;

    /// max_elements == 0 is rejected because the heap method reads the top of the heap
    /// as soon as it holds max_elements items, which would be an empty array.
    if (!parse_count(argv[2], elements) || !parse_count(argv[3], max_elements) || max_elements == 0)
    {
        std::cerr << "elements must be a non-negative integer and max_elements must be a positive integer" << '\n';
        return 1;
    }

    std::cerr << "Method " << method << " elements " << elements << " max elements " << max_elements << '\n';

    if (method == "heap")
    {
        benchmark<GroupArraySortedDataHeap<UInt64>>(elements, max_elements);
    }
    else if (method == "sort")
    {
        benchmark<GroupArraySortedDataSort<UInt64>>(elements, max_elements);
    }
    else
    {
        std::cerr << "Invalid method " << method << '\n';
        return 1;
    }

    return 0;
}

View File

@ -15,6 +15,7 @@ void registerAggregateFunctionCount(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSum(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSumTimestamp(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
@ -31,7 +32,7 @@ void registerAggregateFunctionsQuantileTimingWeighted(AggregateFunctionFactory &
void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDDSketch(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDD(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
@ -112,6 +113,7 @@ void registerAggregateFunctions()
registerAggregateFunctionDeltaSum(factory);
registerAggregateFunctionDeltaSumTimestamp(factory);
registerAggregateFunctionGroupArray(factory);
registerAggregateFunctionGroupArraySorted(factory);
registerAggregateFunctionGroupUniqArray(factory);
registerAggregateFunctionGroupArrayInsertAt(factory);
registerAggregateFunctionsQuantile(factory);
@ -128,7 +130,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsQuantileTDigest(factory);
registerAggregateFunctionsQuantileTDigestWeighted(factory);
registerAggregateFunctionsQuantileBFloat16(factory);
registerAggregateFunctionsQuantileDDSketch(factory);
registerAggregateFunctionsQuantileDD(factory);
registerAggregateFunctionsQuantileBFloat16Weighted(factory);
registerAggregateFunctionsQuantileApprox(factory);
registerAggregateFunctionsSequenceMatch(factory);

View File

@ -181,6 +181,23 @@ public:
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node)
{
/// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`.
const auto & variant_element_constant_value = second_argument_constant_node->getValue();
String subcolumn_name;
if (variant_element_constant_value.getType() != Field::Types::String)
return;
subcolumn_name = variant_element_constant_value.get<const String &>();
column.name += '.';
column.name += subcolumn_name;
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapContains" && column_type.isMap())
{
const auto & data_type_map = assert_cast<const DataTypeMap &>(*column.type);

View File

@ -1214,7 +1214,7 @@ private:
static void expandGroupByAll(QueryNode & query_tree_node_typed);
static void expandOrderByAll(QueryNode & query_tree_node_typed);
void expandOrderByAll(QueryNode & query_tree_node_typed, const Settings & settings);
static std::string
rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context);
@ -1396,6 +1396,8 @@ private:
/// Lambdas that are currently in resolve process
std::unordered_set<IQueryTreeNode *> lambdas_in_resolve_process;
std::unordered_set<std::string_view> cte_in_resolve_process;
/// Function name to user defined lambda map
std::unordered_map<std::string, QueryTreeNodePtr> function_name_to_user_defined_lambda;
@ -2332,8 +2334,11 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
query_tree_node_typed.setIsGroupByAll(false);
}
void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed)
void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed, const Settings & settings)
{
if (!settings.enable_order_by_all || !query_tree_node_typed.isOrderByAll())
return;
auto * all_node = query_tree_node_typed.getOrderBy().getNodes()[0]->as<SortNode>();
if (!all_node)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not sort node.");
@ -2344,21 +2349,25 @@ void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed)
for (auto & node : projection_nodes)
{
if (auto * identifier_node = node->as<IdentifierNode>(); identifier_node != nullptr)
if (Poco::toUpper(identifier_node->getIdentifier().getFullName()) == "ALL" || Poco::toUpper(identifier_node->getAlias()) == "ALL")
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
if (auto * function_node = node->as<FunctionNode>(); function_node != nullptr)
if (Poco::toUpper(function_node->getAlias()) == "ALL")
auto resolved_expression_it = resolved_expressions.find(node);
if (resolved_expression_it != resolved_expressions.end())
{
auto projection_names = resolved_expression_it->second;
if (projection_names.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expression nodes list expected 1 projection names. Actual {}",
projection_names.size());
if (Poco::toUpper(projection_names[0]) == "ALL")
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
}
auto sort_node = std::make_shared<SortNode>(node, all_node->getSortDirection(), all_node->getNullsSortDirection());
list_node->getNodes().push_back(sort_node);
}
query_tree_node_typed.getOrderByNode() = list_node;
query_tree_node_typed.setIsOrderByAll(false);
}
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(
@ -3716,7 +3725,12 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook
if (it->second.resolve_result.isResolved() &&
scope.use_identifier_lookup_to_result_cache &&
!scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup))
return it->second.resolve_result;
{
if (!it->second.resolve_result.isResolvedFromCTEs() || !cte_in_resolve_process.contains(identifier_lookup.identifier.getFullName()))
{
return it->second.resolve_result;
}
}
}
else
{
@ -3773,8 +3787,23 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook
if (!resolve_result.resolved_identifier && identifier_lookup.isTableExpressionLookup())
{
auto cte_query_node_it = scope.cte_name_to_query_node.find(identifier_lookup.identifier.getFullName());
if (cte_query_node_it != scope.cte_name_to_query_node.end())
auto full_name = identifier_lookup.identifier.getFullName();
auto cte_query_node_it = scope.cte_name_to_query_node.find(full_name);
/// CTE may reference table expressions with the same name, e.g.:
///
/// WITH test1 AS (SELECT * FROM test1) SELECT * FROM test1;
///
/// Since we don't support recursive CTEs, the `test1` identifier inside of the CTE
/// refers to the table <default database name>.test1.
/// This means that the example above is equivalent to the following query:
///
/// SELECT * FROM test1;
///
/// To achieve this behaviour, an identifier is not allowed to resolve to
/// a CTE that is currently being resolved.
if (cte_query_node_it != scope.cte_name_to_query_node.end()
&& !cte_in_resolve_process.contains(full_name))
{
resolve_result.resolved_identifier = cte_query_node_it->second;
resolve_result.resolve_place = IdentifierResolvePlace::CTE;
@ -5712,6 +5741,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
subquery_node = resolved_identifier_node->as<QueryNode>();
union_node = resolved_identifier_node->as<UnionNode>();
std::string_view cte_name = subquery_node ? subquery_node->getCTEName() : union_node->getCTEName();
if (subquery_node)
subquery_node->setIsCTE(false);
else
@ -5720,10 +5751,21 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
IdentifierResolveScope subquery_scope(resolved_identifier_node, &scope /*parent_scope*/);
subquery_scope.subquery_depth = scope.subquery_depth + 1;
/// The CTE is being resolved, so resolving to it again must be forbidden
/// because recursive CTEs are not supported, e.g.:
///
/// WITH test1 AS (SELECT i + 1, j + 1 FROM test1) SELECT toInt64(4) i, toInt64(5) j FROM numbers(3) WHERE (i, j) IN test1;
///
/// In this example the argument of function `in` is being resolved here. If CTE `test1` is not forbidden,
/// `test1` is resolved to the CTE (not to the table) in the `initializeQueryJoinTreeNode` function.
cte_in_resolve_process.insert(cte_name);
if (subquery_node)
resolveQuery(resolved_identifier_node, subquery_scope);
else
resolveUnion(resolved_identifier_node, subquery_scope);
cte_in_resolve_process.erase(cte_name);
}
}
}
@ -7116,6 +7158,10 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
max_subquery_depth);
auto & query_node_typed = query_node->as<QueryNode &>();
if (query_node_typed.isCTE())
cte_in_resolve_process.insert(query_node_typed.getCTEName());
const auto & settings = scope.context->getSettingsRef();
bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube();
@ -7137,9 +7183,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.hasHaving() && query_node_typed.isGroupByWithTotals() && is_rollup_or_cube)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING");
if (settings.enable_order_by_all && query_node_typed.isOrderByAll())
expandOrderByAll(query_node_typed);
/// Initialize aliases in query node scope
QueryExpressionsAliasVisitor visitor(scope);
@ -7326,6 +7369,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (settings.enable_positional_arguments)
replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope);
expandOrderByAll(query_node_typed, settings);
resolveSortNodeList(query_node_typed.getOrderByNode(), scope);
}
@ -7455,11 +7499,18 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
node->removeAlias();
query_node_typed.resolveProjectionColumns(std::move(projection_columns));
if (query_node_typed.isCTE())
cte_in_resolve_process.erase(query_node_typed.getCTEName());
}
void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, IdentifierResolveScope & scope)
{
auto & union_node_typed = union_node->as<UnionNode &>();
if (union_node_typed.isCTE())
cte_in_resolve_process.insert(union_node_typed.getCTEName());
auto & queries_nodes = union_node_typed.getQueries().getNodes();
for (auto & query_node : queries_nodes)
@ -7483,6 +7534,9 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier
scope.scope_node->formatASTForErrorMessage());
}
}
if (union_node_typed.isCTE())
cte_in_resolve_process.erase(union_node_typed.getCTEName());
}
}

View File

@ -119,6 +119,9 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
if (is_group_by_all)
buffer << ", is_group_by_all: " << is_group_by_all;
if (is_order_by_all)
buffer << ", is_order_by_all: " << is_order_by_all;
std::string group_by_type;
if (is_group_by_with_rollup)
group_by_type = "rollup";
@ -257,6 +260,7 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
is_group_by_with_cube == rhs_typed.is_group_by_with_cube &&
is_group_by_with_grouping_sets == rhs_typed.is_group_by_with_grouping_sets &&
is_group_by_all == rhs_typed.is_group_by_all &&
is_order_by_all == rhs_typed.is_order_by_all &&
cte_name == rhs_typed.cte_name &&
projection_columns == rhs_typed.projection_columns &&
settings_changes == rhs_typed.settings_changes;
@ -288,6 +292,7 @@ void QueryNode::updateTreeHashImpl(HashState & state) const
state.update(is_group_by_with_cube);
state.update(is_group_by_with_grouping_sets);
state.update(is_group_by_all);
state.update(is_order_by_all);
state.update(settings_changes.size());
@ -306,18 +311,19 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
{
auto result_query_node = std::make_shared<QueryNode>(context);
result_query_node->is_subquery = is_subquery;
result_query_node->is_cte = is_cte;
result_query_node->is_distinct = is_distinct;
result_query_node->is_limit_with_ties = is_limit_with_ties;
result_query_node->is_group_by_with_totals = is_group_by_with_totals;
result_query_node->is_group_by_with_rollup = is_group_by_with_rollup;
result_query_node->is_group_by_with_cube = is_group_by_with_cube;
result_query_node->is_subquery = is_subquery;
result_query_node->is_cte = is_cte;
result_query_node->is_distinct = is_distinct;
result_query_node->is_limit_with_ties = is_limit_with_ties;
result_query_node->is_group_by_with_totals = is_group_by_with_totals;
result_query_node->is_group_by_with_rollup = is_group_by_with_rollup;
result_query_node->is_group_by_with_cube = is_group_by_with_cube;
result_query_node->is_group_by_with_grouping_sets = is_group_by_with_grouping_sets;
result_query_node->is_group_by_all = is_group_by_all;
result_query_node->cte_name = cte_name;
result_query_node->projection_columns = projection_columns;
result_query_node->settings_changes = settings_changes;
result_query_node->is_group_by_all = is_group_by_all;
result_query_node->is_order_by_all = is_order_by_all;
result_query_node->cte_name = cte_name;
result_query_node->projection_columns = projection_columns;
result_query_node->settings_changes = settings_changes;
return result_query_node;
}
@ -332,6 +338,7 @@ ASTPtr QueryNode::toASTImpl(const ConvertToASTOptions & options) const
select_query->group_by_with_cube = is_group_by_with_cube;
select_query->group_by_with_grouping_sets = is_group_by_with_grouping_sets;
select_query->group_by_all = is_group_by_all;
select_query->order_by_all = is_order_by_all;
if (hasWith())
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST(options));

View File

@ -6,7 +6,7 @@
#include <Parsers/ASTBackupQuery.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/TableLockHolder.h>
#include <Storages/MergeTree/ZooKeeperRetries.h>
#include <Common/ZooKeeper/ZooKeeperRetries.h>
#include <filesystem>
#include <queue>

View File

@ -32,6 +32,8 @@ namespace ErrorCodes
M(UInt64, shard_num) \
M(UInt64, replica_num) \
M(Bool, check_parts) \
M(Bool, check_projection_parts) \
M(Bool, allow_backup_broken_projections) \
M(Bool, internal) \
M(String, host_id) \
M(OptionalUUID, backup_uuid)

View File

@ -62,6 +62,12 @@ struct BackupSettings
/// Check checksums of the data parts before writing them to a backup.
bool check_parts = true;
/// Check checksums of the projection data parts before writing them to a backup.
bool check_projection_parts = true;
/// Allow creating a backup with broken projections.
bool allow_backup_broken_projections = false;
/// Internal, should not be specified by user.
/// Whether this backup is a part of a distributed backup created by BACKUP ON CLUSTER.
bool internal = false;

View File

@ -1,6 +1,6 @@
#pragma once
#include <Storages/MergeTree/ZooKeeperRetries.h>
#include <Common/ZooKeeper/ZooKeeperRetries.h>
#include <Common/ZooKeeper/Common.h>
#include <Common/ZooKeeper/ZooKeeperWithFaultInjection.h>

View File

@ -296,12 +296,14 @@ if (TARGET ch_contrib::gwp_asan)
endif()
# Otherwise it will slow down stack traces printing too much.
set_source_files_properties(
Common/Elf.cpp
Common/Dwarf.cpp
Common/SymbolIndex.cpp
Common/ThreadFuzzer.cpp
PROPERTIES COMPILE_FLAGS "-O2 ${WITHOUT_COVERAGE_FLAGS}")
if ("${CMAKE_BUILD_TYPE_UC}" STREQUAL "DEBUG")
set_source_files_properties(
Common/Elf.cpp
Common/Dwarf.cpp
Common/SymbolIndex.cpp
Common/ThreadFuzzer.cpp
PROPERTIES COMPILE_FLAGS "-O2 ${WITHOUT_COVERAGE_FLAGS}")
endif()
target_link_libraries (clickhouse_common_io
PRIVATE

View File

@ -22,12 +22,12 @@ namespace ErrorCodes
}
ConnectionEstablisher::ConnectionEstablisher(
IConnectionPool * pool_,
ConnectionPoolPtr pool_,
const ConnectionTimeouts * timeouts_,
const Settings & settings_,
LoggerPtr log_,
const QualifiedTableName * table_to_check_)
: pool(pool_), timeouts(timeouts_), settings(settings_), log(log_), table_to_check(table_to_check_), is_finished(false)
: pool(std::move(pool_)), timeouts(timeouts_), settings(settings_), log(log_), table_to_check(table_to_check_), is_finished(false)
{
}
@ -79,14 +79,13 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
return;
}
UInt32 delay = table_status_it->second.absolute_delay;
const UInt32 delay = table_status_it->second.absolute_delay;
if (delay < max_allowed_delay)
result.is_up_to_date = true;
else
{
result.is_up_to_date = false;
result.staleness = delay;
result.delay = delay;
LOG_TRACE(log, "Server {} has unacceptable replica delay for table {}.{}: {}", result.entry->getDescription(), table_to_check->database, table_to_check->table, delay);
ProfileEvents::increment(ProfileEvents::DistributedConnectionStaleReplica);
@ -111,12 +110,13 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
#if defined(OS_LINUX)
ConnectionEstablisherAsync::ConnectionEstablisherAsync(
IConnectionPool * pool_,
ConnectionPoolPtr pool_,
const ConnectionTimeouts * timeouts_,
const Settings & settings_,
LoggerPtr log_,
const QualifiedTableName * table_to_check_)
: AsyncTaskExecutor(std::make_unique<Task>(*this)), connection_establisher(pool_, timeouts_, settings_, log_, table_to_check_)
: AsyncTaskExecutor(std::make_unique<Task>(*this))
, connection_establisher(std::move(pool_), timeouts_, settings_, log_, table_to_check_)
{
epoll.add(timeout_descriptor.getDescriptor());
}

Some files were not shown because too many files have changed in this diff Show More