Merge branch 'master' into keeper-logs-cache

This commit is contained in:
Antonio Andelic 2024-01-31 16:11:15 +00:00
commit 9bcbab187f
399 changed files with 20874 additions and 4122 deletions

View File

@ -138,19 +138,26 @@ jobs:
############################################################################################ ############################################################################################
##################################### Docker images ####################################### ##################################### Docker images #######################################
############################################################################################ ############################################################################################
DockerServerImages: DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }} if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml uses: ./.github/workflows/reusable_test.yml
with: with:
test_name: Docker server and keeper images test_name: Docker server image
runner_type: style-checker runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }} data: ${{ needs.RunConfig.outputs.data }}
checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
run_command: | run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push \ python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \ python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################ ############################################################################################

View File

@ -242,20 +242,26 @@ jobs:
############################################################################################ ############################################################################################
##################################### Docker images ####################################### ##################################### Docker images #######################################
############################################################################################ ############################################################################################
DockerServerImages: DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }} if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml uses: ./.github/workflows/reusable_test.yml
with: with:
test_name: Docker server and keeper images test_name: Docker server image
runner_type: style-checker runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }} data: ${{ needs.RunConfig.outputs.data }}
# FIXME: avoid using 0 checkout
checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
run_command: | run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head \ python3 docker_server.py --release-type head \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head \ python3 docker_server.py --release-type head \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################ ############################################################################################

View File

@ -104,7 +104,7 @@ jobs:
if: ${{ !failure() && !cancelled() }} if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml uses: ./.github/workflows/reusable_test.yml
with: with:
test_name: Fast tests test_name: Fast test
runner_type: builder runner_type: builder
data: ${{ needs.RunConfig.outputs.data }} data: ${{ needs.RunConfig.outputs.data }}
run_command: | run_command: |
@ -273,19 +273,26 @@ jobs:
############################################################################################ ############################################################################################
##################################### Docker images ####################################### ##################################### Docker images #######################################
############################################################################################ ############################################################################################
DockerServerImages: DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }} if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml uses: ./.github/workflows/reusable_test.yml
with: with:
test_name: Docker server and keeper images test_name: Docker server image
runner_type: style-checker runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }} data: ${{ needs.RunConfig.outputs.data }}
checkout_depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
run_command: | run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push \ python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \ python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################ ############################################################################################

View File

@ -153,19 +153,26 @@ jobs:
############################################################################################ ############################################################################################
##################################### Docker images ####################################### ##################################### Docker images #######################################
############################################################################################ ############################################################################################
DockerServerImages: DockerServerImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }} if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml uses: ./.github/workflows/reusable_test.yml
with: with:
test_name: Docker server and keeper images test_name: Docker server image
runner_type: style-checker runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }} data: ${{ needs.RunConfig.outputs.data }}
checkout_depth: 0
run_command: | run_command: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push \ python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \ python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################ ############################################################################################
@ -456,7 +463,8 @@ jobs:
FinishCheck: FinishCheck:
if: ${{ !failure() && !cancelled() }} if: ${{ !failure() && !cancelled() }}
needs: needs:
- DockerServerImages - DockerServerImage
- DockerKeeperImage
- BuilderReport - BuilderReport
- BuilderSpecialReport - BuilderSpecialReport
- MarkReleaseReady - MarkReleaseReady

File diff suppressed because it is too large Load Diff

View File

@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s
| Version | Supported | | Version | Supported |
|:-|:-| |:-|:-|
| 24.1 | ✔️ |
| 23.12 | ✔️ | | 23.12 | ✔️ |
| 23.11 | ✔️ | | 23.11 | ✔️ |
| 23.10 | ✔️ | | 23.10 | |
| 23.9 | ❌ | | 23.9 | ❌ |
| 23.8 | ✔️ | | 23.8 | ✔️ |
| 23.7 | ❌ | | 23.7 | ❌ |

View File

@ -64,19 +64,14 @@ using ComparatorWrapper = Comparator;
#include <miniselect/floyd_rivest_select.h> #include <miniselect/floyd_rivest_select.h>
template <typename RandomIt> template <typename RandomIt, typename Compare>
void nth_element(RandomIt first, RandomIt nth, RandomIt last) void nth_element(RandomIt first, RandomIt nth, RandomIt last, Compare compare)
{ {
using value_type = typename std::iterator_traits<RandomIt>::value_type;
using comparator = std::less<value_type>;
comparator compare;
ComparatorWrapper<comparator> compare_wrapper = compare;
#ifndef NDEBUG #ifndef NDEBUG
::shuffle(first, last); ::shuffle(first, last);
#endif #endif
ComparatorWrapper<Compare> compare_wrapper = compare;
::miniselect::floyd_rivest_select(first, nth, last, compare_wrapper); ::miniselect::floyd_rivest_select(first, nth, last, compare_wrapper);
#ifndef NDEBUG #ifndef NDEBUG
@ -87,6 +82,15 @@ void nth_element(RandomIt first, RandomIt nth, RandomIt last)
#endif #endif
} }
template <typename RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
{
using value_type = typename std::iterator_traits<RandomIt>::value_type;
using comparator = std::less<value_type>;
::nth_element(first, nth, last, comparator());
}
template <typename RandomIt, typename Compare> template <typename RandomIt, typename Compare>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare) void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare)
{ {

View File

@ -904,13 +904,6 @@ public:
/// Returns a pointer to the Logger with the given name if it /// Returns a pointer to the Logger with the given name if it
/// exists, or a null pointer otherwise. /// exists, or a null pointer otherwise.
static bool destroy(const std::string & name);
/// Destroys the logger with the specified name. Does nothing
/// if the logger is not found.
///
/// After a logger has been destroyed, all references to it
/// become invalid.
static void shutdown(); static void shutdown();
/// Shuts down the logging framework and releases all /// Shuts down the logging framework and releases all
/// Loggers. /// Loggers.
@ -940,8 +933,6 @@ public:
static const std::string ROOT; /// The name of the root logger (""). static const std::string ROOT; /// The name of the root logger ("").
protected: protected:
typedef std::map<std::string, Logger *> LoggerMap;
Logger(const std::string & name, Channel * pChannel, int level); Logger(const std::string & name, Channel * pChannel, int level);
~Logger(); ~Logger();
@ -962,8 +953,6 @@ private:
std::string _name; std::string _name;
Channel * _pChannel; Channel * _pChannel;
std::atomic_int _level; std::atomic_int _level;
static LoggerMap * _pLoggerMap;
}; };

View File

@ -20,6 +20,7 @@
#include "Poco/NumberParser.h" #include "Poco/NumberParser.h"
#include "Poco/String.h" #include "Poco/String.h"
#include <cassert>
#include <mutex> #include <mutex>
namespace namespace
@ -37,12 +38,20 @@ std::mutex & getLoggerMutex()
return *logger_mutex; return *logger_mutex;
} }
struct LoggerEntry
{
Poco::Logger * logger;
bool owned_by_shared_ptr = false;
};
using LoggerMap = std::unordered_map<std::string, LoggerEntry>;
LoggerMap * _pLoggerMap = nullptr;
} }
namespace Poco { namespace Poco {
Logger::LoggerMap* Logger::_pLoggerMap = 0;
const std::string Logger::ROOT; const std::string Logger::ROOT;
@ -134,12 +143,12 @@ void Logger::setLevel(const std::string& name, int level)
if (_pLoggerMap) if (_pLoggerMap)
{ {
std::string::size_type len = name.length(); std::string::size_type len = name.length();
for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) for (auto & it : *_pLoggerMap)
{ {
if (len == 0 || if (len == 0 ||
(it->first.compare(0, len, name) == 0 && (it->first.length() == len || it->first[len] == '.'))) (it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.')))
{ {
it->second->setLevel(level); it.second.logger->setLevel(level);
} }
} }
} }
@ -153,12 +162,12 @@ void Logger::setChannel(const std::string& name, Channel* pChannel)
if (_pLoggerMap) if (_pLoggerMap)
{ {
std::string::size_type len = name.length(); std::string::size_type len = name.length();
for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) for (auto & it : *_pLoggerMap)
{ {
if (len == 0 || if (len == 0 ||
(it->first.compare(0, len, name) == 0 && (it->first.length() == len || it->first[len] == '.'))) (it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.')))
{ {
it->second->setChannel(pChannel); it.second.logger->setChannel(pChannel);
} }
} }
} }
@ -172,12 +181,12 @@ void Logger::setProperty(const std::string& loggerName, const std::string& prope
if (_pLoggerMap) if (_pLoggerMap)
{ {
std::string::size_type len = loggerName.length(); std::string::size_type len = loggerName.length();
for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) for (auto & it : *_pLoggerMap)
{ {
if (len == 0 || if (len == 0 ||
(it->first.compare(0, len, loggerName) == 0 && (it->first.length() == len || it->first[len] == '.'))) (it.first.compare(0, len, loggerName) == 0 && (it.first.length() == len || it.first[len] == '.')))
{ {
it->second->setProperty(propertyName, value); it.second.logger->setProperty(propertyName, value);
} }
} }
} }
@ -304,35 +313,84 @@ struct LoggerDeleter
{ {
void operator()(Poco::Logger * logger) void operator()(Poco::Logger * logger)
{ {
if (Logger::destroy(logger->name())) std::lock_guard<std::mutex> lock(getLoggerMutex());
return;
logger->release(); /// If logger infrastructure is destroyed just decrement logger reference count
if (!_pLoggerMap)
{
logger->release();
return;
}
auto it = _pLoggerMap->find(logger->name());
assert(it != _pLoggerMap->end());
/** If reference count is 1, this means this shared pointer owns logger
* and need destroy it.
*/
size_t reference_count_before_release = logger->release();
if (reference_count_before_release == 1)
{
assert(it->second.owned_by_shared_ptr);
_pLoggerMap->erase(it);
}
} }
}; };
inline LoggerPtr makeLoggerPtr(Logger & logger) inline LoggerPtr makeLoggerPtr(Logger & logger)
{ {
logger.duplicate();
return std::shared_ptr<Logger>(&logger, LoggerDeleter()); return std::shared_ptr<Logger>(&logger, LoggerDeleter());
} }
} }
Logger& Logger::get(const std::string& name) Logger& Logger::get(const std::string& name)
{ {
std::lock_guard<std::mutex> lock(getLoggerMutex()); std::lock_guard<std::mutex> lock(getLoggerMutex());
return unsafeGet(name); Logger & logger = unsafeGet(name);
/** If there are already shared pointer created for this logger
* we need to increment Logger reference count and now logger
* is owned by logger infrastructure.
*/
auto it = _pLoggerMap->find(name);
if (it->second.owned_by_shared_ptr)
{
it->second.logger->duplicate();
it->second.owned_by_shared_ptr = false;
}
return logger;
} }
LoggerPtr Logger::getShared(const std::string & name) LoggerPtr Logger::getShared(const std::string & name)
{ {
std::lock_guard<std::mutex> lock(getLoggerMutex()); std::lock_guard<std::mutex> lock(getLoggerMutex());
bool logger_exists = _pLoggerMap && _pLoggerMap->contains(name);
return makeLoggerPtr(unsafeGet(name)); Logger & logger = unsafeGet(name);
/** If logger already exists, then this shared pointer does not own it.
* If logger does not exists, logger infrastructure could be already destroyed
* or logger was created.
*/
if (logger_exists)
{
logger.duplicate();
}
else if (_pLoggerMap)
{
_pLoggerMap->find(name)->second.owned_by_shared_ptr = true;
}
return makeLoggerPtr(logger);
} }
Logger& Logger::unsafeGet(const std::string& name) Logger& Logger::unsafeGet(const std::string& name)
{ {
Logger* pLogger = find(name); Logger* pLogger = find(name);
@ -364,7 +422,10 @@ LoggerPtr Logger::createShared(const std::string & name, Channel * pChannel, int
{ {
std::lock_guard<std::mutex> lock(getLoggerMutex()); std::lock_guard<std::mutex> lock(getLoggerMutex());
return makeLoggerPtr(unsafeCreate(name, pChannel, level)); Logger & logger = unsafeCreate(name, pChannel, level);
_pLoggerMap->find(name)->second.owned_by_shared_ptr = true;
return makeLoggerPtr(logger);
} }
Logger& Logger::root() Logger& Logger::root()
@ -389,10 +450,14 @@ void Logger::shutdown()
if (_pLoggerMap) if (_pLoggerMap)
{ {
for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) for (auto & it : *_pLoggerMap)
{ {
it->second->release(); if (it.second.owned_by_shared_ptr)
continue;
it.second.logger->release();
} }
delete _pLoggerMap; delete _pLoggerMap;
_pLoggerMap = 0; _pLoggerMap = 0;
} }
@ -405,32 +470,12 @@ Logger* Logger::find(const std::string& name)
{ {
LoggerMap::iterator it = _pLoggerMap->find(name); LoggerMap::iterator it = _pLoggerMap->find(name);
if (it != _pLoggerMap->end()) if (it != _pLoggerMap->end())
return it->second; return it->second.logger;
} }
return 0; return 0;
} }
bool Logger::destroy(const std::string& name)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
if (_pLoggerMap)
{
LoggerMap::iterator it = _pLoggerMap->find(name);
if (it != _pLoggerMap->end())
{
if (it->second->release() == 1)
_pLoggerMap->erase(it);
return true;
}
}
return false;
}
void Logger::names(std::vector<std::string>& names) void Logger::names(std::vector<std::string>& names)
{ {
std::lock_guard<std::mutex> lock(getLoggerMutex()); std::lock_guard<std::mutex> lock(getLoggerMutex());
@ -538,7 +583,8 @@ void Logger::add(Logger* pLogger)
{ {
if (!_pLoggerMap) if (!_pLoggerMap)
_pLoggerMap = new LoggerMap; _pLoggerMap = new LoggerMap;
_pLoggerMap->insert(LoggerMap::value_type(pLogger->name(), pLogger));
_pLoggerMap->emplace(pLogger->name(), LoggerEntry{pLogger, false /*owned_by_shared_ptr*/});
} }

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54482) SET(VERSION_REVISION 54483)
SET(VERSION_MAJOR 24) SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 1) SET(VERSION_MINOR 2)
SET(VERSION_PATCH 1) SET(VERSION_PATCH 1)
SET(VERSION_GITHASH a2faa65b080a587026c86844f3a20c74d23a86f8) SET(VERSION_GITHASH 5a024dfc0936e062770d0cfaad0805b57c1fba17)
SET(VERSION_DESCRIBE v24.1.1.1-testing) SET(VERSION_DESCRIBE v24.2.1.1-testing)
SET(VERSION_STRING 24.1.1.1) SET(VERSION_STRING 24.2.1.1)
# end of autochange # end of autochange

View File

@ -79,7 +79,10 @@ if (SANITIZE_COVERAGE)
# But the actual coverage will be enabled on per-library basis: for ClickHouse code, but not for 3rd-party. # But the actual coverage will be enabled on per-library basis: for ClickHouse code, but not for 3rd-party.
set (COVERAGE_FLAGS "-fsanitize-coverage=trace-pc-guard,pc-table") set (COVERAGE_FLAGS "-fsanitize-coverage=trace-pc-guard,pc-table")
endif()
set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table") set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table")
set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table) set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table)
else()
set (WITHOUT_COVERAGE_FLAGS "")
set (WITHOUT_COVERAGE_FLAGS_LIST "")
endif()

View File

@ -1,5 +1,6 @@
if (APPLE OR SANITIZE STREQUAL "undefined" OR SANITIZE STREQUAL "memory") if (APPLE OR SANITIZE STREQUAL "memory")
# llvm-tblgen, that is used during LLVM build, doesn't work with UBSan. # llvm-tblgen, that is used during LLVM build, will throw MSAN errors when running (breaking the build)
# TODO: Retest when upgrading LLVM or build only llvm-tblgen without sanitizers
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
set (ENABLE_DWARF_PARSER_DEFAULT OFF) set (ENABLE_DWARF_PARSER_DEFAULT OFF)
else() else()

2
contrib/simdjson vendored

@ -1 +1 @@
Subproject commit 1075e8609c4afa253162d441437af929c29e31bb Subproject commit 6060be2fdf62edf4a8f51a8b0883d57d09397b30

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc # lts / testing / prestable / etc
ARG REPO_CHANNEL="stable" ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.12.2.59" ARG VERSION="24.1.1.2048"
ARG PACKAGES="clickhouse-keeper" ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS="" ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc # lts / testing / prestable / etc
ARG REPO_CHANNEL="stable" ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.12.2.59" ARG VERSION="24.1.1.2048"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS="" ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable" ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.12.2.59" ARG VERSION="24.1.1.2048"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image # set non-empty deb_location_url url to create a docker image

View File

@ -246,16 +246,19 @@ clickhouse-client -q "system flush logs" ||:
stop_logs_replication stop_logs_replication
# Try to get logs while server is running # Try to get logs while server is running
successfuly_saved=0 failed_to_save_logs=0
for table in query_log zookeeper_log trace_log transactions_info_log metric_log for table in query_log zookeeper_log trace_log transactions_info_log metric_log
do do
clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst; } 2>&1 )
successfuly_saved=$? echo "$err"
[[ "0" != "${#err}" ]] && failed_to_save_logs=1
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 )
successfuly_saved=$((successfuly_saved | $?)) echo "$err"
clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst [[ "0" != "${#err}" ]] && failed_to_save_logs=1
successfuly_saved=$((successfuly_saved | $?)) err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst; } 2>&1 )
echo "$err"
[[ "0" != "${#err}" ]] && failed_to_save_logs=1
fi fi
done done
@ -280,7 +283,7 @@ fi
# If server crashed dump system logs with clickhouse-local # If server crashed dump system logs with clickhouse-local
if [ $successfuly_saved -ne 0 ]; then if [ $failed_to_save_logs -ne 0 ]; then
# Compress tables. # Compress tables.
# #
# NOTE: # NOTE:

View File

@ -78,6 +78,8 @@ function configure()
randomize_config_boolean_value use_compression zookeeper randomize_config_boolean_value use_compression zookeeper
fi fi
randomize_config_boolean_value allow_experimental_block_number_column block_number
# for clickhouse-server (via service) # for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
# for clickhouse-client # for clickhouse-client

View File

@ -122,6 +122,7 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
rm /etc/clickhouse-server/config.d/block_number.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml

View File

@ -0,0 +1,438 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.1.1.2048-stable (5a024dfc093) FIXME as compared to v23.12.1.1368-stable (a2faa65b080)
#### Backward Incompatible Change
* The setting `print_pretty_type_names` is turned on by default. You can turn it off to keep the old behavior or `SET compatibility = '23.12'`. [#57726](https://github.com/ClickHouse/ClickHouse/pull/57726) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)).
* The function `reverseDNSQuery` is no longer available. This closes [#58368](https://github.com/ClickHouse/ClickHouse/issues/58368). [#58369](https://github.com/ClickHouse/ClickHouse/pull/58369) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Enable various changes to improve the access control in the configuration file. These changes affect the behavior, and you check the `config.xml` in the `access_control_improvements` section. In case you are not confident, keep the values in the configuration file as they were in the previous version. [#58584](https://github.com/ClickHouse/ClickHouse/pull/58584) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow queries without aliases for subqueries for `PASTE JOIN`. [#58654](https://github.com/ClickHouse/ClickHouse/pull/58654) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix sumMapFiltered with NaN values. NaN values are now placed at the end (instead of randomly) and considered different from any values. `-0` is now also treated as equal to `0`; since 0 values are discarded, `-0` values are discarded too. [#58959](https://github.com/ClickHouse/ClickHouse/pull/58959) ([Raúl Marín](https://github.com/Algunenano)).
* The function `visibleWidth` will behave according to the docs. In previous versions, it simply counted code points after string serialization, like the `lengthUTF8` function, but didn't consider zero-width and combining characters, full-width characters, tabs, and deletes. Now the behavior is changed accordingly. If you want to keep the old behavior, set `function_visible_width_behavior` to `0`, or set `compatibility` to `23.12` or lower. [#59022](https://github.com/ClickHouse/ClickHouse/pull/59022) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Kusto dialect is disabled until these two bugs will be fixed: [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037) and [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036). [#59305](https://github.com/ClickHouse/ClickHouse/pull/59305) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### New Feature
* Allow partitions from tables with different partition expressions to be attached when the destination table partition expression doesn't re-partition/ split the part. [#39507](https://github.com/ClickHouse/ClickHouse/pull/39507) ([Arthur Passos](https://github.com/arthurpassos)).
* Added statement `SYSTEM RELOAD ASYNCHRONOUS METRICS` which updates the asynchronous metrics. Mostly useful for testing and development. [#53710](https://github.com/ClickHouse/ClickHouse/pull/53710) ([Robert Schulze](https://github.com/rschu1ze)).
* Certain settings (currently `min_compress_block_size` and `max_compress_block_size`) can now be specified at column-level where they take precedence over the corresponding table-level setting. Example: `CREATE TABLE tab (col String SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840)) ENGINE = MergeTree ORDER BY tuple();`. [#55201](https://github.com/ClickHouse/ClickHouse/pull/55201) ([Duc Canh Le](https://github.com/canhld94)).
* Add `quantileDDSketch` aggregate function as well as the corresponding `quantilesDDSketch` and `medianDDSketch`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. ### Documentation entry for user-facing changes. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)).
* Added function `seriesDecomposeSTL()` which decomposes a time series into a season, a trend and a residual component. [#57078](https://github.com/ClickHouse/ClickHouse/pull/57078) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* Introduced MySQL Binlog Client for MaterializedMySQL: One binlog connection for many databases. [#57323](https://github.com/ClickHouse/ClickHouse/pull/57323) ([Val Doroshchuk](https://github.com/valbok)).
* Intel QuickAssist Technology (QAT) provides hardware-accelerated compression and cryptograpy. ClickHouse got a new compression codec `ZSTD_QAT` which utilizes QAT for zstd compression. The codec uses [Intel's QATlib](https://github.com/intel/qatlib) and [Inte's QAT ZSTD Plugin](https://github.com/intel/QAT-ZSTD-Plugin). Right now, only compression can be accelerated in hardware (a software fallback kicks in in case QAT could not be initialized), decompression always runs in software. [#57509](https://github.com/ClickHouse/ClickHouse/pull/57509) ([jasperzhu](https://github.com/jinjunzh)).
* Implementing the new way how object storage keys are generated for s3 disks. Now the format could be defined in terms of `re2` regex syntax with `key_template` option in disc description. [#57663](https://github.com/ClickHouse/ClickHouse/pull/57663) ([Sema Checherinda](https://github.com/CheSema)).
* Table system.dropped_tables_parts contains parts of system.dropped_tables tables (dropped but not yet removed tables). [#58038](https://github.com/ClickHouse/ClickHouse/pull/58038) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Implement Variant data type that represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). Variant type is available under a setting `allow_experimental_variant_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#58047](https://github.com/ClickHouse/ClickHouse/pull/58047) ([Kruglov Pavel](https://github.com/Avogar)).
* Add settings `max_materialized_views_size_for_table` to limit the number of materialized views attached to a table. [#58068](https://github.com/ClickHouse/ClickHouse/pull/58068) ([zhongyuankai](https://github.com/zhongyuankai)).
* `clickhouse-format` improvements: * support INSERT queries with `VALUES` * support comments (use `--comments` to output them) * support `--max_line_length` option to format only long queries in multiline. [#58246](https://github.com/ClickHouse/ClickHouse/pull/58246) ([vdimir](https://github.com/vdimir)).
* Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add table `system.database_engines`. [#58390](https://github.com/ClickHouse/ClickHouse/pull/58390) ([Bharat Nallan](https://github.com/bharatnc)).
* Added FROM <Replicas> modifier for SYSTEM SYNC REPLICA LIGHTWEIGHT query. The FROM modifier ensures we wait for for fetches and drop-ranges only for the specified source replicas, as well as any replica not in zookeeper or with an empty source_replica. [#58393](https://github.com/ClickHouse/ClickHouse/pull/58393) ([Jayme Bird](https://github.com/jaymebrd)).
* Add function `arrayShingles()` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)).
* Added functions `punycodeEncode()`, `punycodeDecode()`, `idnaEncode()` and `idnaDecode()` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)).
* Added string similarity functions `dramerauLevenshteinDistance()`, `jaroSimilarity()` and `jaroWinklerSimilarity()`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)).
* Add two settings `output_format_compression_level` to change output compression level and `output_format_compression_zstd_window_log` to explicitly set compression window size and enable long-range mode for zstd compression if output compression method is `zstd`. Applied for `INTO OUTFILE` and when writing to table functions `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`. [#58539](https://github.com/ClickHouse/ClickHouse/pull/58539) ([Duc Canh Le](https://github.com/canhld94)).
* Automatically disable ANSI escape sequences in Pretty formats if the output is not a terminal. Add new `auto` mode to setting `output_format_pretty_color`. [#58614](https://github.com/ClickHouse/ClickHouse/pull/58614) ([Shaun Struwig](https://github.com/Blargian)).
* Added setting `update_insert_deduplication_token_in_dependent_materialized_views`. This setting allows to update insert deduplication token with table identifier during insert in dependent materialized views. Closes [#59165](https://github.com/ClickHouse/ClickHouse/issues/59165). [#59238](https://github.com/ClickHouse/ClickHouse/pull/59238) ([Maksim Kita](https://github.com/kitaisreal)).
#### Performance Improvement
* More cache-friendly final implementation. Note on the behaviour change: previously queries with `FINAL` modifier that read with a single stream (e.g. `max_threads=1`) produced sorted output without explicitly provided `ORDER BY` clause. This behaviour no longer exists when `enable_vertical_final = true` (and it is so by default). [#54366](https://github.com/ClickHouse/ClickHouse/pull/54366) ([Duc Canh Le](https://github.com/canhld94)).
* Optimize array element function when input is array(map)/array(array(num)/array(array(string))/array(bigint)/array(decimal). Current implementation causes too many reallocs. The optimization speed up by ~6x especially when input type is array(map). [#56403](https://github.com/ClickHouse/ClickHouse/pull/56403) ([李扬](https://github.com/taiyang-li)).
* Bypass `Poco::BasicBufferedStreamBuf` abstraction when reading from S3 (namely `ReadBufferFromIStream`) to avoid extra copying of data. [#56961](https://github.com/ClickHouse/ClickHouse/pull/56961) ([Nikita Taranov](https://github.com/nickitat)).
* Read column once while reading more that one subcolumn from it in Compact parts. [#57631](https://github.com/ClickHouse/ClickHouse/pull/57631) ([Kruglov Pavel](https://github.com/Avogar)).
* Rewrite the AST of sum(column + literal) function. [#57853](https://github.com/ClickHouse/ClickHouse/pull/57853) ([Jiebin Sun](https://github.com/jiebinn)).
* The evaluation of function `match()` now utilizes skipping indices `ngrambf_v1` and `tokenbf_v1`. [#57882](https://github.com/ClickHouse/ClickHouse/pull/57882) ([凌涛](https://github.com/lingtaolf)).
* Default coordinator for parallel replicas is rewritten for better cache locality (same mark ranges are almost always assigned to the same replicas). Consistent hashing is used also during work stealing, so better tail latency is expected. It has been tested for linear scalability on a hundred of replicas. [#57968](https://github.com/ClickHouse/ClickHouse/pull/57968) ([Nikita Taranov](https://github.com/nickitat)).
* MergeTree FINAL to not compare rows from same non-L0 part. [#58142](https://github.com/ClickHouse/ClickHouse/pull/58142) ([Duc Canh Le](https://github.com/canhld94)).
* Speed up iota calls (filling array with consecutive numbers). [#58271](https://github.com/ClickHouse/ClickHouse/pull/58271) ([Raúl Marín](https://github.com/Algunenano)).
* The evaluation of function `match()` now utilizes inverted indices. [#58284](https://github.com/ClickHouse/ClickHouse/pull/58284) ([凌涛](https://github.com/lingtaolf)).
* Speedup MIN/MAX for non numeric types. [#58334](https://github.com/ClickHouse/ClickHouse/pull/58334) ([Raúl Marín](https://github.com/Algunenano)).
* Enable JIT compilation for aggregation without a key. Closes [#41461](https://github.com/ClickHouse/ClickHouse/issues/41461). Originally [#53757](https://github.com/ClickHouse/ClickHouse/issues/53757). [#58440](https://github.com/ClickHouse/ClickHouse/pull/58440) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The performance experiments of **OnTime** on the Intel server with up to AVX2 (and BMI2) support show that this change could effectively improve the QPS of **Q2** and **Q3** by **5.0%** and **3.7%** through reducing the cycle ratio of the hotspot, **_DB::MergeTreeRangeReader::ReadResult::optimize_**, **from 11.48% to 1.09%** and **from 8.09% to 0.67%** respectively while having no impact on others. [#58800](https://github.com/ClickHouse/ClickHouse/pull/58800) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
* Use one thread less in `clickhouse-local`. [#58968](https://github.com/ClickHouse/ClickHouse/pull/58968) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Large aggregation states of `uniqExact` will be merged in parallel in distrubuted queries. [#59009](https://github.com/ClickHouse/ClickHouse/pull/59009) ([Nikita Taranov](https://github.com/nickitat)).
* Lower memory usage after reading from `MergeTree` tables. [#59290](https://github.com/ClickHouse/ClickHouse/pull/59290) ([Anton Popov](https://github.com/CurtizJ)).
* Lower memory usage in vertical merges. [#59340](https://github.com/ClickHouse/ClickHouse/pull/59340) ([Anton Popov](https://github.com/CurtizJ)).
#### Improvement
* Enable MySQL/MariaDB on macOS. This closes [#21191](https://github.com/ClickHouse/ClickHouse/issues/21191). [#46316](https://github.com/ClickHouse/ClickHouse/pull/46316) ([Robert Schulze](https://github.com/rschu1ze)).
* Do not interpret numbers with leading zeroes as octals. [#55575](https://github.com/ClickHouse/ClickHouse/pull/55575) ([Joanna Hulboj](https://github.com/jh0x)).
* Replace HTTP outgoing buffering based on std ostream with CH Buffer. Add bytes counting metrics for interfaces. [#56064](https://github.com/ClickHouse/ClickHouse/pull/56064) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Disable `max_rows_in_set_to_optimize_join` by default. [#56396](https://github.com/ClickHouse/ClickHouse/pull/56396) ([vdimir](https://github.com/vdimir)).
* Add `<host_name>` config parameter that allows avoiding resolving hostnames in DDLWorker. This mitigates the possibility of the queue being stuck in case of a change in cluster definition. Closes [#57573](https://github.com/ClickHouse/ClickHouse/issues/57573). [#57603](https://github.com/ClickHouse/ClickHouse/pull/57603) ([Nikolay Degterinsky](https://github.com/evillique)).
* Increase `load_metadata_threads` to 16 for the filesystem cache. It will make the server start up faster. [#57732](https://github.com/ClickHouse/ClickHouse/pull/57732) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve the `multiIf` function performance when the type is Nullable. [#57745](https://github.com/ClickHouse/ClickHouse/pull/57745) ([KevinyhZou](https://github.com/KevinyhZou)).
* Add ability to throttle merges/mutations (`max_mutations_bandwidth_for_server`/`max_merges_bandwidth_for_server`). [#57877](https://github.com/ClickHouse/ClickHouse/pull/57877) ([Azat Khuzhin](https://github.com/azat)).
* Replaced undocumented (boolean) column `is_hot_reloadable` in system table `system.server_settings` by (Enum8) column `changeable_without_restart` with possible values `No`, `Yes`, `IncreaseOnly` and `DecreaseOnly`. Also documented the column. [#58029](https://github.com/ClickHouse/ClickHouse/pull/58029) ([skyoct](https://github.com/skyoct)).
* ClusterDiscovery supports setting username and password, close [#58063](https://github.com/ClickHouse/ClickHouse/issues/58063). [#58123](https://github.com/ClickHouse/ClickHouse/pull/58123) ([vdimir](https://github.com/vdimir)).
* Support query parameters in ALTER TABLE ... PART. [#58297](https://github.com/ClickHouse/ClickHouse/pull/58297) ([Azat Khuzhin](https://github.com/azat)).
* Create consumers for Kafka tables on fly (but keep them for some period - `kafka_consumers_pool_ttl_ms`, since last used), this should fix problem with statistics for `system.kafka_consumers` (that does not consumed when nobody reads from Kafka table, which leads to live memory leak and slow table detach) and also this PR enables stats for `system.kafka_consumers` by default again. [#58310](https://github.com/ClickHouse/ClickHouse/pull/58310) ([Azat Khuzhin](https://github.com/azat)).
* Sparkbar as an alias to sparkbar. [#58335](https://github.com/ClickHouse/ClickHouse/pull/58335) ([凌涛](https://github.com/lingtaolf)).
* Avoid sending ComposeObject requests after upload to GCS. [#58343](https://github.com/ClickHouse/ClickHouse/pull/58343) ([Azat Khuzhin](https://github.com/azat)).
* Correctly handle keys with dot in the name in configurations XMLs. [#58354](https://github.com/ClickHouse/ClickHouse/pull/58354) ([Azat Khuzhin](https://github.com/azat)).
* Added comments (brief descriptions) to all columns of system tables. The are several reasons fro this: - We use system tables a lot and sometimes is could be very difficult for developer to understand the purpose and the meaning of a particular column. - We change (add new ones or modify existing) system tables a lot and the documentation for them is always outdated. For example take a look at the documentation page for [`system.parts`](https://clickhouse.com/docs/en/operations/system-tables/parts). It misses a lot of columns - We would like to eventually generate documentation directly from ClickHouse. [#58356](https://github.com/ClickHouse/ClickHouse/pull/58356) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Make function `format` return constant on constant arguments. This closes [#58355](https://github.com/ClickHouse/ClickHouse/issues/58355). [#58358](https://github.com/ClickHouse/ClickHouse/pull/58358) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Attach all system tables in `clickhouse-local`, including `system.parts`. This closes [#58312](https://github.com/ClickHouse/ClickHouse/issues/58312). [#58359](https://github.com/ClickHouse/ClickHouse/pull/58359) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support for `Enum` data types in function `transform`. This closes [#58241](https://github.com/ClickHouse/ClickHouse/issues/58241). [#58360](https://github.com/ClickHouse/ClickHouse/pull/58360) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow registering database engines independently. [#58365](https://github.com/ClickHouse/ClickHouse/pull/58365) ([Bharat Nallan](https://github.com/bharatnc)).
* Adding a setting `max_estimated_execution_time` to separate `max_execution_time` and `max_estimated_execution_time`. [#58402](https://github.com/ClickHouse/ClickHouse/pull/58402) ([Zhang Yifan](https://github.com/zhangyifan27)).
* Allow registering interpreters independently. [#58443](https://github.com/ClickHouse/ClickHouse/pull/58443) ([Bharat Nallan](https://github.com/bharatnc)).
* Provide hint when an invalid database engine name is used. [#58444](https://github.com/ClickHouse/ClickHouse/pull/58444) ([Bharat Nallan](https://github.com/bharatnc)).
* Avoid huge memory consumption during Keeper startup for more cases. [#58455](https://github.com/ClickHouse/ClickHouse/pull/58455) ([Antonio Andelic](https://github.com/antonio2368)).
* Add settings for better control of indexes type in Arrow dictionary. Use signed integer type for indexes by default as Arrow recommends. Closes [#57401](https://github.com/ClickHouse/ClickHouse/issues/57401). [#58519](https://github.com/ClickHouse/ClickHouse/pull/58519) ([Kruglov Pavel](https://github.com/Avogar)).
* Added function `sqidDecode()` which decodes [Sqids](https://sqids.org/). [#58544](https://github.com/ClickHouse/ClickHouse/pull/58544) ([Robert Schulze](https://github.com/rschu1ze)).
* Allow to read Bool values into String in JSON input formats. It's done under a setting `input_format_json_read_bools_as_strings` that is enabled by default. [#58561](https://github.com/ClickHouse/ClickHouse/pull/58561) ([Kruglov Pavel](https://github.com/Avogar)).
* Implement [#58575](https://github.com/ClickHouse/ClickHouse/issues/58575) Support `CLICKHOUSE_PASSWORD_FILE ` environment variable when running the docker image. [#58583](https://github.com/ClickHouse/ClickHouse/pull/58583) ([Eyal Halpern Shalev](https://github.com/Eyal-Shalev)).
* When executing some queries, which require a lot of streams for reading data, the error `"Paste JOIN requires sorted tables only"` was previously thrown. Now the numbers of streams resize to 1 in that case. [#58608](https://github.com/ClickHouse/ClickHouse/pull/58608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)).
* Better message for INVALID_IDENTIFIER error. [#58703](https://github.com/ClickHouse/ClickHouse/pull/58703) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Improved handling of signed numeric literals in normalizeQuery. [#58710](https://github.com/ClickHouse/ClickHouse/pull/58710) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Support Point data type for MySQL. [#58721](https://github.com/ClickHouse/ClickHouse/pull/58721) ([Kseniia Sumarokova](https://github.com/kssenii)).
* When comparing a Float32 column and a const string, read the string as Float32 (instead of Float64). [#58724](https://github.com/ClickHouse/ClickHouse/pull/58724) ([Raúl Marín](https://github.com/Algunenano)).
* Improve S3 compatible, add Ecloud EOS storage support. [#58786](https://github.com/ClickHouse/ClickHouse/pull/58786) ([xleoken](https://github.com/xleoken)).
* Allow `KILL QUERY` to cancel backups / restores. This PR also makes running backups and restores visible in `system.processes`. Also there is a new setting in the server configuration now - `shutdown_wait_backups_and_restores` (default=true) which makes the server either wait on shutdown for all running backups and restores to finish or just cancel them. [#58804](https://github.com/ClickHouse/ClickHouse/pull/58804) ([Vitaly Baranov](https://github.com/vitlibar)).
* Avro format support Zstd codec. Closes [#58735](https://github.com/ClickHouse/ClickHouse/issues/58735). [#58805](https://github.com/ClickHouse/ClickHouse/pull/58805) ([flynn](https://github.com/ucasfl)).
* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#58835](https://github.com/ClickHouse/ClickHouse/pull/58835) ([Serge Klochkov](https://github.com/slvrtrn)).
* Fixing a problem described in [#58719](https://github.com/ClickHouse/ClickHouse/issues/58719). [#58841](https://github.com/ClickHouse/ClickHouse/pull/58841) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Make sure that for custom (created from SQL) disks ether `filesystem_caches_path` (a common directory prefix for all filesystem caches) or `custom_cached_disks_base_directory` (a common directory prefix for only filesystem caches created from custom disks) is specified in server config. `custom_cached_disks_base_directory` has higher priority for custom disks over `filesystem_caches_path`, which is used if the former one is absent. Filesystem cache setting `path` must lie inside that directory, otherwise exception will be thrown preventing disk to be created. This will not affect disks created on an older version and server was upgraded - then the exception will not be thrown to allow the server to successfully start). `custom_cached_disks_base_directory` is added to default server config as `/var/lib/clickhouse/caches/`. Closes [#57825](https://github.com/ClickHouse/ClickHouse/issues/57825). [#58869](https://github.com/ClickHouse/ClickHouse/pull/58869) ([Kseniia Sumarokova](https://github.com/kssenii)).
* MySQL interface gained compatibility with `SHOW WARNINGS`/`SHOW COUNT(*) WARNINGS` queries, though the returned result is always an empty set. [#58929](https://github.com/ClickHouse/ClickHouse/pull/58929) ([Serge Klochkov](https://github.com/slvrtrn)).
* Skip unavailable replicas when executing parallel distributed `INSERT SELECT`. [#58931](https://github.com/ClickHouse/ClickHouse/pull/58931) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Display word-descriptive log level while enabling structured log formatting in json. [#58936](https://github.com/ClickHouse/ClickHouse/pull/58936) ([Tim Liou](https://github.com/wheatdog)).
* MySQL interface gained support for `CAST(x AS SIGNED)` and `CAST(x AS UNSIGNED)` statements via data type aliases: `SIGNED` for Int64, and `UNSIGNED` for UInt64. This improves compatibility with BI tools such as Looker Studio. [#58954](https://github.com/ClickHouse/ClickHouse/pull/58954) ([Serge Klochkov](https://github.com/slvrtrn)).
* Function `seriesDecomposeSTL()` now returns a baseline component as season + trend components. [#58961](https://github.com/ClickHouse/ClickHouse/pull/58961) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* Fix memory management in copyDataToS3File. [#58962](https://github.com/ClickHouse/ClickHouse/pull/58962) ([Vitaly Baranov](https://github.com/vitlibar)).
* Change working directory to data path in docker container. [#58975](https://github.com/ClickHouse/ClickHouse/pull/58975) ([cangyin](https://github.com/cangyin)).
* Added setting for Azure Blob Storage `azure_max_unexpected_write_error_retries` , can also be set from config under azure section. [#59001](https://github.com/ClickHouse/ClickHouse/pull/59001) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Keeper improvement: reduce Keeper's memory usage for stored nodes. [#59002](https://github.com/ClickHouse/ClickHouse/pull/59002) ([Antonio Andelic](https://github.com/antonio2368)).
* Allow server to start with broken data lake table. Closes [#58625](https://github.com/ClickHouse/ClickHouse/issues/58625). [#59080](https://github.com/ClickHouse/ClickHouse/pull/59080) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixes https://github.com/ClickHouse/ClickHouse/pull/59120#issuecomment-1906177350. [#59122](https://github.com/ClickHouse/ClickHouse/pull/59122) ([Arthur Passos](https://github.com/arthurpassos)).
* The state of URL's #hash in the dashboard is now compressed using [lz-string](https://github.com/pieroxy/lz-string). The default size of the state string is compressed from 6856B to 2823B. [#59124](https://github.com/ClickHouse/ClickHouse/pull/59124) ([Amos Bird](https://github.com/amosbird)).
* Allow to ignore schema evolution in Iceberg table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation. This is done under a setting `iceberg_engine_ignore_schema_evolution` that is disabled by default. Note that enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. [#59133](https://github.com/ClickHouse/ClickHouse/pull/59133) ([Kruglov Pavel](https://github.com/Avogar)).
* Prohibit mutable operations (`INSERT`/`ALTER`/`OPTIMIZE`/...) on read-only/write-once storages with a proper `TABLE_IS_READ_ONLY` error (to avoid leftovers). Avoid leaving left-overs on write-once disks (`format_version.txt`) on `CREATE`/`ATTACH`. Ignore `DROP` for `ReplicatedMergeTree` (so as for `MergeTree`). Fix iterating over `s3_plain` (`MetadataStorageFromPlainObjectStorage::iterateDirectory`). Note read-only is `web` disk, and write-once is `s3_plain`. [#59170](https://github.com/ClickHouse/ClickHouse/pull/59170) ([Azat Khuzhin](https://github.com/azat)).
* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#59293](https://github.com/ClickHouse/ClickHouse/pull/59293) ([Serge Klochkov](https://github.com/slvrtrn)).
* Fix bug in experimental `_block_number` column which could lead to logical error during complex combination of `ALTER`s and `merge`s. Fixes [#56202](https://github.com/ClickHouse/ClickHouse/issues/56202). Replaces [#58601](https://github.com/ClickHouse/ClickHouse/issues/58601). CC @SmitaRKulkarni. [#59295](https://github.com/ClickHouse/ClickHouse/pull/59295) ([alesapin](https://github.com/alesapin)).
* Play UI understands when an exception is returned inside JSON. Adjustment for [#52853](https://github.com/ClickHouse/ClickHouse/issues/52853). [#59303](https://github.com/ClickHouse/ClickHouse/pull/59303) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* `/binary` HTTP handler allows to specify user, host, and optionally, password in the query string. [#59311](https://github.com/ClickHouse/ClickHouse/pull/59311) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support backups for compressed in-memory tables. This closes [#57893](https://github.com/ClickHouse/ClickHouse/issues/57893). [#59315](https://github.com/ClickHouse/ClickHouse/pull/59315) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve exception message of function regexp_extract, close [#56393](https://github.com/ClickHouse/ClickHouse/issues/56393). [#59319](https://github.com/ClickHouse/ClickHouse/pull/59319) ([李扬](https://github.com/taiyang-li)).
* Support the FORMAT clause in BACKUP and RESTORE queries. [#59338](https://github.com/ClickHouse/ClickHouse/pull/59338) ([Vitaly Baranov](https://github.com/vitlibar)).
* Function `concatWithSeparator()` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). For example, `SELECT concatWithSeparator('.', 'number', 1)` now returns `number.1`. [#59341](https://github.com/ClickHouse/ClickHouse/pull/59341) ([Robert Schulze](https://github.com/rschu1ze)).
#### Build/Testing/Packaging Improvement
* Improve aliases for clickhouse binary (now `ch`/`clickhouse` is `clickhouse-local` or `clickhouse` depends on the arguments) and add bash completion for new aliases. [#58344](https://github.com/ClickHouse/ClickHouse/pull/58344) ([Azat Khuzhin](https://github.com/azat)).
* Add settings changes check to CI to check that all settings changes are reflected in settings changes history. [#58555](https://github.com/ClickHouse/ClickHouse/pull/58555) ([Kruglov Pavel](https://github.com/Avogar)).
* Use tables directly attached from S3 in stateful tests. [#58791](https://github.com/ClickHouse/ClickHouse/pull/58791) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Save the whole `fuzzer.log` as an archive instead of the last 100k lines. `tail -n 100000` often removes lines with table definitions. Example:. [#58821](https://github.com/ClickHouse/ClickHouse/pull/58821) ([Dmitry Novik](https://github.com/novikd)).
* Enable Rust on OSX ARM64 (this will add fuzzy search in client with skim and prql language, though I don't think that are people who hosts ClickHouse on darwin, so it is mostly for fuzzy search in client I would say). [#59272](https://github.com/ClickHouse/ClickHouse/pull/59272) ([Azat Khuzhin](https://github.com/azat)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Add join keys conversion for nested lowcardinality [#51550](https://github.com/ClickHouse/ClickHouse/pull/51550) ([vdimir](https://github.com/vdimir)).
* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a bug with projections and the aggregate_functions_null_for_empty setting during insertion. [#56944](https://github.com/ClickHouse/ClickHouse/pull/56944) ([Amos Bird](https://github.com/amosbird)).
* Fixed potential exception due to stale profile UUID [#57263](https://github.com/ClickHouse/ClickHouse/pull/57263) ([Vasily Nemkov](https://github.com/Enmk)).
* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
* Ignore MVs with dropped target table during pushing to views [#57520](https://github.com/ClickHouse/ClickHouse/pull/57520) ([Kruglov Pavel](https://github.com/Avogar)).
* [RFC] Eliminate possible race between ALTER_METADATA and MERGE_PARTS [#57755](https://github.com/ClickHouse/ClickHouse/pull/57755) ([Azat Khuzhin](https://github.com/azat)).
* Fix the exprs order bug in group by with rollup [#57786](https://github.com/ClickHouse/ClickHouse/pull/57786) ([Chen768959](https://github.com/Chen768959)).
* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Allow users to work with symlinks in user_files_path (again) [#58447](https://github.com/ClickHouse/ClickHouse/pull/58447) ([Duc Canh Le](https://github.com/canhld94)).
* Fix segfault when graphite table does not have agg function [#58453](https://github.com/ClickHouse/ClickHouse/pull/58453) ([Duc Canh Le](https://github.com/canhld94)).
* Delay reading from StorageKafka to allow multiple reads in materialized views [#58477](https://github.com/ClickHouse/ClickHouse/pull/58477) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)).
* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)).
* Enable ordinary databases while restoration [#58520](https://github.com/ClickHouse/ClickHouse/pull/58520) ([Jihyuk Bok](https://github.com/tomahawk28)).
* Fix hive threadpool read ORC/Parquet/... Failed [#58537](https://github.com/ClickHouse/ClickHouse/pull/58537) ([sunny](https://github.com/sunny19930321)).
* Hide credentials in system.backup_log base_backup_name column [#58550](https://github.com/ClickHouse/ClickHouse/pull/58550) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* toStartOfInterval for milli- microsencods values rounding [#58557](https://github.com/ClickHouse/ClickHouse/pull/58557) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Disable max_joined_block_rows in ConcurrentHashJoin [#58595](https://github.com/ClickHouse/ClickHouse/pull/58595) ([vdimir](https://github.com/vdimir)).
* Fix join using nullable in old analyzer [#58596](https://github.com/ClickHouse/ClickHouse/pull/58596) ([vdimir](https://github.com/vdimir)).
* `makeDateTime64()`: Allow non-const fraction argument [#58597](https://github.com/ClickHouse/ClickHouse/pull/58597) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix possible NULL dereference during symbolizing inline frames [#58607](https://github.com/ClickHouse/ClickHouse/pull/58607) ([Azat Khuzhin](https://github.com/azat)).
* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix broken partition key analysis when doing projection optimization [#58638](https://github.com/ClickHouse/ClickHouse/pull/58638) ([Amos Bird](https://github.com/amosbird)).
* Query cache: Fix per-user quota [#58731](https://github.com/ClickHouse/ClickHouse/pull/58731) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)).
* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)).
* Don't process requests in Keeper during shutdown [#58765](https://github.com/ClickHouse/ClickHouse/pull/58765) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix Segfault in `SlabsPolygonIndex::find` [#58771](https://github.com/ClickHouse/ClickHouse/pull/58771) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)).
* Table CREATE DROP Poco::Logger memory leak fix [#58831](https://github.com/ClickHouse/ClickHouse/pull/58831) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix HTTP compressors finalization [#58846](https://github.com/ClickHouse/ClickHouse/pull/58846) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Multiple read file log storage in mv [#58877](https://github.com/ClickHouse/ClickHouse/pull/58877) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Restriction for the access key id for s3. [#58900](https://github.com/ClickHouse/ClickHouse/pull/58900) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix possible crash in clickhouse-local during loading suggestions [#58907](https://github.com/ClickHouse/ClickHouse/pull/58907) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash when indexHint() is used [#58911](https://github.com/ClickHouse/ClickHouse/pull/58911) ([Dmitry Novik](https://github.com/novikd)).
* Fix StorageURL forgetting headers on server restart [#58933](https://github.com/ClickHouse/ClickHouse/pull/58933) ([Michael Kolupaev](https://github.com/al13n321)).
* Analyzer: fix storage replacement with insertion block [#58958](https://github.com/ClickHouse/ClickHouse/pull/58958) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix seek in ReadBufferFromZipArchive [#58966](https://github.com/ClickHouse/ClickHouse/pull/58966) ([Michael Kolupaev](https://github.com/al13n321)).
* `DROP INDEX` of inverted index now removes all relevant files from persistence [#59040](https://github.com/ClickHouse/ClickHouse/pull/59040) ([mochi](https://github.com/MochiXu)).
* Fix data race on query_factories_info [#59049](https://github.com/ClickHouse/ClickHouse/pull/59049) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Disable "Too many redirects" error retry [#59099](https://github.com/ClickHouse/ClickHouse/pull/59099) ([skyoct](https://github.com/skyoct)).
* Fix aggregation issue in mixed x86_64 and ARM clusters [#59132](https://github.com/ClickHouse/ClickHouse/pull/59132) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Fix not started database shutdown deadlock [#59137](https://github.com/ClickHouse/ClickHouse/pull/59137) ([Sergei Trifonov](https://github.com/serxa)).
* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix crash with nullable timezone for `toString` [#59190](https://github.com/ClickHouse/ClickHouse/pull/59190) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix abort in iceberg metadata on bad file paths [#59275](https://github.com/ClickHouse/ClickHouse/pull/59275) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix architecture name in select of Rust target [#59307](https://github.com/ClickHouse/ClickHouse/pull/59307) ([p1rattttt](https://github.com/p1rattttt)).
* Fix not-ready set for system.tables [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix lazy initialization in RabbitMQ [#59352](https://github.com/ClickHouse/ClickHouse/pull/59352) ([Kruglov Pavel](https://github.com/Avogar)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Refreshable materialized views (takeover)"'. [#58296](https://github.com/ClickHouse/ClickHouse/pull/58296) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Fix an error in the release script - it didn't allow to make 23.12."'. [#58381](https://github.com/ClickHouse/ClickHouse/pull/58381) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* NO CL ENTRY: 'Revert "Use CH Buffer for HTTP out stream, add metrics for interfaces"'. [#58450](https://github.com/ClickHouse/ClickHouse/pull/58450) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Second attempt: Use CH Buffer for HTTP out stream, add metrics for interfaces'. [#58475](https://github.com/ClickHouse/ClickHouse/pull/58475) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* NO CL ENTRY: 'Revert "Merging [#53757](https://github.com/ClickHouse/ClickHouse/issues/53757)"'. [#58542](https://github.com/ClickHouse/ClickHouse/pull/58542) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Add support for MySQL `net_write_timeout` and `net_read_timeout` settings"'. [#58872](https://github.com/ClickHouse/ClickHouse/pull/58872) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Extend performance test norm_dist.xml"'. [#58989](https://github.com/ClickHouse/ClickHouse/pull/58989) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892)"'. [#58990](https://github.com/ClickHouse/ClickHouse/pull/58990) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Allow parallel replicas for JOIN with analyzer [part 1]."'. [#59059](https://github.com/ClickHouse/ClickHouse/pull/59059) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Consume leading zeroes when parsing a number in ConstantExpressionTemplate"'. [#59070](https://github.com/ClickHouse/ClickHouse/pull/59070) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Revert "Allow parallel replicas for JOIN with analyzer [part 1].""'. [#59076](https://github.com/ClickHouse/ClickHouse/pull/59076) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* NO CL ENTRY: 'Revert "Allow to attach partition from table with different partition expression when destination partition expression doesn't re-partition"'. [#59120](https://github.com/ClickHouse/ClickHouse/pull/59120) ([Robert Schulze](https://github.com/rschu1ze)).
* NO CL ENTRY: 'DisksApp.cpp: fix typo (specifiged → specified)'. [#59140](https://github.com/ClickHouse/ClickHouse/pull/59140) ([Nikolay Edigaryev](https://github.com/edigaryev)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Analyzer: Fix resolving subcolumns in JOIN [#49703](https://github.com/ClickHouse/ClickHouse/pull/49703) ([vdimir](https://github.com/vdimir)).
* Analyzer: always qualify execution names [#53705](https://github.com/ClickHouse/ClickHouse/pull/53705) ([Dmitry Novik](https://github.com/novikd)).
* Insert quorum: check host node version in addition [#55528](https://github.com/ClickHouse/ClickHouse/pull/55528) ([Igor Nikonov](https://github.com/devcrafter)).
* Remove more old code of projection analysis [#55579](https://github.com/ClickHouse/ClickHouse/pull/55579) ([Anton Popov](https://github.com/CurtizJ)).
* Better exception messages in input formats [#57053](https://github.com/ClickHouse/ClickHouse/pull/57053) ([Kruglov Pavel](https://github.com/Avogar)).
* Parallel replicas custom key: skip unavailable replicas [#57235](https://github.com/ClickHouse/ClickHouse/pull/57235) ([Igor Nikonov](https://github.com/devcrafter)).
* Small change in log message in MergeTreeDataMergerMutator [#57550](https://github.com/ClickHouse/ClickHouse/pull/57550) ([Nikita Taranov](https://github.com/nickitat)).
* fs cache: small optimization [#57615](https://github.com/ClickHouse/ClickHouse/pull/57615) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Customizable dependency failure handling for AsyncLoader [#57697](https://github.com/ClickHouse/ClickHouse/pull/57697) ([Sergei Trifonov](https://github.com/serxa)).
* Bring test back [#57700](https://github.com/ClickHouse/ClickHouse/pull/57700) ([Nikita Taranov](https://github.com/nickitat)).
* Change default database name in clickhouse-local to 'default' [#57774](https://github.com/ClickHouse/ClickHouse/pull/57774) ([Kruglov Pavel](https://github.com/Avogar)).
* Add option `--show-whitespaces-in-diff` to clickhouse-test [#57870](https://github.com/ClickHouse/ClickHouse/pull/57870) ([vdimir](https://github.com/vdimir)).
* Update `query_masking_rules` when reloading the config, attempt 2 [#57993](https://github.com/ClickHouse/ClickHouse/pull/57993) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Remove unneeded parameter `use_external_buffer` from `AsynchronousReadBuffer*` [#58077](https://github.com/ClickHouse/ClickHouse/pull/58077) ([Nikita Taranov](https://github.com/nickitat)).
* Print another message in Bugfix check if internal check had been failed [#58091](https://github.com/ClickHouse/ClickHouse/pull/58091) ([vdimir](https://github.com/vdimir)).
* Refactor StorageMerge virtual columns filtering. [#58255](https://github.com/ClickHouse/ClickHouse/pull/58255) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Analyzer: fix tuple comparison when result is always null [#58266](https://github.com/ClickHouse/ClickHouse/pull/58266) ([vdimir](https://github.com/vdimir)).
* Fix an error in the release script - it didn't allow to make 23.12. [#58288](https://github.com/ClickHouse/ClickHouse/pull/58288) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update version_date.tsv and changelogs after v23.12.1.1368-stable [#58290](https://github.com/ClickHouse/ClickHouse/pull/58290) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix test_storage_s3_queue/test.py::test_drop_table [#58293](https://github.com/ClickHouse/ClickHouse/pull/58293) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix timeout in 01732_race_condition_storage_join_long [#58298](https://github.com/ClickHouse/ClickHouse/pull/58298) ([vdimir](https://github.com/vdimir)).
* Handle another case for preprocessing in Keeper [#58308](https://github.com/ClickHouse/ClickHouse/pull/58308) ([Antonio Andelic](https://github.com/antonio2368)).
* Disable max_bytes_before_external* in 00172_hits_joins [#58309](https://github.com/ClickHouse/ClickHouse/pull/58309) ([vdimir](https://github.com/vdimir)).
* Analyzer: support functional arguments in USING clause [#58317](https://github.com/ClickHouse/ClickHouse/pull/58317) ([Dmitry Novik](https://github.com/novikd)).
* Fixed logical error in CheckSortedTransform [#58318](https://github.com/ClickHouse/ClickHouse/pull/58318) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Refreshable materialized views again [#58320](https://github.com/ClickHouse/ClickHouse/pull/58320) ([Michael Kolupaev](https://github.com/al13n321)).
* Organize symbols from src/* into DB namespace [#58336](https://github.com/ClickHouse/ClickHouse/pull/58336) ([Amos Bird](https://github.com/amosbird)).
* Add a style check against DOS and Windows [#58345](https://github.com/ClickHouse/ClickHouse/pull/58345) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Check what happen if remove array joined columns from KeyCondition [#58346](https://github.com/ClickHouse/ClickHouse/pull/58346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Upload time of the perf tests into artifacts as test_duration_ms [#58348](https://github.com/ClickHouse/ClickHouse/pull/58348) ([Azat Khuzhin](https://github.com/azat)).
* Keep exception format string in retries ctl [#58351](https://github.com/ClickHouse/ClickHouse/pull/58351) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix replication.lib helper (system.mutations has database not current_database) [#58352](https://github.com/ClickHouse/ClickHouse/pull/58352) ([Azat Khuzhin](https://github.com/azat)).
* Refactor StorageHDFS and StorageFile virtual columns filtering [#58353](https://github.com/ClickHouse/ClickHouse/pull/58353) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix suspended workers for AsyncLoader [#58362](https://github.com/ClickHouse/ClickHouse/pull/58362) ([Sergei Trifonov](https://github.com/serxa)).
* Remove stale events from README [#58364](https://github.com/ClickHouse/ClickHouse/pull/58364) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Do not fail the CI on an expired token [#58384](https://github.com/ClickHouse/ClickHouse/pull/58384) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add a test for [#38534](https://github.com/ClickHouse/ClickHouse/issues/38534) [#58391](https://github.com/ClickHouse/ClickHouse/pull/58391) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* fix database engine validation inside database factory [#58395](https://github.com/ClickHouse/ClickHouse/pull/58395) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix bad formatting of the `timeDiff` compatibility alias [#58398](https://github.com/ClickHouse/ClickHouse/pull/58398) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix a comment; remove unused method; stop using pointers [#58399](https://github.com/ClickHouse/ClickHouse/pull/58399) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix test_user_valid_until [#58409](https://github.com/ClickHouse/ClickHouse/pull/58409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Make a test not depend on the lack of floating point associativity [#58439](https://github.com/ClickHouse/ClickHouse/pull/58439) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `02944_dynamically_change_filesystem_cache_size` [#58445](https://github.com/ClickHouse/ClickHouse/pull/58445) ([Nikolay Degterinsky](https://github.com/evillique)).
* Analyzer: Fix LOGICAL_ERROR with LowCardinality [#58457](https://github.com/ClickHouse/ClickHouse/pull/58457) ([Dmitry Novik](https://github.com/novikd)).
* Replace `std::regex` by re2 [#58458](https://github.com/ClickHouse/ClickHouse/pull/58458) ([Robert Schulze](https://github.com/rschu1ze)).
* Improve perf tests [#58478](https://github.com/ClickHouse/ClickHouse/pull/58478) ([Raúl Marín](https://github.com/Algunenano)).
* Check if I can remove KeyCondition analysis on AST. [#58480](https://github.com/ClickHouse/ClickHouse/pull/58480) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix some thread pool settings not updating at runtime [#58485](https://github.com/ClickHouse/ClickHouse/pull/58485) ([Michael Kolupaev](https://github.com/al13n321)).
* Lower log levels for some Raft logs to new test level [#58487](https://github.com/ClickHouse/ClickHouse/pull/58487) ([Antonio Andelic](https://github.com/antonio2368)).
* PartsSplitter small refactoring [#58506](https://github.com/ClickHouse/ClickHouse/pull/58506) ([Maksim Kita](https://github.com/kitaisreal)).
* Sync content of the docker test images [#58507](https://github.com/ClickHouse/ClickHouse/pull/58507) ([Max K.](https://github.com/maxknv)).
* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)).
* Minor fixups for `sqid()` [#58517](https://github.com/ClickHouse/ClickHouse/pull/58517) ([Robert Schulze](https://github.com/rschu1ze)).
* Update version_date.tsv and changelogs after v23.12.2.59-stable [#58545](https://github.com/ClickHouse/ClickHouse/pull/58545) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.11.4.24-stable [#58546](https://github.com/ClickHouse/ClickHouse/pull/58546) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.8.9.54-lts [#58547](https://github.com/ClickHouse/ClickHouse/pull/58547) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.10.6.60-stable [#58548](https://github.com/ClickHouse/ClickHouse/pull/58548) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.3.19.32-lts [#58549](https://github.com/ClickHouse/ClickHouse/pull/58549) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update CHANGELOG.md [#58559](https://github.com/ClickHouse/ClickHouse/pull/58559) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Fix test 02932_kill_query_sleep [#58560](https://github.com/ClickHouse/ClickHouse/pull/58560) ([Vitaly Baranov](https://github.com/vitlibar)).
* CI fix. Add packager script to build digest [#58571](https://github.com/ClickHouse/ClickHouse/pull/58571) ([Max K.](https://github.com/maxknv)).
* fix and test that S3Clients are reused [#58573](https://github.com/ClickHouse/ClickHouse/pull/58573) ([Sema Checherinda](https://github.com/CheSema)).
* Follow-up to [#58482](https://github.com/ClickHouse/ClickHouse/issues/58482) [#58574](https://github.com/ClickHouse/ClickHouse/pull/58574) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Do not load database engines in suggest [#58586](https://github.com/ClickHouse/ClickHouse/pull/58586) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix wrong message in Keeper [#58588](https://github.com/ClickHouse/ClickHouse/pull/58588) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add some missing LLVM includes [#58594](https://github.com/ClickHouse/ClickHouse/pull/58594) ([Raúl Marín](https://github.com/Algunenano)).
* Small fix in Keeper [#58598](https://github.com/ClickHouse/ClickHouse/pull/58598) ([Antonio Andelic](https://github.com/antonio2368)).
* Update analyzer_tech_debt.txt [#58599](https://github.com/ClickHouse/ClickHouse/pull/58599) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Simplify release.py script [#58600](https://github.com/ClickHouse/ClickHouse/pull/58600) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update analyzer_tech_debt.txt [#58602](https://github.com/ClickHouse/ClickHouse/pull/58602) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Refactor stacktrace symbolizer to avoid copy-paste [#58610](https://github.com/ClickHouse/ClickHouse/pull/58610) ([Azat Khuzhin](https://github.com/azat)).
* Add intel AMX checking [#58617](https://github.com/ClickHouse/ClickHouse/pull/58617) ([Roman Glinskikh](https://github.com/omgronny)).
* Optional `client` argument for `S3Helper` [#58619](https://github.com/ClickHouse/ClickHouse/pull/58619) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add sorting to 02366_kql_summarize.sql [#58621](https://github.com/ClickHouse/ClickHouse/pull/58621) ([Raúl Marín](https://github.com/Algunenano)).
* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Remove more projections code [#58628](https://github.com/ClickHouse/ClickHouse/pull/58628) ([Anton Popov](https://github.com/CurtizJ)).
* Remove finalize() from ~WriteBufferFromEncryptedFile [#58629](https://github.com/ClickHouse/ClickHouse/pull/58629) ([Vitaly Baranov](https://github.com/vitlibar)).
* Update test_replicated_database/test.py [#58647](https://github.com/ClickHouse/ClickHouse/pull/58647) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Try disabling `muzzy_decay_ms` in jemalloc [#58648](https://github.com/ClickHouse/ClickHouse/pull/58648) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix test_replicated_database::test_startup_without_zk flakiness [#58649](https://github.com/ClickHouse/ClickHouse/pull/58649) ([Azat Khuzhin](https://github.com/azat)).
* Fix 01600_remerge_sort_lowered_memory_bytes_ratio flakiness (due to settings randomization) [#58650](https://github.com/ClickHouse/ClickHouse/pull/58650) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer: Fix assertion in HashJoin with duplicate columns [#58652](https://github.com/ClickHouse/ClickHouse/pull/58652) ([vdimir](https://github.com/vdimir)).
* Document that `match()` can use `ngrambf_v1` and `tokenbf_v1` indexes [#58655](https://github.com/ClickHouse/ClickHouse/pull/58655) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix perf tests duration (checks.test_duration_ms) [#58656](https://github.com/ClickHouse/ClickHouse/pull/58656) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer: Correctly handle constant set in index [#58657](https://github.com/ClickHouse/ClickHouse/pull/58657) ([Dmitry Novik](https://github.com/novikd)).
* fix a typo in stress randomization setting [#58658](https://github.com/ClickHouse/ClickHouse/pull/58658) ([Sema Checherinda](https://github.com/CheSema)).
* Small follow-up to `std::regex` --> `re2` conversion ([#58458](https://github.com/ClickHouse/ClickHouse/issues/58458)) [#58678](https://github.com/ClickHouse/ClickHouse/pull/58678) ([Robert Schulze](https://github.com/rschu1ze)).
* Remove `<regex>` from libcxx [#58681](https://github.com/ClickHouse/ClickHouse/pull/58681) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix bad log message [#58698](https://github.com/ClickHouse/ClickHouse/pull/58698) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Some small improvements to version_helper from [#57203](https://github.com/ClickHouse/ClickHouse/issues/57203) [#58712](https://github.com/ClickHouse/ClickHouse/pull/58712) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Small fixes in different helpers [#58717](https://github.com/ClickHouse/ClickHouse/pull/58717) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix bug in new (not released yet) parallel replicas coordinator [#58722](https://github.com/ClickHouse/ClickHouse/pull/58722) ([Nikita Taranov](https://github.com/nickitat)).
* Analyzer: Fix LOGICAL_ERROR in CountDistinctPass [#58723](https://github.com/ClickHouse/ClickHouse/pull/58723) ([Dmitry Novik](https://github.com/novikd)).
* Fix reading of offsets subcolumn (`size0`) from `Nested` [#58729](https://github.com/ClickHouse/ClickHouse/pull/58729) ([Anton Popov](https://github.com/CurtizJ)).
* Fix Mac OS X [#58733](https://github.com/ClickHouse/ClickHouse/pull/58733) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* fix stress with generate-template-key [#58740](https://github.com/ClickHouse/ClickHouse/pull/58740) ([Sema Checherinda](https://github.com/CheSema)).
* more relaxed check [#58751](https://github.com/ClickHouse/ClickHouse/pull/58751) ([Sema Checherinda](https://github.com/CheSema)).
* Fix usage of small buffers for remote reading [#58768](https://github.com/ClickHouse/ClickHouse/pull/58768) ([Nikita Taranov](https://github.com/nickitat)).
* Add missing includes when _LIBCPP_REMOVE_TRANSITIVE_INCLUDES enabled [#58770](https://github.com/ClickHouse/ClickHouse/pull/58770) ([Artem Alperin](https://github.com/hdnpth)).
* Remove some code [#58772](https://github.com/ClickHouse/ClickHouse/pull/58772) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove some code [#58790](https://github.com/ClickHouse/ClickHouse/pull/58790) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix trash in performance tests [#58794](https://github.com/ClickHouse/ClickHouse/pull/58794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix data race in Keeper [#58806](https://github.com/ClickHouse/ClickHouse/pull/58806) ([Antonio Andelic](https://github.com/antonio2368)).
* Increase log level to trace to help debug `00993_system_parts_race_condition_drop_zookeeper` [#58809](https://github.com/ClickHouse/ClickHouse/pull/58809) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* DatabaseCatalog background tasks add log names [#58832](https://github.com/ClickHouse/ClickHouse/pull/58832) ([Maksim Kita](https://github.com/kitaisreal)).
* Analyzer: Resolve GROUPING function on shards [#58833](https://github.com/ClickHouse/ClickHouse/pull/58833) ([Dmitry Novik](https://github.com/novikd)).
* Allow parallel replicas for JOIN with analyzer [part 1]. [#58838](https://github.com/ClickHouse/ClickHouse/pull/58838) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix `isRetry` method [#58839](https://github.com/ClickHouse/ClickHouse/pull/58839) ([alesapin](https://github.com/alesapin)).
* fs cache: fix data race in slru [#58842](https://github.com/ClickHouse/ClickHouse/pull/58842) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix reading from an invisible part in new (not released yet) parallel replicas coordinator [#58844](https://github.com/ClickHouse/ClickHouse/pull/58844) ([Nikita Taranov](https://github.com/nickitat)).
* Fix bad log message [#58849](https://github.com/ClickHouse/ClickHouse/pull/58849) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Set max_bytes_before_external_group_by in 01961_roaring_memory_tracking [#58863](https://github.com/ClickHouse/ClickHouse/pull/58863) ([vdimir](https://github.com/vdimir)).
* Fix `00089_group_by_arrays_of_fixed` with external aggregation [#58873](https://github.com/ClickHouse/ClickHouse/pull/58873) ([Antonio Andelic](https://github.com/antonio2368)).
* DiskWeb minor improvement in loading [#58874](https://github.com/ClickHouse/ClickHouse/pull/58874) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix RPN construction for indexHint [#58875](https://github.com/ClickHouse/ClickHouse/pull/58875) ([Dmitry Novik](https://github.com/novikd)).
* Analyzer: add test with GROUP BY on shards [#58876](https://github.com/ClickHouse/ClickHouse/pull/58876) ([Dmitry Novik](https://github.com/novikd)).
* Jepsen job to reuse builds [#58881](https://github.com/ClickHouse/ClickHouse/pull/58881) ([Max K.](https://github.com/maxknv)).
* Fix ambiguity in the setting description [#58883](https://github.com/ClickHouse/ClickHouse/pull/58883) ([Denny Crane](https://github.com/den-crane)).
* Less error prone interface of read buffers [#58886](https://github.com/ClickHouse/ClickHouse/pull/58886) ([Anton Popov](https://github.com/CurtizJ)).
* Add metric for keeper memory soft limit [#58890](https://github.com/ClickHouse/ClickHouse/pull/58890) ([Pradeep Chhetri](https://github.com/chhetripradeep)).
* Add a test for [#47988](https://github.com/ClickHouse/ClickHouse/issues/47988) [#58893](https://github.com/ClickHouse/ClickHouse/pull/58893) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Whitespaces [#58894](https://github.com/ClickHouse/ClickHouse/pull/58894) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix data race in `AggregatingTransform` [#58896](https://github.com/ClickHouse/ClickHouse/pull/58896) ([Antonio Andelic](https://github.com/antonio2368)).
* Update SLRUFileCachePriority.cpp [#58898](https://github.com/ClickHouse/ClickHouse/pull/58898) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add tests for [#57193](https://github.com/ClickHouse/ClickHouse/issues/57193) [#58899](https://github.com/ClickHouse/ClickHouse/pull/58899) ([Raúl Marín](https://github.com/Algunenano)).
* Add log for already download binary in Jepsen [#58901](https://github.com/ClickHouse/ClickHouse/pull/58901) ([Antonio Andelic](https://github.com/antonio2368)).
* fs cache: minor refactoring [#58902](https://github.com/ClickHouse/ClickHouse/pull/58902) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Checking on flaky test_parallel_replicas_custom_key_failover [#58909](https://github.com/ClickHouse/ClickHouse/pull/58909) ([Igor Nikonov](https://github.com/devcrafter)).
* Style fix [#58913](https://github.com/ClickHouse/ClickHouse/pull/58913) ([Dmitry Novik](https://github.com/novikd)).
* Opentelemetry spans to analyze CPU and S3 bottlenecks on inserts [#58914](https://github.com/ClickHouse/ClickHouse/pull/58914) ([Alexander Gololobov](https://github.com/davenger)).
* Fix fault handler in case of thread (for fault handler) cannot be spawned [#58917](https://github.com/ClickHouse/ClickHouse/pull/58917) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer: Support GROUP BY injective function elimination [#58919](https://github.com/ClickHouse/ClickHouse/pull/58919) ([Dmitry Novik](https://github.com/novikd)).
* Cancel MasterCI in PRs [#58920](https://github.com/ClickHouse/ClickHouse/pull/58920) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix and test for azure [#58697](https://github.com/ClickHouse/ClickHouse/issues/58697) [#58921](https://github.com/ClickHouse/ClickHouse/pull/58921) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Extend performance test norm_dist.xml [#58922](https://github.com/ClickHouse/ClickHouse/pull/58922) ([Robert Schulze](https://github.com/rschu1ze)).
* Add regression test for parallel replicas (follow up [#58722](https://github.com/ClickHouse/ClickHouse/issues/58722), [#58844](https://github.com/ClickHouse/ClickHouse/issues/58844)) [#58923](https://github.com/ClickHouse/ClickHouse/pull/58923) ([Nikita Taranov](https://github.com/nickitat)).
* Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892) [#58927](https://github.com/ClickHouse/ClickHouse/pull/58927) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `FunctionToSubcolumnsPass` in debug build [#58930](https://github.com/ClickHouse/ClickHouse/pull/58930) ([Anton Popov](https://github.com/CurtizJ)).
* Call `getMaxFileDescriptorCount` once in Keeper [#58938](https://github.com/ClickHouse/ClickHouse/pull/58938) ([Antonio Andelic](https://github.com/antonio2368)).
* Add missing files to digests [#58942](https://github.com/ClickHouse/ClickHouse/pull/58942) ([Raúl Marín](https://github.com/Algunenano)).
* Analyzer: fix join column not found with compound identifiers [#58943](https://github.com/ClickHouse/ClickHouse/pull/58943) ([vdimir](https://github.com/vdimir)).
* CI: pr_info to provide event_type for job scripts [#58947](https://github.com/ClickHouse/ClickHouse/pull/58947) ([Max K.](https://github.com/maxknv)).
* Using the destination object for paths generation in S3copy. [#58949](https://github.com/ClickHouse/ClickHouse/pull/58949) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix data race in slru (2) [#58950](https://github.com/ClickHouse/ClickHouse/pull/58950) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix flaky test_postgresql_replica_database_engine_2/test.py::test_dependent_loading [#58951](https://github.com/ClickHouse/ClickHouse/pull/58951) ([Kseniia Sumarokova](https://github.com/kssenii)).
* More safe way to dump system logs in tests [#58955](https://github.com/ClickHouse/ClickHouse/pull/58955) ([alesapin](https://github.com/alesapin)).
* Add a comment about sparse checkout [#58960](https://github.com/ClickHouse/ClickHouse/pull/58960) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Follow up to [#58357](https://github.com/ClickHouse/ClickHouse/issues/58357) [#58963](https://github.com/ClickHouse/ClickHouse/pull/58963) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Better error message about tuples [#58971](https://github.com/ClickHouse/ClickHouse/pull/58971) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix timeout for prometheus exporter for HTTP/1.1 (due to keep-alive) [#58981](https://github.com/ClickHouse/ClickHouse/pull/58981) ([Azat Khuzhin](https://github.com/azat)).
* Fix 02891_array_shingles with analyzer [#58982](https://github.com/ClickHouse/ClickHouse/pull/58982) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix script name in SQL example in executable.md [#58984](https://github.com/ClickHouse/ClickHouse/pull/58984) ([Lino Uruñuela](https://github.com/Wachynaky)).
* Fix typo [#58986](https://github.com/ClickHouse/ClickHouse/pull/58986) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Revert flaky [#58992](https://github.com/ClickHouse/ClickHouse/pull/58992) ([Raúl Marín](https://github.com/Algunenano)).
* Revive: Parallel replicas custom key: skip unavailable replicas [#58993](https://github.com/ClickHouse/ClickHouse/pull/58993) ([Igor Nikonov](https://github.com/devcrafter)).
* Make performance test `test norm_dist.xml` more realistic [#58995](https://github.com/ClickHouse/ClickHouse/pull/58995) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix 02404_memory_bound_merging with analyzer (follow up [#56419](https://github.com/ClickHouse/ClickHouse/issues/56419)) [#58996](https://github.com/ClickHouse/ClickHouse/pull/58996) ([Nikita Taranov](https://github.com/nickitat)).
* Add test for [#58930](https://github.com/ClickHouse/ClickHouse/issues/58930) [#58999](https://github.com/ClickHouse/ClickHouse/pull/58999) ([Anton Popov](https://github.com/CurtizJ)).
* initialization ConnectionTimeouts [#59000](https://github.com/ClickHouse/ClickHouse/pull/59000) ([Sema Checherinda](https://github.com/CheSema)).
* DiskWeb fix loading [#59006](https://github.com/ClickHouse/ClickHouse/pull/59006) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Update log level for http buffer [#59008](https://github.com/ClickHouse/ClickHouse/pull/59008) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Change log level for super imporant message in Keeper [#59010](https://github.com/ClickHouse/ClickHouse/pull/59010) ([alesapin](https://github.com/alesapin)).
* Fix async loader stress test [#59011](https://github.com/ClickHouse/ClickHouse/pull/59011) ([Sergei Trifonov](https://github.com/serxa)).
* Remove `StaticResourceManager` [#59013](https://github.com/ClickHouse/ClickHouse/pull/59013) ([Sergei Trifonov](https://github.com/serxa)).
* preserve 'amz-sdk-invocation-id' and 'amz-sdk-request' headers with gcp [#59015](https://github.com/ClickHouse/ClickHouse/pull/59015) ([Sema Checherinda](https://github.com/CheSema)).
* Update rename.md [#59017](https://github.com/ClickHouse/ClickHouse/pull/59017) ([filimonov](https://github.com/filimonov)).
* очепятка [#59024](https://github.com/ClickHouse/ClickHouse/pull/59024) ([edpyt](https://github.com/edpyt)).
* Split resource scheduler off `IO/` into `Common/Scheduler/` [#59025](https://github.com/ClickHouse/ClickHouse/pull/59025) ([Sergei Trifonov](https://github.com/serxa)).
* Add a parameter for testing purposes [#59027](https://github.com/ClickHouse/ClickHouse/pull/59027) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix test 02932_kill_query_sleep when running with query cache [#59041](https://github.com/ClickHouse/ClickHouse/pull/59041) ([Vitaly Baranov](https://github.com/vitlibar)).
* CI: Jepsen: fix sanity check in ci.py [#59043](https://github.com/ClickHouse/ClickHouse/pull/59043) ([Max K.](https://github.com/maxknv)).
* CI: add ci_config classes for job and build names [#59046](https://github.com/ClickHouse/ClickHouse/pull/59046) ([Max K.](https://github.com/maxknv)).
* remove flaky test [#59066](https://github.com/ClickHouse/ClickHouse/pull/59066) ([Sema Checherinda](https://github.com/CheSema)).
* Followup to 57853 [#59068](https://github.com/ClickHouse/ClickHouse/pull/59068) ([Dmitry Novik](https://github.com/novikd)).
* Follow-up to [#59027](https://github.com/ClickHouse/ClickHouse/issues/59027) [#59075](https://github.com/ClickHouse/ClickHouse/pull/59075) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `test_parallel_replicas_invisible_parts` [#59077](https://github.com/ClickHouse/ClickHouse/pull/59077) ([Nikita Taranov](https://github.com/nickitat)).
* Increase max_bytes_before_external_group_by for 00165_jit_aggregate_functions [#59078](https://github.com/ClickHouse/ClickHouse/pull/59078) ([Raúl Marín](https://github.com/Algunenano)).
* Fix stateless/run.sh [#59079](https://github.com/ClickHouse/ClickHouse/pull/59079) ([Kseniia Sumarokova](https://github.com/kssenii)).
* CI: hot fix for reuse [#59081](https://github.com/ClickHouse/ClickHouse/pull/59081) ([Max K.](https://github.com/maxknv)).
* Fix server shutdown due to exception while loading metadata [#59083](https://github.com/ClickHouse/ClickHouse/pull/59083) ([Sergei Trifonov](https://github.com/serxa)).
* Coordinator returns ranges for reading in sorted order [#59089](https://github.com/ClickHouse/ClickHouse/pull/59089) ([Nikita Taranov](https://github.com/nickitat)).
* Raise timeout in 02294_decimal_second_errors [#59090](https://github.com/ClickHouse/ClickHouse/pull/59090) ([Raúl Marín](https://github.com/Algunenano)).
* Add `[[nodiscard]]` to a couple of methods [#59093](https://github.com/ClickHouse/ClickHouse/pull/59093) ([Nikita Taranov](https://github.com/nickitat)).
* Docs: Update integer and float aliases [#59100](https://github.com/ClickHouse/ClickHouse/pull/59100) ([Robert Schulze](https://github.com/rschu1ze)).
* Avoid election timeouts during startup in Keeper [#59102](https://github.com/ClickHouse/ClickHouse/pull/59102) ([Antonio Andelic](https://github.com/antonio2368)).
* Add missing setting max_estimated_execution_time in SettingsChangesHistory [#59104](https://github.com/ClickHouse/ClickHouse/pull/59104) ([Kruglov Pavel](https://github.com/Avogar)).
* Rename some inverted index test files [#59106](https://github.com/ClickHouse/ClickHouse/pull/59106) ([Robert Schulze](https://github.com/rschu1ze)).
* Further reduce runtime of `norm_distance.xml` [#59108](https://github.com/ClickHouse/ClickHouse/pull/59108) ([Robert Schulze](https://github.com/rschu1ze)).
* Minor follow-up to [#53710](https://github.com/ClickHouse/ClickHouse/issues/53710) [#59109](https://github.com/ClickHouse/ClickHouse/pull/59109) ([Robert Schulze](https://github.com/rschu1ze)).
* Update stateless/run.sh [#59116](https://github.com/ClickHouse/ClickHouse/pull/59116) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Followup 57875 [#59117](https://github.com/ClickHouse/ClickHouse/pull/59117) ([Dmitry Novik](https://github.com/novikd)).
* Fixing build [#59130](https://github.com/ClickHouse/ClickHouse/pull/59130) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Capability check for `s3_plain` [#59145](https://github.com/ClickHouse/ClickHouse/pull/59145) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix `02015_async_inserts_stress_long` [#59146](https://github.com/ClickHouse/ClickHouse/pull/59146) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix AggregateFunctionNothing result type issues introducing it with different names [#59147](https://github.com/ClickHouse/ClickHouse/pull/59147) ([vdimir](https://github.com/vdimir)).
* Fix url encoding issue [#59162](https://github.com/ClickHouse/ClickHouse/pull/59162) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Upgrade simdjson to v3.6.3 [#59166](https://github.com/ClickHouse/ClickHouse/pull/59166) ([Robert Schulze](https://github.com/rschu1ze)).
* Decrease log level for one log message [#59168](https://github.com/ClickHouse/ClickHouse/pull/59168) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix broken cache for non-existing temp_path [#59172](https://github.com/ClickHouse/ClickHouse/pull/59172) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Move some headers [#59175](https://github.com/ClickHouse/ClickHouse/pull/59175) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Analyzer: Fix CTE name clash resolution [#59177](https://github.com/ClickHouse/ClickHouse/pull/59177) ([Dmitry Novik](https://github.com/novikd)).
* Fix another place with special symbols in the URL [#59184](https://github.com/ClickHouse/ClickHouse/pull/59184) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Actions dag build filter actions refactoring [#59228](https://github.com/ClickHouse/ClickHouse/pull/59228) ([Maksim Kita](https://github.com/kitaisreal)).
* Minor cleanup of msan usage [#59229](https://github.com/ClickHouse/ClickHouse/pull/59229) ([Robert Schulze](https://github.com/rschu1ze)).
* Load server configs in clickhouse local [#59231](https://github.com/ClickHouse/ClickHouse/pull/59231) ([pufit](https://github.com/pufit)).
* Make libssh build dependent on `-DENABLE_LIBRARIES` [#59242](https://github.com/ClickHouse/ClickHouse/pull/59242) ([Robert Schulze](https://github.com/rschu1ze)).
* Disable copy constructor for MultiVersion [#59244](https://github.com/ClickHouse/ClickHouse/pull/59244) ([Vitaly Baranov](https://github.com/vitlibar)).
* CI: fix ci configuration for nightly job [#59252](https://github.com/ClickHouse/ClickHouse/pull/59252) ([Max K.](https://github.com/maxknv)).
* Fix 02475_bson_each_row_format flakiness (due to small parsing block) [#59253](https://github.com/ClickHouse/ClickHouse/pull/59253) ([Azat Khuzhin](https://github.com/azat)).
* Improve pytest --pdb experience by preserving dockerd on SIGINT (v2) [#59255](https://github.com/ClickHouse/ClickHouse/pull/59255) ([Azat Khuzhin](https://github.com/azat)).
* Fix fasttest by pinning pip dependencies [#59256](https://github.com/ClickHouse/ClickHouse/pull/59256) ([Azat Khuzhin](https://github.com/azat)).
* Added AtomicLogger [#59273](https://github.com/ClickHouse/ClickHouse/pull/59273) ([Maksim Kita](https://github.com/kitaisreal)).
* Update test_reload_after_fail_in_cache_dictionary for analyzer [#59274](https://github.com/ClickHouse/ClickHouse/pull/59274) ([vdimir](https://github.com/vdimir)).
* Update run.sh [#59280](https://github.com/ClickHouse/ClickHouse/pull/59280) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add missing setting optimize_injective_functions_in_group_by to SettingsChangesHistory [#59283](https://github.com/ClickHouse/ClickHouse/pull/59283) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix perf tests (after sumMap starts to filter out -0.) [#59287](https://github.com/ClickHouse/ClickHouse/pull/59287) ([Azat Khuzhin](https://github.com/azat)).
* Use fresh ZooKeeper client on DROP (to have higher chances on success) [#59288](https://github.com/ClickHouse/ClickHouse/pull/59288) ([Azat Khuzhin](https://github.com/azat)).
* Additional check [#59292](https://github.com/ClickHouse/ClickHouse/pull/59292) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* No debug symbols in Rust [#59306](https://github.com/ClickHouse/ClickHouse/pull/59306) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix deadlock in `AsyncLoader::stop()` [#59308](https://github.com/ClickHouse/ClickHouse/pull/59308) ([Sergei Trifonov](https://github.com/serxa)).
* Speed up `00165_jit_aggregate_functions` [#59312](https://github.com/ClickHouse/ClickHouse/pull/59312) ([Nikita Taranov](https://github.com/nickitat)).
* CI: WA for issue with perf test with artifact reuse [#59325](https://github.com/ClickHouse/ClickHouse/pull/59325) ([Max K.](https://github.com/maxknv)).
* Fix typo [#59329](https://github.com/ClickHouse/ClickHouse/pull/59329) ([Raúl Marín](https://github.com/Algunenano)).
* Simplify query_run_metric_arrays in perf tests [#59333](https://github.com/ClickHouse/ClickHouse/pull/59333) ([Raúl Marín](https://github.com/Algunenano)).
* IVolume constructor improve exception message [#59335](https://github.com/ClickHouse/ClickHouse/pull/59335) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix upgrade check for new setting [#59343](https://github.com/ClickHouse/ClickHouse/pull/59343) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix sccache when building without coverage [#59345](https://github.com/ClickHouse/ClickHouse/pull/59345) ([Raúl Marín](https://github.com/Algunenano)).
* Loggers initialization fix [#59347](https://github.com/ClickHouse/ClickHouse/pull/59347) ([Maksim Kita](https://github.com/kitaisreal)).
* Add setting update_insert_deduplication_token_in_dependent_materialized_views to settings changes history [#59349](https://github.com/ClickHouse/ClickHouse/pull/59349) ([Maksim Kita](https://github.com/kitaisreal)).
* Slightly better memory usage in `AsynchronousBoundedReadBuffer` [#59354](https://github.com/ClickHouse/ClickHouse/pull/59354) ([Anton Popov](https://github.com/CurtizJ)).
* Try to make variant tests a bit faster [#59355](https://github.com/ClickHouse/ClickHouse/pull/59355) ([Kruglov Pavel](https://github.com/Avogar)).
* Minor typos in Settings.h [#59371](https://github.com/ClickHouse/ClickHouse/pull/59371) ([Jordi Villar](https://github.com/jrdi)).
* Rename `quantileDDSketch` to `quantileDD` [#59372](https://github.com/ClickHouse/ClickHouse/pull/59372) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -109,6 +109,9 @@ Do not check for a particular wording of error message, it may change in the fut
If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is run in CI in correct configurations, where the server is configured to support distributed queries. If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is run in CI in correct configurations, where the server is configured to support distributed queries.
### Working with Temporary Files
Sometimes in a shell test you may need to create a file on the fly to work with. Keep in mind that some CI checks run tests in parallel, so if you are creating or removing a temporary file in your script without a unique name this can cause some of the CI checks, such as Flaky, to fail. To get around this you should use environment variable `$CLICKHOUSE_TEST_UNIQUE_NAME` to give temporary files a name unique to the test that is running. That way you can be sure that the file you are creating during setup or removing during cleanup is the file only in use by that test and not some other test which is running in parallel.
## Known Bugs {#known-bugs} ## Known Bugs {#known-bugs}

View File

@ -2040,6 +2040,32 @@ SELECT * FROM test_table
└───┘ └───┘
``` ```
## update_insert_deduplication_token_in_dependent_materialized_views {#update-insert-deduplication-token-in-dependent-materialized-views}
Allows to update `insert_deduplication_token` with table identifier during insert in dependent materialized views, if setting `deduplicate_blocks_in_dependent_materialized_views` is enabled and `insert_deduplication_token` is set.
Possible values:
0 — Disabled.
1 — Enabled.
Default value: 0.
Usage:
If setting `deduplicate_blocks_in_dependent_materialized_views` is enabled, `insert_deduplication_token` is passed to dependent materialized views. But in complex INSERT flows it is possible that we want to avoid deduplication for dependent materialized views.
Example:
```
landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1
| |
└--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘
```
In this example we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will be inserted into `ds_2_1`. Without `update_insert_deduplication_token_in_dependent_materialized_views` setting enabled, those two different blocks will be deduplicated, because different blocks from `mv_2_1` and `mv_2_2` will have the same `insert_deduplication_token`.
If setting `update_insert_deduplication_token_in_dependent_materialized_views` is enabled, during each insert into dependent materialized views `insert_deduplication_token` is updated with table identifier, so block from `mv_2_1` and block from `mv_2_2` will have different `insert_deduplication_token` and will not be deduplicated.
## insert_keeper_max_retries ## insert_keeper_max_retries
The setting sets the maximum number of retries for ClickHouse Keeper (or ZooKeeper) requests during insert into replicated MergeTree. Only Keeper requests which failed due to network error, Keeper session timeout, or request timeout are considered for retries. The setting sets the maximum number of retries for ClickHouse Keeper (or ZooKeeper) requests during insert into replicated MergeTree. Only Keeper requests which failed due to network error, Keeper session timeout, or request timeout are considered for retries.
@ -5165,7 +5191,7 @@ SETTINGS(dictionary_use_async_executor=1, max_threads=8);
## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key} ## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key}
When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY` format version. With that format full object storage key names are written to the metadata files. When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY` format version. With that format full object storage key names are written to the metadata files.
When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section. When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section.
Default value: `false`. Default value: `false`.
@ -5176,6 +5202,95 @@ When set to `false` than all attempts are made with identical timeouts.
Default value: `true`. Default value: `true`.
## allow_experimental_variant_type {#allow_experimental_variant_type}
Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md).
Default value: `false`.
## use_variant_as_common_type {#use_variant_as_common_type}
Allows to use `Variant` type as a result type for [if](../../sql-reference/functions/conditional-functions.md/#if)/[multiIf](../../sql-reference/functions/conditional-functions.md/#multiif)/[array](../../sql-reference/functions/array-functions.md)/[map](../../sql-reference/functions/tuple-map-functions.md) functions when there is no common type for argument types.
Example:
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(if(number % 2, number, range(number))) as variant_type FROM numbers(1);
SELECT if(number % 2, number, range(number)) as variant FROM numbers(5);
```
```text
┌─variant_type───────────────────┐
│ Variant(Array(UInt64), UInt64) │
└────────────────────────────────┘
┌─variant───┐
│ [] │
│ 1 │
│ [0,1] │
│ 3 │
│ [0,1,2,3] │
└───────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL)) AS variant_type FROM numbers(1);
SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4);
```
```text
─variant_type─────────────────────────┐
│ Variant(Array(UInt8), String, UInt8) │
└──────────────────────────────────────┘
┌─variant───────┐
│ 42 │
│ [1,2,3] │
│ Hello, World! │
│ ᴺᵁᴸᴸ │
└───────────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(array(range(number), number, 'str_' || toString(number))) as array_of_variants_type from numbers(1);
SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3);
```
```text
┌─array_of_variants_type────────────────────────┐
│ Array(Variant(Array(UInt64), String, UInt64)) │
└───────────────────────────────────────────────┘
┌─array_of_variants─┐
│ [[],0,'str_0'] │
│ [[0],1,'str_1'] │
│ [[0,1],2,'str_2'] │
└───────────────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(map('a', range(number), 'b', number, 'c', 'str_' || toString(number))) as map_of_variants_type from numbers(1);
SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3);
```
```text
┌─map_of_variants_type────────────────────────────────┐
│ Map(String, Variant(Array(UInt64), String, UInt64)) │
└─────────────────────────────────────────────────────┘
┌─map_of_variants───────────────┐
│ {'a':[],'b':0,'c':'str_0'} │
│ {'a':[0],'b':1,'c':'str_1'} │
│ {'a':[0,1],'b':2,'c':'str_2'} │
└───────────────────────────────┘
```
Default value: `false`.
## max_partition_size_to_drop ## max_partition_size_to_drop
Restriction on dropping partitions in query time. Restriction on dropping partitions in query time.

View File

@ -287,7 +287,7 @@ Number of threads in the HashedDictionary thread pool running a task.
### IOPrefetchThreads ### IOPrefetchThreads
Number of threads in the IO prefertch thread pool. Number of threads in the IO prefetch thread pool.
### IOPrefetchThreadsActive ### IOPrefetchThreadsActive

View File

@ -25,6 +25,8 @@ Columns:
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries. - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions.
- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — The total count of sequential authentication failures. If the user entered the correct password before exceed `failed_sequential_authentications` threshold then the counter will be reset.
- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — Maximum count of sequential authentication failures.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time). - `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time).
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time. - `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time.

View File

@ -28,8 +28,10 @@ Columns:
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries. - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of bytes read from all tables and table functions participated in queries.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum of bytes read from all tables and table functions.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time). - `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — The total count of sequential authentication failures. If the user entered the correct password before exceed `failed_sequential_authentications` threshold then the counter will be reset.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of query execution time. - `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum count of sequential authentication failures.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — The total query execution time, in seconds (wall time).
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — Maximum of query execution time.
## See Also {#see-also} ## See Also {#see-also}

View File

@ -0,0 +1,48 @@
---
toc_priority: 112
---
# groupArraySorted {#groupArraySorted}
Returns an array with the first N items in ascending order.
``` sql
groupArraySorted(N)(column)
```
**Arguments**
- `N` The number of elements to return.
If the parameter is omitted, default value is the size of input.
- `column` The value (Integer, String, Float and other Generic types).
**Example**
Gets the first 10 numbers:
``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9] │
└──────────────────────────────┘
```
Gets all the String implementations of all numbers in column:
``` sql
SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5));
```
``` text
┌─groupArraySorted(str)────────┐
│ ['0','1','2','3','4'] │
└──────────────────────────────┘
```

View File

@ -54,6 +54,7 @@ ClickHouse-specific aggregate functions:
- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md) - [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md) - [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupArraySample](./grouparraysample.md) - [groupArraySample](./grouparraysample.md)
- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md)
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md) - [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md) - [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md) - [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)
@ -88,7 +89,7 @@ ClickHouse-specific aggregate functions:
- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) - [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16) - [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted) - [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
- [quantileDDSketch](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch) - [quantileDD](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch)
- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md) - [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md)
- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md) - [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) - [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
@ -105,4 +106,3 @@ ClickHouse-specific aggregate functions:
- [sparkBar](./sparkbar.md) - [sparkBar](./sparkbar.md)
- [sumCount](./sumcount.md) - [sumCount](./sumcount.md)
- [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md) - [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md)

View File

@ -18,7 +18,7 @@ Functions:
- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest). - `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest).
- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted). - `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted).
- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16). - `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16).
- `medianDDSketch` — Alias for [quantileDDSketch](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch). - `medianDD` — Alias for [quantileDD](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch).
**Example** **Example**

View File

@ -1,10 +1,10 @@
--- ---
slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch
sidebar_position: 211 sidebar_position: 211
title: quantileDDSketch title: quantileDD
--- ---
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DDSketch](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf). Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DD](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf).
**Syntax** **Syntax**
@ -44,13 +44,13 @@ Input table has an integer and a float columns:
Query to calculate 0.75-quantile (third quartile): Query to calculate 0.75-quantile (third quartile):
``` sql ``` sql
SELECT quantileDDSketch(0.01, 0.75)(a), quantileDDSketch(0.01, 0.75)(b) FROM example_table; SELECT quantileDD(0.01, 0.75)(a), quantileDD(0.01, 0.75)(b) FROM example_table;
``` ```
Result: Result:
``` text ``` text
┌─quantileDDSketch(0.01, 0.75)(a)─┬─quantileDDSketch(0.01, 0.75)(b)─┐ ┌─quantileDD(0.01, 0.75)(a)─┬─quantileDD(0.01, 0.75)(b)─┐
│ 2.974233423476717 │ 1.01 │ │ 2.974233423476717 │ 1.01 │
└─────────────────────────────────┴─────────────────────────────────┘ └─────────────────────────────────┴─────────────────────────────────┘
``` ```

View File

@ -9,7 +9,7 @@ sidebar_position: 201
Syntax: `quantiles(level1, level2, …)(x)` Syntax: `quantiles(level1, level2, …)(x)`
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDDSketch`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
## quantilesExactExclusive ## quantilesExactExclusive

View File

@ -7,7 +7,7 @@ sidebar_label: JSON
# JSON # JSON
:::note :::note
This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead.
::: :::
Stores JavaScript Object Notation (JSON) documents in a single column. Stores JavaScript Object Notation (JSON) documents in a single column.
@ -15,7 +15,8 @@ Stores JavaScript Object Notation (JSON) documents in a single column.
`JSON` is an alias for `Object('json')`. `JSON` is an alias for `Object('json')`.
:::note :::note
The JSON data type is an experimental feature. To use it, set `allow_experimental_object_type = 1`. The JSON data type is an obsolete feature. Do not use it.
If you want to use it, set `allow_experimental_object_type = 1`.
::: :::
## Example ## Example

View File

@ -0,0 +1,274 @@
---
slug: /en/sql-reference/data-types/json
sidebar_position: 55
sidebar_label: Variant
---
# Variant(T1, T2, T3, ...)
This type represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type
has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value).
The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1).
Nested types can be arbitrary types except Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types.
:::note
The Variant data type is an experimental feature. To use it, set `allow_experimental_variant_type = 1`.
:::
## Creating Variant
Using `Variant` type in table column definition:
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT v FROM test;
```
```text
┌─v─────────────┐
│ ᴺᵁᴸᴸ │
│ 42 │
│ Hello, World! │
│ [1,2,3] │
└───────────────┘
```
Using CAST from ordinary columns:
```sql
SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant;
```
```text
┌─type_name──────────────────────────────┬─variant───────┐
│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │
└────────────────────────────────────────┴───────────────┘
```
Using functions `if/multiIf` when arguments don't have common type (setting `use_variant_as_common_type` should be enabled for it):
```sql
SET use_variant_as_common_type = 1;
SELECT if(number % 2, number, range(number)) as variant FROM numbers(5);
```
```text
┌─variant───┐
│ [] │
│ 1 │
│ [0,1] │
│ 3 │
│ [0,1,2,3] │
└───────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4);
```
```text
┌─variant───────┐
│ 42 │
│ [1,2,3] │
│ Hello, World! │
│ ᴺᵁᴸᴸ │
└───────────────┘
```
Using functions 'array/map' if array elements/map values don't have common type (setting `use_variant_as_common_type` should be enabled for it):
```sql
SET use_variant_as_common_type = 1;
SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3);
```
```text
┌─array_of_variants─┐
│ [[],0,'str_0'] │
│ [[0],1,'str_1'] │
│ [[0,1],2,'str_2'] │
└───────────────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3);
```
```text
┌─map_of_variants───────────────┐
│ {'a':[],'b':0,'c':'str_0'} │
│ {'a':[0],'b':1,'c':'str_1'} │
│ {'a':[0,1],'b':2,'c':'str_2'} │
└───────────────────────────────┘
```
## Reading Variant nested types as subcolumns
Variant type supports reading a single nested type from a Variant column using the type name as a subcolumn.
So, if you have column `variant Variant(T1, T2, T3)` you can read a subcolumn of type `T2` using syntax `variant.T2`,
this subcolumn will have type `Nullable(T2)` if `T2` can be inside `Nullable` and `T2` otherwise. This subcolumn will
be the same size as original `Variant` column and will contain `NULL` values (or empty values if `T2` cannot be inside `Nullable`)
in all rows in which original `Variant` column doesn't have type `T2`.
Variant subcolumns can be also read using function `variantElement(variant_column, type_name)`.
Examples:
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT v, v.String, v.UInt64, v.`Array(UInt64)` FROM test;
```
```text
┌─v─────────────┬─v.String──────┬─v.UInt64─┬─v.Array(UInt64)─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└───────────────┴───────────────┴──────────┴─────────────────┘
```
```sql
SELECT toTypeName(v.String), toTypeName(v.UInt64), toTypeName(v.`Array(UInt64)`) FROM test LIMIT 1;
```
```text
┌─toTypeName(v.String)─┬─toTypeName(v.UInt64)─┬─toTypeName(v.Array(UInt64))─┐
│ Nullable(String) │ Nullable(UInt64) │ Array(UInt64) │
└──────────────────────┴──────────────────────┴─────────────────────────────┘
```
```sql
SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test;
```
```text
┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
```
To know what variant is stored in each row function `variantType(variant_column)` can be used. It returns `Enum` with variant type name for each row (or `'None'` if row is `NULL`).
Example:
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT variantType(v) from test;
```
```text
┌─variantType(v)─┐
│ None │
│ UInt64 │
│ String │
│ Array(UInt64) │
└────────────────┘
```
```sql
SELECT toTypeName(variantType(v)) FROM test LIMIT 1;
```
```text
┌─toTypeName(variantType(v))──────────────────────────────────────────┐
│ Enum8('None' = -1, 'Array(UInt64)' = 0, 'String' = 1, 'UInt64' = 2) │
└─────────────────────────────────────────────────────────────────────┘
```
## Conversion between Variant column and other columns
There are 3 possible conversions that can be performed with Variant column.
### Converting an ordinary column to a Variant column
It is possible to convert ordinary column with type `T` to a `Variant` column containing this type:
```sql
SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant;
```
```text
┌─type_name──────────────────────────────┬─variant───────┐
│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │
└────────────────────────────────────────┴───────────────┘
```
### Converting a Variant column to an ordinary column
It is possible to convert a `Variant` column to an ordinary column. In this case all nested variants will be converted to a destination type:
```sql
CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('42.42');
SELECT v::Nullable(Float64) FROM test;
```
```text
┌─CAST(v, 'Nullable(Float64)')─┐
│ ᴺᵁᴸᴸ │
│ 42 │
│ 42.42 │
└──────────────────────────────┘
```
### Converting a Variant to another Variant
It is possible to convert a `Variant` column to another `Variant` column, but only if the destination `Variant` column contains all nested types from the original `Variant`:
```sql
CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('String');
SELECT v::Variant(UInt64, String, Array(UInt64)) FROM test;
```
```text
┌─CAST(v, 'Variant(UInt64, String, Array(UInt64))')─┐
│ ᴺᵁᴸᴸ │
│ 42 │
│ String │
└───────────────────────────────────────────────────┘
```
## Reading Variant type from the data
All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc) supports reading `Variant` type. During data parsing ClickHouse tries to insert value into most appropriate variant type.
Example:
```sql
SELECT
v,
variantElement(v, 'String') AS str,
variantElement(v, 'UInt64') AS num,
variantElement(v, 'Float64') AS float,
variantElement(v, 'DateTime') AS date,
variantElement(v, 'Array(UInt64)') AS arr
FROM format(JSONEachRow, 'v Variant(String, UInt64, Float64, DateTime, Array(UInt64))', $$
{"v" : "Hello, World!"},
{"v" : 42},
{"v" : 42.42},
{"v" : "2020-01-01 00:00:00"},
{"v" : [1, 2, 3]}
$$)
```
```text
┌─v───────────────────┬─str───────────┬──num─┬─float─┬────────────────date─┬─arr─────┐
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42.42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │
│ 2020-01-01 00:00:00 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-01 00:00:00 │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└─────────────────────┴───────────────┴──────┴───────┴─────────────────────┴─────────┘
```

View File

@ -2832,6 +2832,88 @@ Result:
└─────────────────────────────────────────────────────────────────────────┘ └─────────────────────────────────────────────────────────────────────────┘
``` ```
## variantElement
Extracts a column with specified type from a `Variant` column.
**Syntax**
```sql
variantElement(variant, type_name, [, default_value])
```
**Arguments**
- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md).
- `type_name` — The name of the variant type to extract. [String](../../sql-reference/data-types/string.md).
- `default_value` - The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional.
**Returned value**
- Subcolumn of a `Variant` column with specified type.
**Example**
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test;
```
```text
┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
```
## variantType
Returns the variant type name for each row of `Variant` column. If row contains NULL, it returns `'None'` for it.
**Syntax**
```sql
variantType(variant)
```
**Arguments**
- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md).
**Returned value**
- Enum8 column with variant type name for each row.
**Example**
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT variantType(v) FROM test;
```
```text
┌─variantType(v)─┐
│ None │
│ UInt64 │
│ String │
│ Array(UInt64) │
└────────────────┘
```
```sql
SELECT toTypeName(variantType(v)) FROM test LIMIT 1;
```
```text
┌─toTypeName(variantType(v))──────────────────────────────────────────┐
│ Enum8('None' = -1, 'Array(UInt64)' = 0, 'String' = 1, 'UInt64' = 2) │
└─────────────────────────────────────────────────────────────────────┘
```
## minSampleSizeConversion ## minSampleSizeConversion
Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples. Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples.

View File

@ -515,7 +515,7 @@ Alias: `concat_ws`
**Arguments** **Arguments**
- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). - sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- exprN — expression to be concatenated. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). - exprN — expression to be concatenated. Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments.
**Returned values** **Returned values**

View File

@ -77,8 +77,8 @@ The number of data points in `series` should be at least twice the value of `per
**Returned value** **Returned value**
- An array of three arrays where the first array include seasonal components, the second array - trend, - An array of four arrays where the first array include seasonal components, the second array - trend,
and the third array - residue component. the third array - residue component, and the fourth array - baseline(seasonal + trend) component.
Type: [Array](../../sql-reference/data-types/array.md). Type: [Array](../../sql-reference/data-types/array.md).
@ -107,6 +107,10 @@ Result:
[ [
0, 0.0000019073486, -0.0000019073486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0000019073486, 0, 0, 0.0000019073486, -0.0000019073486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0000019073486, 0,
0 0
],
[
10.1, 20.449999, 40.340004, 10.100001, 20.45, 40.34, 10.100001, 20.45, 40.34, 10.1, 20.45, 40.34,
10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.100002, 20.45, 40.34
]] │ ]] │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
``` ```

View File

@ -112,7 +112,7 @@ Note that:
For the query to run successfully, the following conditions must be met: For the query to run successfully, the following conditions must be met:
- Both tables must have the same structure. - Both tables must have the same structure.
- Both tables must have the same partition key, the same order by key and the same primary key. - Both tables must have the same order by key and the same primary key.
- Both tables must have the same indices and projections. - Both tables must have the same indices and projections.
- Both tables must have the same storage policy. - Both tables must have the same storage policy.

View File

@ -21,7 +21,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table. Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table.
Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table.
`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).

View File

@ -26,8 +26,11 @@ slug: /ru/operations/system-tables/quota_usage
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах. - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах. - `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Общее количество неудачных попыток подряд ввести пароль. Если пользователь ввел верный пароль до преодоления порогового значения `max_failed_sequential_authentications` то счетчик неудачных попыток будет сброшен.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса. - `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Максимальное количество неудачных попыток подряд ввести пароль.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also} ## Смотрите также {#see-also}

View File

@ -29,9 +29,10 @@ slug: /ru/operations/system-tables/quotas_usage
- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах. - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество строк, считываемых из всех таблиц и табличных функций, участвующих в запросах.
- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах. - `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — общее количество байт, считанных из всех таблиц и табличных функций, участвующих в запросах.
- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальное количество байт, считываемых из всех таблиц и табличных функций.
- `failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — Общее количество неудачных попыток подряд ввести пароль. Если пользователь ввел верный пароль до преодоления порогового значения `max_failed_sequential_authentications` то счетчик неудачных попыток будет сброшен.
- `max_failed_sequential_authentications` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/float.md))) — Максимальное количество неудачных попыток подряд ввести пароль.
- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах. - `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — общее время выполнения запроса, в секундах.
- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса. - `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — максимальное время выполнения запроса.
## Смотрите также {#see-also} ## Смотрите также {#see-also}
- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) - [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)

View File

@ -22,7 +22,7 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md). Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md). В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).

View File

@ -20,7 +20,7 @@ CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
``` ```
Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md). Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md).
Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). Параметры `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time`, `failed_sequential_authentications` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md).
В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md). В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).

View File

@ -5,6 +5,7 @@
<title>ClickHouse Dashboard</title> <title>ClickHouse Dashboard</title>
<link rel="icon" href=""> <link rel="icon" href="">
<script src="https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.iife.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.iife.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/lz-string@1.5.0/libs/lz-string.min.js"></script>
<style> <style>
:root { :root {
--color: black; --color: black;
@ -484,7 +485,6 @@
* *
* TODO: * TODO:
* - zoom on the graphs should work on touch devices; * - zoom on the graphs should work on touch devices;
* - compress the state for URL's #hash;
* - footer with "about" or a link to source code; * - footer with "about" or a link to source code;
* - allow to configure a table on a server to save the dashboards; * - allow to configure a table on a server to save the dashboards;
* - if a query returned one value, display this value instead of a diagram; * - if a query returned one value, display this value instead of a diagram;
@ -1384,7 +1384,7 @@ document.getElementById('params').onsubmit = function(event) {
function saveState() { function saveState() {
const state = { host, user, queries, params, search_query, customized }; const state = { host, user, queries, params, search_query, customized };
history.pushState(state, '', history.pushState(state, '',
window.location.pathname + (window.location.search || '') + '#' + btoa(JSON.stringify(state))); window.location.pathname + (window.location.search || '') + '#' + LZString.compressToEncodedURIComponent(JSON.stringify(state)));
} }
async function searchQueries() { async function searchQueries() {
@ -1450,7 +1450,7 @@ window.onpopstate = function(event) {
if (window.location.hash) { if (window.location.hash) {
try { try {
let search_query_, customized_; let search_query_, customized_;
({host, user, queries, params, search_query_, customized_} = JSON.parse(atob(window.location.hash.substring(1)))); ({host, user, queries, params, search_query_, customized_} = JSON.parse(LZString.decompressFromEncodedURIComponent(window.location.hash.substring(1))));
// For compatibility with old URLs' hashes // For compatibility with old URLs' hashes
search_query = search_query_ !== undefined ? search_query_ : search_query; search_query = search_query_ !== undefined ? search_query_ : search_query;
customized = customized_ !== undefined ? customized_ : true; customized = customized_ !== undefined ? customized_ : true;

View File

@ -0,0 +1 @@
var LZString=function(){var r=String.fromCharCode,o="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",n="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-$",e={};function t(r,o){if(!e[r]){e[r]={};for(var n=0;n<r.length;n++)e[r][r.charAt(n)]=n}return e[r][o]}var i={compressToBase64:function(r){if(null==r)return"";var n=i._compress(r,6,function(r){return o.charAt(r)});switch(n.length%4){default:case 0:return n;case 1:return n+"===";case 2:return n+"==";case 3:return n+"="}},decompressFromBase64:function(r){return null==r?"":""==r?null:i._decompress(r.length,32,function(n){return t(o,r.charAt(n))})},compressToUTF16:function(o){return null==o?"":i._compress(o,15,function(o){return r(o+32)})+" "},decompressFromUTF16:function(r){return null==r?"":""==r?null:i._decompress(r.length,16384,function(o){return r.charCodeAt(o)-32})},compressToUint8Array:function(r){for(var o=i.compress(r),n=new Uint8Array(2*o.length),e=0,t=o.length;e<t;e++){var s=o.charCodeAt(e);n[2*e]=s>>>8,n[2*e+1]=s%256}return n},decompressFromUint8Array:function(o){if(null==o)return i.decompress(o);for(var n=new Array(o.length/2),e=0,t=n.length;e<t;e++)n[e]=256*o[2*e]+o[2*e+1];var s=[];return n.forEach(function(o){s.push(r(o))}),i.decompress(s.join(""))},compressToEncodedURIComponent:function(r){return null==r?"":i._compress(r,6,function(r){return n.charAt(r)})},decompressFromEncodedURIComponent:function(r){return null==r?"":""==r?null:(r=r.replace(/ /g,"+"),i._decompress(r.length,32,function(o){return t(n,r.charAt(o))}))},compress:function(o){return i._compress(o,16,function(o){return r(o)})},_compress:function(r,o,n){if(null==r)return"";var e,t,i,s={},u={},a="",p="",c="",l=2,f=3,h=2,d=[],m=0,v=0;for(i=0;i<r.length;i+=1)if(a=r.charAt(i),Object.prototype.hasOwnProperty.call(s,a)||(s[a]=f++,u[a]=!0),p=c+a,Object.prototype.hasOwnProperty.call(s,p))c=p;else{if(Object.prototype.hasOwnProperty.call(u,c)){if(c.charCodeAt(0)<256){for(e=0;e<h;e++)m<<=1,v==o-1?(v=0,d.push(n(m)),m=0):v++;for(t=c.charCodeAt(0),e=0;e<8;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1}else{for(t=1,e=0;e<h;e++)m=m<<1|t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t=0;for(t=c.charCodeAt(0),e=0;e<16;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1}0==--l&&(l=Math.pow(2,h),h++),delete u[c]}else for(t=s[c],e=0;e<h;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1;0==--l&&(l=Math.pow(2,h),h++),s[p]=f++,c=String(a)}if(""!==c){if(Object.prototype.hasOwnProperty.call(u,c)){if(c.charCodeAt(0)<256){for(e=0;e<h;e++)m<<=1,v==o-1?(v=0,d.push(n(m)),m=0):v++;for(t=c.charCodeAt(0),e=0;e<8;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1}else{for(t=1,e=0;e<h;e++)m=m<<1|t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t=0;for(t=c.charCodeAt(0),e=0;e<16;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1}0==--l&&(l=Math.pow(2,h),h++),delete u[c]}else for(t=s[c],e=0;e<h;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1;0==--l&&(l=Math.pow(2,h),h++)}for(t=2,e=0;e<h;e++)m=m<<1|1&t,v==o-1?(v=0,d.push(n(m)),m=0):v++,t>>=1;for(;;){if(m<<=1,v==o-1){d.push(n(m));break}v++}return d.join("")},decompress:function(r){return null==r?"":""==r?null:i._decompress(r.length,32768,function(o){return r.charCodeAt(o)})},_decompress:function(o,n,e){var t,i,s,u,a,p,c,l=[],f=4,h=4,d=3,m="",v=[],g={val:e(0),position:n,index:1};for(t=0;t<3;t+=1)l[t]=t;for(s=0,a=Math.pow(2,2),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;switch(s){case 0:for(s=0,a=Math.pow(2,8),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;c=r(s);break;case 1:for(s=0,a=Math.pow(2,16),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;c=r(s);break;case 2:return""}for(l[3]=c,i=c,v.push(c);;){if(g.index>o)return"";for(s=0,a=Math.pow(2,d),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;switch(c=s){case 0:for(s=0,a=Math.pow(2,8),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;l[h++]=r(s),c=h-1,f--;break;case 1:for(s=0,a=Math.pow(2,16),p=1;p!=a;)u=g.val&g.position,g.position>>=1,0==g.position&&(g.position=n,g.val=e(g.index++)),s|=(u>0?1:0)*p,p<<=1;l[h++]=r(s),c=h-1,f--;break;case 2:return v.join("")}if(0==f&&(f=Math.pow(2,d),d++),l[c])m=l[c];else{if(c!==h)return null;m=i+i.charAt(0)}v.push(m),l[h++]=i+m.charAt(0),i=m,0==--f&&(f=Math.pow(2,d),d++)}}};return i}();"function"==typeof define&&define.amd?define(function(){return LZString}):"undefined"!=typeof module&&null!=module?module.exports=LZString:"undefined"!=typeof angular&&null!=angular&&angular.module("LZString",[]).factory("LZString",function(){return LZString});

View File

@ -559,12 +559,33 @@ AccessChangesNotifier & AccessControl::getChangesNotifier()
} }
AuthResult AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const AuthResult AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const String & forwarded_address) const
{ {
// NOTE: In the case where the user has never been logged in using LDAP,
// Then user_id is not generated, and the authentication quota will always be nullptr.
auto authentication_quota = getAuthenticationQuota(credentials.getUserName(), address, forwarded_address);
if (authentication_quota)
{
/// Reserve a single try from the quota to check whether we have another authentication try.
/// This is required for correct behavior in this situation:
/// User has 1 login failures quota.
/// * At the first login with an invalid password: Increase the quota counter. 1 (used) > 1 (max) is false.
///   Then try to authenticate the user and throw an AUTHENTICATION_FAILED error.
/// * In case of the second try: increase quota counter, 2 (used) > 1 (max), then throw QUOTA_EXCEED
///   and don't let the user authenticate.
///
/// The authentication failures counter will be reset after successful authentication.
authentication_quota->used(QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS, 1);
}
try try
{ {
return MultipleAccessStorage::authenticate(credentials, address, *external_authenticators, allow_no_password, const auto auth_result = MultipleAccessStorage::authenticate(credentials, address, *external_authenticators, allow_no_password,
allow_plaintext_password); allow_plaintext_password);
if (authentication_quota)
authentication_quota->reset(QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS);
return auth_result;
} }
catch (...) catch (...)
{ {
@ -763,7 +784,34 @@ std::shared_ptr<const EnabledQuota> AccessControl::getEnabledQuota(
const String & forwarded_address, const String & forwarded_address,
const String & custom_quota_key) const const String & custom_quota_key) const
{ {
return quota_cache->getEnabledQuota(user_id, user_name, enabled_roles, address, forwarded_address, custom_quota_key); return quota_cache->getEnabledQuota(user_id, user_name, enabled_roles, address, forwarded_address, custom_quota_key, true);
}
std::shared_ptr<const EnabledQuota> AccessControl::getAuthenticationQuota(
const String & user_name, const Poco::Net::IPAddress & address, const std::string & forwarded_address) const
{
auto user_id = find<User>(user_name);
UserPtr user;
if (user_id && (user = tryRead<User>(*user_id)))
{
const auto new_current_roles = user->granted_roles.findGranted(user->default_roles);
const auto roles_info = getEnabledRolesInfo(new_current_roles, {});
// client_key is not received at the moment of authentication during TCP connection
// if key type is set to QuotaKeyType::CLIENT_KEY
// QuotaCache::QuotaInfo::calculateKey will throw exception without throw_if_client_key_empty = false
String quota_key;
bool throw_if_client_key_empty = false;
return quota_cache->getEnabledQuota(*user_id,
user->getName(),
roles_info->enabled_roles,
address,
forwarded_address,
quota_key,
throw_if_client_key_empty);
}
else
return nullptr;
} }

View File

@ -118,7 +118,7 @@ public:
scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const; scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const;
scope_guard subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const; scope_guard subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const;
AuthResult authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const; AuthResult authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const String & forwarded_address) const;
/// Makes a backup of access entities. /// Makes a backup of access entities.
void restoreFromBackup(RestorerFromBackup & restorer) override; void restoreFromBackup(RestorerFromBackup & restorer) override;
@ -206,6 +206,11 @@ public:
const String & forwarded_address, const String & forwarded_address,
const String & custom_quota_key) const; const String & custom_quota_key) const;
std::shared_ptr<const EnabledQuota> getAuthenticationQuota(
const String & user_name,
const Poco::Net::IPAddress & address,
const std::string & forwarded_address) const;
std::vector<QuotaUsage> getAllQuotasUsage() const; std::vector<QuotaUsage> getAllQuotasUsage() const;
std::shared_ptr<const EnabledSettings> getEnabledSettings( std::shared_ptr<const EnabledSettings> getEnabledSettings(

View File

@ -111,6 +111,11 @@ const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type)
static const auto info = make_info("WRITTEN_BYTES", 1); static const auto info = make_info("WRITTEN_BYTES", 1);
return info; return info;
} }
case QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS:
{
static const auto info = make_info("FAILED_SEQUENTIAL_AUTHENTICATIONS", 1);
return info;
}
case QuotaType::MAX: break; case QuotaType::MAX: break;
} }
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected quota type: {}", static_cast<int>(type)); throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected quota type: {}", static_cast<int>(type));

View File

@ -11,16 +11,17 @@ using QuotaValue = UInt64;
/// Kinds of resource what we wish to quota. /// Kinds of resource what we wish to quota.
enum class QuotaType enum class QuotaType
{ {
QUERIES, /// Number of queries. QUERIES, /// Number of queries.
QUERY_SELECTS, /// Number of select queries. QUERY_SELECTS, /// Number of select queries.
QUERY_INSERTS, /// Number of insert queries. QUERY_INSERTS, /// Number of insert queries.
ERRORS, /// Number of queries with exceptions. ERRORS, /// Number of queries with exceptions.
RESULT_ROWS, /// Number of rows returned as result. RESULT_ROWS, /// Number of rows returned as result.
RESULT_BYTES, /// Number of bytes returned as result. RESULT_BYTES, /// Number of bytes returned as result.
READ_ROWS, /// Number of rows read from tables. READ_ROWS, /// Number of rows read from tables.
READ_BYTES, /// Number of bytes read from tables. READ_BYTES, /// Number of bytes read from tables.
EXECUTION_TIME, /// Total amount of query execution time in nanoseconds. EXECUTION_TIME, /// Total amount of query execution time in nanoseconds.
WRITTEN_BYTES, /// Number of bytes written to tables. WRITTEN_BYTES, /// Number of bytes written to tables.
FAILED_SEQUENTIAL_AUTHENTICATIONS, /// Number of recent failed authentications.
MAX MAX
}; };

View File

@ -105,6 +105,16 @@ struct EnabledQuota::Impl
std::uniform_int_distribution<Int64> distribution{0, count - 1}; std::uniform_int_distribution<Int64> distribution{0, count - 1};
return std::chrono::system_clock::duration(distribution(thread_local_rng)); return std::chrono::system_clock::duration(distribution(thread_local_rng));
} }
static void resetQuotaValue(const Intervals & intervals, QuotaType quota_type, QuotaValue value, std::chrono::system_clock::time_point current_time)
{
const auto quota_type_i = static_cast<size_t>(quota_type);
for (const auto & interval : intervals.intervals)
{
interval.used[quota_type_i] = value;
interval.getEndOfInterval(current_time);
}
}
}; };
@ -285,6 +295,12 @@ void EnabledQuota::checkExceeded(QuotaType quota_type) const
} }
void EnabledQuota::reset(QuotaType quota_type) const
{
const auto loaded = intervals.load();
Impl::resetQuotaValue(*loaded, quota_type, 0, std::chrono::system_clock::now());
}
std::optional<QuotaUsage> EnabledQuota::getUsage() const std::optional<QuotaUsage> EnabledQuota::getUsage() const
{ {
auto loaded = intervals.load(); auto loaded = intervals.load();

View File

@ -52,6 +52,8 @@ public:
void checkExceeded() const; void checkExceeded() const;
void checkExceeded(QuotaType quota_type) const; void checkExceeded(QuotaType quota_type) const;
void reset(QuotaType quota_type) const;
/// Returns the information about quota consumption. /// Returns the information about quota consumption.
std::optional<QuotaUsage> getUsage() const; std::optional<QuotaUsage> getUsage() const;

View File

@ -30,7 +30,7 @@ void QuotaCache::QuotaInfo::setQuota(const QuotaPtr & quota_, const UUID & quota
} }
String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled) const String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled, bool throw_if_client_key_empty) const
{ {
const auto & params = enabled.params; const auto & params = enabled.params;
switch (quota->key_type) switch (quota->key_type)
@ -55,8 +55,15 @@ String QuotaCache::QuotaInfo::calculateKey(const EnabledQuota & enabled) const
{ {
if (!params.client_key.empty()) if (!params.client_key.empty())
return params.client_key; return params.client_key;
throw Exception(ErrorCodes::QUOTA_REQUIRES_CLIENT_KEY, "Quota {} (for user {}) requires a client supplied key.",
quota->getName(), params.user_name); if (throw_if_client_key_empty)
throw Exception(
ErrorCodes::QUOTA_REQUIRES_CLIENT_KEY,
"Quota {} (for user {}) requires a client supplied key.",
quota->getName(),
params.user_name);
else
return ""; // Authentication quota has no client key at time of authentication.
} }
case QuotaKeyType::CLIENT_KEY_OR_USER_NAME: case QuotaKeyType::CLIENT_KEY_OR_USER_NAME:
{ {
@ -165,7 +172,14 @@ QuotaCache::QuotaCache(const AccessControl & access_control_)
QuotaCache::~QuotaCache() = default; QuotaCache::~QuotaCache() = default;
std::shared_ptr<const EnabledQuota> QuotaCache::getEnabledQuota(const UUID & user_id, const String & user_name, const boost::container::flat_set<UUID> & enabled_roles, const Poco::Net::IPAddress & client_address, const String & forwarded_address, const String & client_key) std::shared_ptr<const EnabledQuota> QuotaCache::getEnabledQuota(
const UUID & user_id,
const String & user_name,
const boost::container::flat_set<UUID> & enabled_roles,
const Poco::Net::IPAddress & client_address,
const String & forwarded_address,
const String & client_key,
bool throw_if_client_key_empty)
{ {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
ensureAllQuotasRead(); ensureAllQuotasRead();
@ -188,11 +202,10 @@ std::shared_ptr<const EnabledQuota> QuotaCache::getEnabledQuota(const UUID & use
auto res = std::shared_ptr<EnabledQuota>(new EnabledQuota(params)); auto res = std::shared_ptr<EnabledQuota>(new EnabledQuota(params));
enabled_quotas.emplace(std::move(params), res); enabled_quotas.emplace(std::move(params), res);
chooseQuotaToConsumeFor(*res); chooseQuotaToConsumeFor(*res, throw_if_client_key_empty);
return res; return res;
} }
void QuotaCache::ensureAllQuotasRead() void QuotaCache::ensureAllQuotasRead()
{ {
/// `mutex` is already locked. /// `mutex` is already locked.
@ -257,13 +270,13 @@ void QuotaCache::chooseQuotaToConsume()
i = enabled_quotas.erase(i); i = enabled_quotas.erase(i);
else else
{ {
chooseQuotaToConsumeFor(*elem); chooseQuotaToConsumeFor(*elem, true);
++i; ++i;
} }
} }
} }
void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled) void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled, bool throw_if_client_key_empty)
{ {
/// `mutex` is already locked. /// `mutex` is already locked.
boost::shared_ptr<const Intervals> intervals; boost::shared_ptr<const Intervals> intervals;
@ -271,7 +284,7 @@ void QuotaCache::chooseQuotaToConsumeFor(EnabledQuota & enabled)
{ {
if (info.roles->match(enabled.params.user_id, enabled.params.enabled_roles)) if (info.roles->match(enabled.params.user_id, enabled.params.enabled_roles))
{ {
String key = info.calculateKey(enabled); String key = info.calculateKey(enabled, throw_if_client_key_empty);
intervals = info.getOrBuildIntervals(key); intervals = info.getOrBuildIntervals(key);
break; break;
} }

View File

@ -28,7 +28,8 @@ public:
const boost::container::flat_set<UUID> & enabled_roles, const boost::container::flat_set<UUID> & enabled_roles,
const Poco::Net::IPAddress & address, const Poco::Net::IPAddress & address,
const String & forwarded_address, const String & forwarded_address,
const String & client_key); const String & client_key,
bool throw_if_client_key_empty);
std::vector<QuotaUsage> getAllQuotasUsage() const; std::vector<QuotaUsage> getAllQuotasUsage() const;
@ -41,7 +42,7 @@ private:
QuotaInfo(const QuotaPtr & quota_, const UUID & quota_id_) { setQuota(quota_, quota_id_); } QuotaInfo(const QuotaPtr & quota_, const UUID & quota_id_) { setQuota(quota_, quota_id_); }
void setQuota(const QuotaPtr & quota_, const UUID & quota_id_); void setQuota(const QuotaPtr & quota_, const UUID & quota_id_);
String calculateKey(const EnabledQuota & enabled_quota) const; String calculateKey(const EnabledQuota & enabled_quota, bool throw_if_client_key_empty) const;
boost::shared_ptr<const Intervals> getOrBuildIntervals(const String & key); boost::shared_ptr<const Intervals> getOrBuildIntervals(const String & key);
boost::shared_ptr<const Intervals> rebuildIntervals(const String & key, std::chrono::system_clock::time_point current_time); boost::shared_ptr<const Intervals> rebuildIntervals(const String & key, std::chrono::system_clock::time_point current_time);
void rebuildAllIntervals(); void rebuildAllIntervals();
@ -56,7 +57,7 @@ private:
void quotaAddedOrChanged(const UUID & quota_id, const std::shared_ptr<const Quota> & new_quota); void quotaAddedOrChanged(const UUID & quota_id, const std::shared_ptr<const Quota> & new_quota);
void quotaRemoved(const UUID & quota_id); void quotaRemoved(const UUID & quota_id);
void chooseQuotaToConsume(); void chooseQuotaToConsume();
void chooseQuotaToConsumeFor(EnabledQuota & enabled_quota); void chooseQuotaToConsumeFor(EnabledQuota & enabled_quota, bool throw_if_client_key_empty);
const AccessControl & access_control; const AccessControl & access_control;
mutable std::mutex mutex; mutable std::mutex mutex;

View File

@ -291,8 +291,17 @@ public:
const UInt64 size = value.size(); const UInt64 size = value.size();
checkArraySize(size, max_elems); checkArraySize(size, max_elems);
writeVarUInt(size, buf); writeVarUInt(size, buf);
for (const auto & element : value)
writeBinaryLittleEndian(element, buf);
if constexpr (std::endian::native == std::endian::little)
{
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
}
else
{
for (const auto & element : value)
writeBinaryLittleEndian(element, buf);
}
if constexpr (Trait::last) if constexpr (Trait::last)
writeBinaryLittleEndian(this->data(place).total_values, buf); writeBinaryLittleEndian(this->data(place).total_values, buf);
@ -315,8 +324,16 @@ public:
auto & value = this->data(place).value; auto & value = this->data(place).value;
value.resize_exact(size, arena); value.resize_exact(size, arena);
for (auto & element : value)
readBinaryLittleEndian(element, buf); if constexpr (std::endian::native == std::endian::little)
{
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
}
else
{
for (auto & element : value)
readBinaryLittleEndian(element, buf);
}
if constexpr (Trait::last) if constexpr (Trait::last)
readBinaryLittleEndian(this->data(place).total_values, buf); readBinaryLittleEndian(this->data(place).total_values, buf);

View File

@ -0,0 +1,414 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <base/sort.h>
#include <algorithm>
#include <type_traits>
#include <utility>
#include <Common/RadixSort.h>
#include <Common/Exception.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnConst.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
extern const int TOO_LARGE_ARRAY_SIZE;
}
namespace
{
enum class GroupArraySortedStrategy
{
heap,
sort
};
constexpr size_t group_array_sorted_sort_strategy_max_elements_threshold = 1000000;
template <typename T, GroupArraySortedStrategy strategy>
struct GroupArraySortedData
{
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
static constexpr size_t partial_sort_max_elements_factor = 2;
static constexpr bool is_value_generic_field = std::is_same_v<T, Field>;
Array values;
static bool compare(const T & lhs, const T & rhs)
{
if constexpr (is_value_generic_field)
{
return lhs < rhs;
}
else
{
return CompareHelper<T>::less(lhs, rhs, -1);
}
}
struct Comparator
{
bool operator()(const T & lhs, const T & rhs)
{
return compare(lhs, rhs);
}
};
ALWAYS_INLINE void heapReplaceTop()
{
size_t size = values.size();
if (size < 2)
return;
size_t child_index = 1;
if (values.size() > 2 && compare(values[1], values[2]))
++child_index;
/// Check if we are in order
if (compare(values[child_index], values[0]))
return;
size_t current_index = 0;
auto current = values[current_index];
do
{
/// We are not in heap-order, swap the parent with it's largest child.
values[current_index] = values[child_index];
current_index = child_index;
// Recompute the child based off of the updated parent
child_index = 2 * child_index + 1;
if (child_index >= size)
break;
if ((child_index + 1) < size && compare(values[child_index], values[child_index + 1]))
{
/// Right child exists and is greater than left child.
++child_index;
}
/// Check if we are in order.
} while (!compare(values[child_index], current));
values[current_index] = current;
}
ALWAYS_INLINE void sortAndLimit(size_t max_elements, Arena * arena)
{
if constexpr (is_value_generic_field)
{
::sort(values.begin(), values.end(), Comparator());
}
else
{
bool try_sort = trySort(values.begin(), values.end(), Comparator());
if (!try_sort)
RadixSort<RadixSortNumTraits<T>>::executeLSD(values.data(), values.size());
}
if (values.size() > max_elements)
values.resize(max_elements, arena);
}
ALWAYS_INLINE void partialSortAndLimitIfNeeded(size_t max_elements, Arena * arena)
{
if (values.size() < max_elements * partial_sort_max_elements_factor)
return;
::nth_element(values.begin(), values.begin() + max_elements, values.end(), Comparator());
values.resize(max_elements, arena);
}
ALWAYS_INLINE void addElement(T && element, size_t max_elements, Arena * arena)
{
if constexpr (strategy == GroupArraySortedStrategy::heap)
{
if (values.size() >= max_elements)
{
/// Element is greater or equal than current max element, it cannot be in k min elements
if (!compare(element, values[0]))
return;
values[0] = std::move(element);
heapReplaceTop();
return;
}
values.push_back(std::move(element), arena);
std::push_heap(values.begin(), values.end(), Comparator());
}
else
{
values.push_back(std::move(element), arena);
partialSortAndLimitIfNeeded(max_elements, arena);
}
}
ALWAYS_INLINE void insertResultInto(IColumn & to, size_t max_elements, Arena * arena)
{
auto & result_array = assert_cast<ColumnArray &>(to);
auto & result_array_offsets = result_array.getOffsets();
sortAndLimit(max_elements, arena);
result_array_offsets.push_back(result_array_offsets.back() + values.size());
if (values.empty())
return;
if constexpr (is_value_generic_field)
{
auto & result_array_data = result_array.getData();
for (auto & value : values)
result_array_data.insert(value);
}
else
{
auto & result_array_data = assert_cast<ColumnVector<T> &>(result_array.getData()).getData();
size_t result_array_data_insert_begin = result_array_data.size();
result_array_data.resize(result_array_data_insert_begin + values.size());
for (size_t i = 0; i < values.size(); ++i)
result_array_data[result_array_data_insert_begin + i] = values[i];
}
}
};
template <typename T>
using GroupArraySortedDataHeap = GroupArraySortedData<T, GroupArraySortedStrategy::heap>;
template <typename T>
using GroupArraySortedDataSort = GroupArraySortedData<T, GroupArraySortedStrategy::sort>;
constexpr UInt64 aggregate_function_group_array_sorted_max_element_size = 0xFFFFFF;
template <typename Data, typename T>
class GroupArraySorted final
: public IAggregateFunctionDataHelper<Data, GroupArraySorted<Data, T>>
{
public:
explicit GroupArraySorted(
const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elements_)
: IAggregateFunctionDataHelper<Data, GroupArraySorted<Data, T>>(
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
, max_elements(max_elements_)
, serialization(data_type_->getDefaultSerialization())
{
if (max_elements > aggregate_function_group_array_sorted_max_element_size)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Too large limit parameter for groupArraySorted aggregate function, it should not exceed {}",
aggregate_function_group_array_sorted_max_element_size);
}
String getName() const override { return "groupArraySorted"; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
if constexpr (std::is_same_v<T, Field>)
{
auto row_value = (*columns[0])[row_num];
this->data(place).addElement(std::move(row_value), max_elements, arena);
}
else
{
auto row_value = assert_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num];
this->data(place).addElement(std::move(row_value), max_elements, arena);
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & rhs_values = this->data(rhs).values;
for (auto rhs_element : rhs_values)
this->data(place).addElement(std::move(rhs_element), max_elements, arena);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
auto & values = this->data(place).values;
size_t size = values.size();
writeVarUInt(size, buf);
if constexpr (std::is_same_v<T, Field>)
{
for (const Field & element : values)
{
if (element.isNull())
{
writeBinary(false, buf);
}
else
{
writeBinary(true, buf);
serialization->serializeBinary(element, buf, {});
}
}
}
else
{
if constexpr (std::endian::native == std::endian::little)
{
buf.write(reinterpret_cast<const char *>(values.data()), size * sizeof(values[0]));
}
else
{
for (const auto & element : values)
writeBinaryLittleEndian(element, buf);
}
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
size_t size = 0;
readVarUInt(size, buf);
if (unlikely(size > max_elements))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size, it should not exceed {}", max_elements);
auto & values = this->data(place).values;
values.resize_exact(size, arena);
if constexpr (std::is_same_v<T, Field>)
{
for (Field & element : values)
{
UInt8 is_null = 0;
readBinary(is_null, buf);
if (!is_null)
serialization->deserializeBinary(element, buf, {});
}
}
else
{
if constexpr (std::endian::native == std::endian::little)
{
buf.readStrict(reinterpret_cast<char *>(values.data()), size * sizeof(values[0]));
}
else
{
for (auto & element : values)
readBinaryLittleEndian(element, buf);
}
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
{
this->data(place).insertResultInto(to, max_elements, arena);
}
bool allocatesMemoryInArena() const override { return true; }
private:
UInt64 max_elements;
SerializationPtr serialization;
};
template <typename T>
using GroupArraySortedHeap = GroupArraySorted<GroupArraySortedDataHeap<T>, T>;
template <typename T>
using GroupArraySortedSort = GroupArraySorted<GroupArraySortedDataSort<T>, T>;
template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
AggregateFunctionPtr createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
{
WhichDataType which(argument_type);
if (which.idx == TypeIndex::Date) return std::make_shared<AggregateFunctionTemplate<UInt16>>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::DateTime) return std::make_shared<AggregateFunctionTemplate<UInt32>>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::IPv4) return std::make_shared<AggregateFunctionTemplate<IPv4>>(std::forward<TArgs>(args)...);
return AggregateFunctionPtr(createWithNumericType<AggregateFunctionTemplate, TArgs...>(argument_type, std::forward<TArgs>(args)...));
}
template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
inline AggregateFunctionPtr createAggregateFunctionGroupArraySortedImpl(const DataTypePtr & argument_type, const Array & parameters, TArgs ... args)
{
if (auto res = createWithNumericOrTimeType<AggregateFunctionTemplate>(*argument_type, argument_type, parameters, std::forward<TArgs>(args)...))
return AggregateFunctionPtr(res);
return std::make_shared<AggregateFunctionTemplate<Field>>(argument_type, parameters, std::forward<TArgs>(args)...);
}
AggregateFunctionPtr createAggregateFunctionGroupArray(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertUnary(name, argument_types);
UInt64 max_elems = std::numeric_limits<UInt64>::max();
if (parameters.empty())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should have limit argument", name);
}
else if (parameters.size() == 1)
{
auto type = parameters[0].getType();
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
max_elems = parameters[0].get<UInt64>();
}
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} does not support this number of arguments", name);
if (max_elems > group_array_sorted_sort_strategy_max_elements_threshold)
return createAggregateFunctionGroupArraySortedImpl<GroupArraySortedSort>(argument_types[0], parameters, max_elems);
return createAggregateFunctionGroupArraySortedImpl<GroupArraySortedHeap>(argument_types[0], parameters, max_elems);
}
}
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = false };
factory.registerFunction("groupArraySorted", { createAggregateFunctionGroupArray, properties });
}
}

View File

@ -31,7 +31,7 @@ namespace ErrorCodes
template <typename> class QuantileTiming; template <typename> class QuantileTiming;
template <typename> class QuantileGK; template <typename> class QuantileGK;
template <typename> class QuantileDDSketch; template <typename> class QuantileDD;
/** Generic aggregate function for calculation of quantiles. /** Generic aggregate function for calculation of quantiles.
* It depends on quantile calculation data structure. Look at Quantile*.h for various implementations. * It depends on quantile calculation data structure. Look at Quantile*.h for various implementations.
@ -64,7 +64,7 @@ private:
using ColVecType = ColumnVectorOrDecimal<Value>; using ColVecType = ColumnVectorOrDecimal<Value>;
static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>); static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
static constexpr bool is_quantile_ddsketch = std::is_same_v<Data, QuantileDDSketch<Value>>; static constexpr bool is_quantile_ddsketch = std::is_same_v<Data, QuantileDD<Value>>;
static_assert(!is_decimal<Value> || !returns_float); static_assert(!is_decimal<Value> || !returns_float);
QuantileLevels<Float64> levels; QuantileLevels<Float64> levels;
@ -334,7 +334,7 @@ struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBF
struct NameQuantileGK { static constexpr auto name = "quantileGK"; }; struct NameQuantileGK { static constexpr auto name = "quantileGK"; };
struct NameQuantilesGK { static constexpr auto name = "quantilesGK"; }; struct NameQuantilesGK { static constexpr auto name = "quantilesGK"; };
struct NameQuantileDDSketch { static constexpr auto name = "quantileDDSketch"; }; struct NameQuantileDD { static constexpr auto name = "quantileDD"; };
struct NameQuantilesDDSketch { static constexpr auto name = "quantilesDDSketch"; }; struct NameQuantilesDD { static constexpr auto name = "quantilesDD"; };
} }

View File

@ -1,5 +1,5 @@
#include <AggregateFunctions/AggregateFunctionQuantile.h> #include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/QuantileDDSketch.h> #include <AggregateFunctions/QuantileDD.h>
#include <AggregateFunctions/AggregateFunctionFactory.h> #include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h> #include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h> #include <DataTypes/DataTypeDate.h>
@ -19,8 +19,8 @@ namespace ErrorCodes
namespace namespace
{ {
template <typename Value, bool float_return> using FuncQuantileDDSketch = AggregateFunctionQuantile<Value, QuantileDDSketch<Value>, NameQuantileDDSketch, false, std::conditional_t<float_return, Float64, void>, false, true>; template <typename Value, bool float_return> using FuncQuantileDD = AggregateFunctionQuantile<Value, QuantileDD<Value>, NameQuantileDD, false, std::conditional_t<float_return, Float64, void>, false, true>;
template <typename Value, bool float_return> using FuncQuantilesDDSketch = AggregateFunctionQuantile<Value, QuantileDDSketch<Value>, NameQuantilesDDSketch, false, std::conditional_t<float_return, Float64, void>, true, true>; template <typename Value, bool float_return> using FuncQuantilesDD = AggregateFunctionQuantile<Value, QuantileDD<Value>, NameQuantilesDD, false, std::conditional_t<float_return, Float64, void>, true, true>;
template <template <typename, bool> class Function> template <template <typename, bool> class Function>
@ -46,16 +46,16 @@ AggregateFunctionPtr createAggregateFunctionQuantile(
} }
void registerAggregateFunctionsQuantileDDSketch(AggregateFunctionFactory & factory) void registerAggregateFunctionsQuantileDD(AggregateFunctionFactory & factory)
{ {
/// For aggregate functions returning array we cannot return NULL on empty set. /// For aggregate functions returning array we cannot return NULL on empty set.
AggregateFunctionProperties properties = { .returns_default_when_only_null = true }; AggregateFunctionProperties properties = { .returns_default_when_only_null = true };
factory.registerFunction(NameQuantileDDSketch::name, createAggregateFunctionQuantile<FuncQuantileDDSketch>); factory.registerFunction(NameQuantileDD::name, createAggregateFunctionQuantile<FuncQuantileDD>);
factory.registerFunction(NameQuantilesDDSketch::name, { createAggregateFunctionQuantile<FuncQuantilesDDSketch>, properties }); factory.registerFunction(NameQuantilesDD::name, { createAggregateFunctionQuantile<FuncQuantilesDD>, properties });
/// 'median' is an alias for 'quantile' /// 'median' is an alias for 'quantile'
factory.registerAlias("medianDDSketch", NameQuantileDDSketch::name); factory.registerAlias("medianDD", NameQuantileDD::name);
} }
} }

View File

@ -33,14 +33,14 @@ namespace DB
*/ */
template <typename Value> template <typename Value>
class QuantileDDSketch class QuantileDD
{ {
public: public:
using Weight = UInt64; using Weight = UInt64;
QuantileDDSketch() = default; QuantileDD() = default;
explicit QuantileDDSketch(Float64 relative_accuracy) : data(relative_accuracy) { } explicit QuantileDD(Float64 relative_accuracy) : data(relative_accuracy) { }
void add(const Value & x) void add(const Value & x)
{ {
@ -53,7 +53,7 @@ public:
data.add(x, w); data.add(x, w);
} }
void merge(const QuantileDDSketch &other) void merge(const QuantileDD &other)
{ {
data.merge(other.data); data.merge(other.data);
} }

View File

@ -1,2 +1,5 @@
clickhouse_add_executable (quantile-t-digest quantile-t-digest.cpp) clickhouse_add_executable (quantile-t-digest quantile-t-digest.cpp)
target_link_libraries (quantile-t-digest PRIVATE dbms clickhouse_aggregate_functions) target_link_libraries (quantile-t-digest PRIVATE dbms clickhouse_aggregate_functions)
clickhouse_add_executable (group_array_sorted group_array_sorted.cpp)
target_link_libraries (group_array_sorted PRIVATE dbms clickhouse_aggregate_functions)

View File

@ -0,0 +1,205 @@
#include <algorithm>
#include <type_traits>
#include <utility>
#include <iostream>
#include "pcg_random.hpp"
#include <Columns/ColumnVector.h>
#include <Common/ArenaAllocator.h>
#include <Common/RadixSort.h>
#include <Columns/ColumnArray.h>
using namespace DB;
template <typename T>
struct GroupArraySortedDataHeap
{
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
Array values;
static bool compare(const T & lhs, const T & rhs)
{
return lhs < rhs;
}
struct Comparator
{
bool operator()(const T & lhs, const T & rhs)
{
return compare(lhs, rhs);
}
};
ALWAYS_INLINE void replaceTop()
{
size_t size = values.size();
if (size < 2)
return;
size_t child_index = 1;
if (values.size() > 2 && compare(values[1], values[2]))
++child_index;
/// Check if we are in order
if (compare(values[child_index], values[0]))
return;
size_t current_index = 0;
auto current = values[current_index];
do
{
/// We are not in heap-order, swap the parent with it's largest child.
values[current_index] = values[child_index];
current_index = child_index;
// Recompute the child based off of the updated parent
child_index = 2 * child_index + 1;
if (child_index >= size)
break;
if ((child_index + 1) < size && compare(values[child_index], values[child_index + 1]))
{
/// Right child exists and is greater than left child.
++child_index;
}
/// Check if we are in order.
} while (!compare(values[child_index], current));
values[current_index] = current;
}
ALWAYS_INLINE void addElement(const T & element, size_t max_elements, Arena * arena)
{
if (values.size() >= max_elements)
{
/// Element is greater or equal than current max element, it cannot be in k min elements
if (!compare(element, values[0]))
return;
values[0] = element;
replaceTop();
return;
}
values.push_back(element, arena);
std::push_heap(values.begin(), values.end(), Comparator());
}
ALWAYS_INLINE void dump()
{
while (!values.empty())
{
std::pop_heap(values.begin(), values.end(), Comparator());
std::cerr << values.back() << ' ';
values.pop_back();
}
std::cerr << '\n';
}
};
template <typename T>
struct GroupArraySortedDataSort
{
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
Array values;
static bool compare(const T & lhs, const T & rhs)
{
return lhs < rhs;
}
struct Comparator
{
bool operator()(const T & lhs, const T & rhs)
{
return compare(lhs, rhs);
}
};
ALWAYS_INLINE void sortAndLimit(size_t max_elements, Arena * arena)
{
RadixSort<RadixSortNumTraits<T>>::executeLSD(values.data(), values.size());
values.resize(max_elements, arena);
}
ALWAYS_INLINE void partialSortAndLimitIfNeeded(size_t max_elements, Arena * arena)
{
if (values.size() < max_elements * 4)
return;
std::nth_element(values.begin(), values.begin() + max_elements, values.end(), Comparator());
values.resize(max_elements, arena);
}
ALWAYS_INLINE void addElement(const T & element, size_t max_elements, Arena * arena)
{
values.push_back(element, arena);
partialSortAndLimitIfNeeded(max_elements, arena);
}
};
template <typename SortedData>
NO_INLINE void benchmark(size_t elements, size_t max_elements)
{
Stopwatch watch;
watch.start();
SortedData data;
pcg64_fast rng;
Arena arena;
for (size_t i = 0; i < elements; ++i)
{
uint64_t value = rng();
data.addElement(value, max_elements, &arena);
}
watch.stop();
std::cerr << "Elapsed " << watch.elapsedMilliseconds() << " milliseconds" << '\n';
}
int main(int argc, char ** argv)
{
(void)(argc);
(void)(argv);
if (argc != 4)
{
std::cerr << "./group_array_sorted method elements max_elements" << '\n';
return 1;
}
std::string method = std::string(argv[1]);
uint64_t elements = std::atol(argv[2]); /// NOLINT
uint64_t max_elements = std::atol(argv[3]); /// NOLINT
std::cerr << "Method " << method << " elements " << elements << " max elements " << max_elements << '\n';
if (method == "heap")
{
benchmark<GroupArraySortedDataHeap<UInt64>>(elements, max_elements);
}
else if (method == "sort")
{
benchmark<GroupArraySortedDataSort<UInt64>>(elements, max_elements);
}
else
{
std::cerr << "Invalid method " << method << '\n';
return 1;
}
return 0;
}

View File

@ -15,6 +15,7 @@ void registerAggregateFunctionCount(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSum(AggregateFunctionFactory &); void registerAggregateFunctionDeltaSum(AggregateFunctionFactory &);
void registerAggregateFunctionDeltaSumTimestamp(AggregateFunctionFactory &); void registerAggregateFunctionDeltaSumTimestamp(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArray(AggregateFunctionFactory &); void registerAggregateFunctionGroupArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &); void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &); void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &); void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
@ -31,7 +32,7 @@ void registerAggregateFunctionsQuantileTimingWeighted(AggregateFunctionFactory &
void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDDSketch(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileDD(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &); void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
@ -112,6 +113,7 @@ void registerAggregateFunctions()
registerAggregateFunctionDeltaSum(factory); registerAggregateFunctionDeltaSum(factory);
registerAggregateFunctionDeltaSumTimestamp(factory); registerAggregateFunctionDeltaSumTimestamp(factory);
registerAggregateFunctionGroupArray(factory); registerAggregateFunctionGroupArray(factory);
registerAggregateFunctionGroupArraySorted(factory);
registerAggregateFunctionGroupUniqArray(factory); registerAggregateFunctionGroupUniqArray(factory);
registerAggregateFunctionGroupArrayInsertAt(factory); registerAggregateFunctionGroupArrayInsertAt(factory);
registerAggregateFunctionsQuantile(factory); registerAggregateFunctionsQuantile(factory);
@ -128,7 +130,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsQuantileTDigest(factory); registerAggregateFunctionsQuantileTDigest(factory);
registerAggregateFunctionsQuantileTDigestWeighted(factory); registerAggregateFunctionsQuantileTDigestWeighted(factory);
registerAggregateFunctionsQuantileBFloat16(factory); registerAggregateFunctionsQuantileBFloat16(factory);
registerAggregateFunctionsQuantileDDSketch(factory); registerAggregateFunctionsQuantileDD(factory);
registerAggregateFunctionsQuantileBFloat16Weighted(factory); registerAggregateFunctionsQuantileBFloat16Weighted(factory);
registerAggregateFunctionsQuantileApprox(factory); registerAggregateFunctionsQuantileApprox(factory);
registerAggregateFunctionsSequenceMatch(factory); registerAggregateFunctionsSequenceMatch(factory);

View File

@ -181,6 +181,23 @@ public:
node = std::make_shared<ColumnNode>(column, column_source); node = std::make_shared<ColumnNode>(column, column_source);
} }
else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node)
{
/// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`.
const auto & variant_element_constant_value = second_argument_constant_node->getValue();
String subcolumn_name;
if (variant_element_constant_value.getType() != Field::Types::String)
return;
subcolumn_name = variant_element_constant_value.get<const String &>();
column.name += '.';
column.name += subcolumn_name;
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapContains" && column_type.isMap()) else if (function_name == "mapContains" && column_type.isMap())
{ {
const auto & data_type_map = assert_cast<const DataTypeMap &>(*column.type); const auto & data_type_map = assert_cast<const DataTypeMap &>(*column.type);

View File

@ -1214,7 +1214,7 @@ private:
static void expandGroupByAll(QueryNode & query_tree_node_typed); static void expandGroupByAll(QueryNode & query_tree_node_typed);
static void expandOrderByAll(QueryNode & query_tree_node_typed); static void expandOrderByAll(QueryNode & query_tree_node_typed, const Settings & settings);
static std::string static std::string
rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context); rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context);
@ -1396,6 +1396,8 @@ private:
/// Lambdas that are currently in resolve process /// Lambdas that are currently in resolve process
std::unordered_set<IQueryTreeNode *> lambdas_in_resolve_process; std::unordered_set<IQueryTreeNode *> lambdas_in_resolve_process;
std::unordered_set<std::string_view> cte_in_resolve_process;
/// Function name to user defined lambda map /// Function name to user defined lambda map
std::unordered_map<std::string, QueryTreeNodePtr> function_name_to_user_defined_lambda; std::unordered_map<std::string, QueryTreeNodePtr> function_name_to_user_defined_lambda;
@ -2332,8 +2334,11 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
query_tree_node_typed.setIsGroupByAll(false); query_tree_node_typed.setIsGroupByAll(false);
} }
void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed) void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed, const Settings & settings)
{ {
if (!settings.enable_order_by_all || !query_tree_node_typed.isOrderByAll())
return;
auto * all_node = query_tree_node_typed.getOrderBy().getNodes()[0]->as<SortNode>(); auto * all_node = query_tree_node_typed.getOrderBy().getNodes()[0]->as<SortNode>();
if (!all_node) if (!all_node)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not sort node."); throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not sort node.");
@ -2359,6 +2364,7 @@ void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed)
} }
query_tree_node_typed.getOrderByNode() = list_node; query_tree_node_typed.getOrderByNode() = list_node;
query_tree_node_typed.setIsOrderByAll(false);
} }
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded( std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(
@ -3716,7 +3722,12 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook
if (it->second.resolve_result.isResolved() && if (it->second.resolve_result.isResolved() &&
scope.use_identifier_lookup_to_result_cache && scope.use_identifier_lookup_to_result_cache &&
!scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup)) !scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup))
return it->second.resolve_result; {
if (!it->second.resolve_result.isResolvedFromCTEs() || !cte_in_resolve_process.contains(identifier_lookup.identifier.getFullName()))
{
return it->second.resolve_result;
}
}
} }
else else
{ {
@ -3773,8 +3784,23 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook
if (!resolve_result.resolved_identifier && identifier_lookup.isTableExpressionLookup()) if (!resolve_result.resolved_identifier && identifier_lookup.isTableExpressionLookup())
{ {
auto cte_query_node_it = scope.cte_name_to_query_node.find(identifier_lookup.identifier.getFullName()); auto full_name = identifier_lookup.identifier.getFullName();
if (cte_query_node_it != scope.cte_name_to_query_node.end()) auto cte_query_node_it = scope.cte_name_to_query_node.find(full_name);
/// CTE may reference table expressions with the same name, e.g.:
///
/// WITH test1 AS (SELECT * FROM test1) SELECT * FROM test1;
///
/// Since we don't support recursive CTEs, `test1` identifier inside of CTE
/// references to table <default database name>.test1.
/// This means that the example above is equivalent to the following query:
///
/// SELECT * FROM test1;
///
/// To accomplish this behaviour it's not allowed to resolve identifiers to
/// CTE that is being resolved.
if (cte_query_node_it != scope.cte_name_to_query_node.end()
&& !cte_in_resolve_process.contains(full_name))
{ {
resolve_result.resolved_identifier = cte_query_node_it->second; resolve_result.resolved_identifier = cte_query_node_it->second;
resolve_result.resolve_place = IdentifierResolvePlace::CTE; resolve_result.resolve_place = IdentifierResolvePlace::CTE;
@ -5712,6 +5738,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
subquery_node = resolved_identifier_node->as<QueryNode>(); subquery_node = resolved_identifier_node->as<QueryNode>();
union_node = resolved_identifier_node->as<UnionNode>(); union_node = resolved_identifier_node->as<UnionNode>();
std::string_view cte_name = subquery_node ? subquery_node->getCTEName() : union_node->getCTEName();
if (subquery_node) if (subquery_node)
subquery_node->setIsCTE(false); subquery_node->setIsCTE(false);
else else
@ -5720,10 +5748,21 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
IdentifierResolveScope subquery_scope(resolved_identifier_node, &scope /*parent_scope*/); IdentifierResolveScope subquery_scope(resolved_identifier_node, &scope /*parent_scope*/);
subquery_scope.subquery_depth = scope.subquery_depth + 1; subquery_scope.subquery_depth = scope.subquery_depth + 1;
/// CTE is being resolved, it's required to forbid to resolve to it again
/// because recursive CTEs are not supported, e.g.:
///
/// WITH test1 AS (SELECT i + 1, j + 1 FROM test1) SELECT toInt64(4) i, toInt64(5) j FROM numbers(3) WHERE (i, j) IN test1;
///
/// In this example argument of function `in` is being resolve here. If CTE `test1` is not forbidden,
/// `test1` is resolved to CTE (not to the table) in `initializeQueryJoinTreeNode` function.
cte_in_resolve_process.insert(cte_name);
if (subquery_node) if (subquery_node)
resolveQuery(resolved_identifier_node, subquery_scope); resolveQuery(resolved_identifier_node, subquery_scope);
else else
resolveUnion(resolved_identifier_node, subquery_scope); resolveUnion(resolved_identifier_node, subquery_scope);
cte_in_resolve_process.erase(cte_name);
} }
} }
} }
@ -7116,6 +7155,10 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
max_subquery_depth); max_subquery_depth);
auto & query_node_typed = query_node->as<QueryNode &>(); auto & query_node_typed = query_node->as<QueryNode &>();
if (query_node_typed.isCTE())
cte_in_resolve_process.insert(query_node_typed.getCTEName());
const auto & settings = scope.context->getSettingsRef(); const auto & settings = scope.context->getSettingsRef();
bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube(); bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube();
@ -7137,8 +7180,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.hasHaving() && query_node_typed.isGroupByWithTotals() && is_rollup_or_cube) if (query_node_typed.hasHaving() && query_node_typed.isGroupByWithTotals() && is_rollup_or_cube)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING");
if (settings.enable_order_by_all && query_node_typed.isOrderByAll()) expandOrderByAll(query_node_typed, settings);
expandOrderByAll(query_node_typed);
/// Initialize aliases in query node scope /// Initialize aliases in query node scope
QueryExpressionsAliasVisitor visitor(scope); QueryExpressionsAliasVisitor visitor(scope);
@ -7455,11 +7497,18 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
node->removeAlias(); node->removeAlias();
query_node_typed.resolveProjectionColumns(std::move(projection_columns)); query_node_typed.resolveProjectionColumns(std::move(projection_columns));
if (query_node_typed.isCTE())
cte_in_resolve_process.erase(query_node_typed.getCTEName());
} }
void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, IdentifierResolveScope & scope) void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, IdentifierResolveScope & scope)
{ {
auto & union_node_typed = union_node->as<UnionNode &>(); auto & union_node_typed = union_node->as<UnionNode &>();
if (union_node_typed.isCTE())
cte_in_resolve_process.insert(union_node_typed.getCTEName());
auto & queries_nodes = union_node_typed.getQueries().getNodes(); auto & queries_nodes = union_node_typed.getQueries().getNodes();
for (auto & query_node : queries_nodes) for (auto & query_node : queries_nodes)
@ -7483,6 +7532,9 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier
scope.scope_node->formatASTForErrorMessage()); scope.scope_node->formatASTForErrorMessage());
} }
} }
if (union_node_typed.isCTE())
cte_in_resolve_process.erase(union_node_typed.getCTEName());
} }
} }

View File

@ -119,6 +119,9 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
if (is_group_by_all) if (is_group_by_all)
buffer << ", is_group_by_all: " << is_group_by_all; buffer << ", is_group_by_all: " << is_group_by_all;
if (is_order_by_all)
buffer << ", is_order_by_all: " << is_order_by_all;
std::string group_by_type; std::string group_by_type;
if (is_group_by_with_rollup) if (is_group_by_with_rollup)
group_by_type = "rollup"; group_by_type = "rollup";
@ -257,6 +260,7 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
is_group_by_with_cube == rhs_typed.is_group_by_with_cube && is_group_by_with_cube == rhs_typed.is_group_by_with_cube &&
is_group_by_with_grouping_sets == rhs_typed.is_group_by_with_grouping_sets && is_group_by_with_grouping_sets == rhs_typed.is_group_by_with_grouping_sets &&
is_group_by_all == rhs_typed.is_group_by_all && is_group_by_all == rhs_typed.is_group_by_all &&
is_order_by_all == rhs_typed.is_order_by_all &&
cte_name == rhs_typed.cte_name && cte_name == rhs_typed.cte_name &&
projection_columns == rhs_typed.projection_columns && projection_columns == rhs_typed.projection_columns &&
settings_changes == rhs_typed.settings_changes; settings_changes == rhs_typed.settings_changes;
@ -288,6 +292,7 @@ void QueryNode::updateTreeHashImpl(HashState & state) const
state.update(is_group_by_with_cube); state.update(is_group_by_with_cube);
state.update(is_group_by_with_grouping_sets); state.update(is_group_by_with_grouping_sets);
state.update(is_group_by_all); state.update(is_group_by_all);
state.update(is_order_by_all);
state.update(settings_changes.size()); state.update(settings_changes.size());
@ -306,18 +311,19 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
{ {
auto result_query_node = std::make_shared<QueryNode>(context); auto result_query_node = std::make_shared<QueryNode>(context);
result_query_node->is_subquery = is_subquery; result_query_node->is_subquery = is_subquery;
result_query_node->is_cte = is_cte; result_query_node->is_cte = is_cte;
result_query_node->is_distinct = is_distinct; result_query_node->is_distinct = is_distinct;
result_query_node->is_limit_with_ties = is_limit_with_ties; result_query_node->is_limit_with_ties = is_limit_with_ties;
result_query_node->is_group_by_with_totals = is_group_by_with_totals; result_query_node->is_group_by_with_totals = is_group_by_with_totals;
result_query_node->is_group_by_with_rollup = is_group_by_with_rollup; result_query_node->is_group_by_with_rollup = is_group_by_with_rollup;
result_query_node->is_group_by_with_cube = is_group_by_with_cube; result_query_node->is_group_by_with_cube = is_group_by_with_cube;
result_query_node->is_group_by_with_grouping_sets = is_group_by_with_grouping_sets; result_query_node->is_group_by_with_grouping_sets = is_group_by_with_grouping_sets;
result_query_node->is_group_by_all = is_group_by_all; result_query_node->is_group_by_all = is_group_by_all;
result_query_node->cte_name = cte_name; result_query_node->is_order_by_all = is_order_by_all;
result_query_node->projection_columns = projection_columns; result_query_node->cte_name = cte_name;
result_query_node->settings_changes = settings_changes; result_query_node->projection_columns = projection_columns;
result_query_node->settings_changes = settings_changes;
return result_query_node; return result_query_node;
} }
@ -332,6 +338,7 @@ ASTPtr QueryNode::toASTImpl(const ConvertToASTOptions & options) const
select_query->group_by_with_cube = is_group_by_with_cube; select_query->group_by_with_cube = is_group_by_with_cube;
select_query->group_by_with_grouping_sets = is_group_by_with_grouping_sets; select_query->group_by_with_grouping_sets = is_group_by_with_grouping_sets;
select_query->group_by_all = is_group_by_all; select_query->group_by_all = is_group_by_all;
select_query->order_by_all = is_order_by_all;
if (hasWith()) if (hasWith())
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST(options)); select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST(options));

View File

@ -32,6 +32,8 @@ namespace ErrorCodes
M(UInt64, shard_num) \ M(UInt64, shard_num) \
M(UInt64, replica_num) \ M(UInt64, replica_num) \
M(Bool, check_parts) \ M(Bool, check_parts) \
M(Bool, check_projection_parts) \
M(Bool, allow_backup_broken_projections) \
M(Bool, internal) \ M(Bool, internal) \
M(String, host_id) \ M(String, host_id) \
M(OptionalUUID, backup_uuid) M(OptionalUUID, backup_uuid)

View File

@ -62,6 +62,12 @@ struct BackupSettings
/// Check checksums of the data parts before writing them to a backup. /// Check checksums of the data parts before writing them to a backup.
bool check_parts = true; bool check_parts = true;
/// Check checksums of the projection data parts before writing them to a backup.
bool check_projection_parts = true;
/// Allow to create backup with broken projections.
bool allow_backup_broken_projections = false;
/// Internal, should not be specified by user. /// Internal, should not be specified by user.
/// Whether this backup is a part of a distributed backup created by BACKUP ON CLUSTER. /// Whether this backup is a part of a distributed backup created by BACKUP ON CLUSTER.
bool internal = false; bool internal = false;

View File

@ -294,12 +294,14 @@ if (TARGET ch_contrib::gwp_asan)
endif() endif()
# Otherwise it will slow down stack traces printing too much. # Otherwise it will slow down stack traces printing too much.
set_source_files_properties( if ("${CMAKE_BUILD_TYPE_UC}" STREQUAL "DEBUG")
Common/Elf.cpp set_source_files_properties(
Common/Dwarf.cpp Common/Elf.cpp
Common/SymbolIndex.cpp Common/Dwarf.cpp
Common/ThreadFuzzer.cpp Common/SymbolIndex.cpp
PROPERTIES COMPILE_FLAGS "-O2 ${WITHOUT_COVERAGE_FLAGS}") Common/ThreadFuzzer.cpp
PROPERTIES COMPILE_FLAGS "-O2 ${WITHOUT_COVERAGE_FLAGS}")
endif()
target_link_libraries (clickhouse_common_io target_link_libraries (clickhouse_common_io
PRIVATE PRIVATE

View File

@ -159,4 +159,26 @@ void ColumnConst::compareColumn(
std::fill(compare_results.begin(), compare_results.end(), res); std::fill(compare_results.begin(), compare_results.end(), res);
} }
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value)
{
auto data = column->cloneEmpty();
data->insert(value);
return ColumnConst::create(std::move(data), 1);
}
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, size_t const_value_index)
{
auto data = column->cloneEmpty();
data->insertFrom(*column, const_value_index);
return ColumnConst::create(std::move(data), 1);
}
ColumnConst::Ptr createColumnConstWithDefaultValue(const ColumnPtr & column)
{
auto data = column->cloneEmpty();
data->insertDefault();
return ColumnConst::create(std::move(data), 1);
}
} }

View File

@ -292,4 +292,9 @@ public:
bool isCollationSupported() const override { return data->isCollationSupported(); } bool isCollationSupported() const override { return data->isCollationSupported(); }
}; };
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value);
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, size_t const_value_index);
ColumnConst::Ptr createColumnConstWithDefaultValue(const ColumnPtr &column);
} }

View File

@ -308,6 +308,13 @@ ColumnWithTypeAndName ColumnFunction::reduce() const
ProfileEvents::increment(ProfileEvents::CompiledFunctionExecute); ProfileEvents::increment(ProfileEvents::CompiledFunctionExecute);
res.column = function->execute(columns, res.type, elements_size); res.column = function->execute(columns, res.type, elements_size);
if (res.column->getDataType() != res.type->getColumnType())
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Unexpected return type from {}. Expected {}. Got {}",
function->getName(),
res.type->getColumnType(),
res.column->getDataType());
if (recursively_convert_result_to_full_column_if_low_cardinality) if (recursively_convert_result_to_full_column_if_low_cardinality)
{ {
res.column = recursiveRemoveLowCardinality(res.column); res.column = recursiveRemoveLowCardinality(res.column);

View File

@ -141,6 +141,11 @@ void ColumnMap::updateHashFast(SipHash & hash) const
nested->updateHashFast(hash); nested->updateHashFast(hash);
} }
void ColumnMap::insertFrom(const IColumn & src, size_t n)
{
nested->insertFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), n);
}
void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{ {
nested->insertRangeFrom( nested->insertRangeFrom(

View File

@ -64,6 +64,7 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override; void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override; void expand(const Filter & mask, bool inverted) override;

View File

@ -833,24 +833,22 @@ void ColumnNullable::checkConsistency() const
"Logical error: Sizes of nested column and null map of Nullable column are not equal"); "Logical error: Sizes of nested column and null map of Nullable column are not equal");
} }
ColumnPtr ColumnNullable::createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const ColumnPtr ColumnNullable::createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const
{ {
ColumnPtr new_values; ColumnPtr new_values;
ColumnPtr new_null_map; ColumnPtr new_null_map;
if (default_field.getType() == Field::Types::Null) const ColumnNullable & nullable_column_with_default_value = assert_cast<const ColumnNullable &>(column_with_default_value.getDataColumn());
if (nullable_column_with_default_value.isNullAt(0))
{ {
auto default_column = nested_column->cloneEmpty();
default_column->insertDefault();
/// Value in main column, when null map is 1 is implementation defined. So, take any value. /// Value in main column, when null map is 1 is implementation defined. So, take any value.
new_values = nested_column->createWithOffsets(offsets, (*default_column)[0], total_rows, shift); new_values = nested_column->createWithOffsets(offsets, *createColumnConstWithDefaultValue(nested_column), total_rows, shift);
new_null_map = null_map->createWithOffsets(offsets, Field(1u), total_rows, shift); new_null_map = null_map->createWithOffsets(offsets, *createColumnConst(null_map, Field(1u)), total_rows, shift);
} }
else else
{ {
new_values = nested_column->createWithOffsets(offsets, default_field, total_rows, shift); new_values = nested_column->createWithOffsets(offsets, *ColumnConst::create(nullable_column_with_default_value.getNestedColumnPtr(), 1), total_rows, shift);
new_null_map = null_map->createWithOffsets(offsets, Field(0u), total_rows, shift); new_null_map = null_map->createWithOffsets(offsets, *createColumnConst(null_map, Field(0u)), total_rows, shift);
} }
return ColumnNullable::create(new_values, new_null_map); return ColumnNullable::create(new_values, new_null_map);
@ -896,10 +894,7 @@ ColumnPtr makeNullable(const ColumnPtr & column)
ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column) ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column)
{ {
if (isColumnNullable(*column)) if (isColumnNullableOrLowCardinalityNullable(*column))
return column;
if (isColumnLowCardinalityNullable(*column))
return column; return column;
if (isColumnConst(*column)) if (isColumnConst(*column))
@ -925,4 +920,21 @@ ColumnPtr makeNullableSafe(const ColumnPtr & column)
return column; return column;
} }
ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column)
{
if (isColumnNullableOrLowCardinalityNullable(*column))
return column;
if (isColumnConst(*column))
return ColumnConst::create(makeNullableOrLowCardinalityNullableSafe(assert_cast<const ColumnConst &>(*column).getDataColumnPtr()), column->size());
if (column->lowCardinality())
return assert_cast<const ColumnLowCardinality &>(*column).cloneNullable();
if (column->canBeInsideNullable())
return makeNullable(column);
return column;
}
} }

View File

@ -168,7 +168,7 @@ public:
getIndicesOfNonDefaultRowsImpl<ColumnNullable>(indices, from, limit); getIndicesOfNonDefaultRowsImpl<ColumnNullable>(indices, from, limit);
} }
ColumnPtr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const override; ColumnPtr createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const override;
bool isNullable() const override { return true; } bool isNullable() const override { return true; }
bool isFixedAndContiguous() const override { return false; } bool isFixedAndContiguous() const override { return false; }
@ -232,5 +232,6 @@ private:
ColumnPtr makeNullable(const ColumnPtr & column); ColumnPtr makeNullable(const ColumnPtr & column);
ColumnPtr makeNullableSafe(const ColumnPtr & column); ColumnPtr makeNullableSafe(const ColumnPtr & column);
ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column); ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column);
ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column);
} }

View File

@ -2,6 +2,7 @@
#include <Columns/ColumnObject.h> #include <Columns/ColumnObject.h>
#include <Columns/ColumnsNumber.h> #include <Columns/ColumnsNumber.h>
#include <Columns/ColumnArray.h> #include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Common/iota.h> #include <Common/iota.h>
#include <DataTypes/ObjectUtils.h> #include <DataTypes/ObjectUtils.h>
#include <DataTypes/getLeastSupertype.h> #include <DataTypes/getLeastSupertype.h>
@ -475,7 +476,7 @@ void ColumnObject::Subcolumn::finalize()
{ {
auto values = part->index(*offsets, offsets->size()); auto values = part->index(*offsets, offsets->size());
values = castColumn({values, from_type, ""}, to_type); values = castColumn({values, from_type, ""}, to_type);
part = values->createWithOffsets(offsets_data, to_type->getDefault(), part_size, /*shift=*/ 0); part = values->createWithOffsets(offsets_data, *createColumnConstWithDefaultValue(result_column->getPtr()), part_size, /*shift=*/ 0);
} }
} }

View File

@ -1,6 +1,7 @@
#include <Columns/ColumnCompressed.h> #include <Columns/ColumnCompressed.h>
#include <Columns/ColumnSparse.h> #include <Columns/ColumnSparse.h>
#include <Columns/ColumnTuple.h> #include <Columns/ColumnTuple.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Processors/Transforms/ColumnGathererTransform.h> #include <Processors/Transforms/ColumnGathererTransform.h>
#include <Common/HashTable/Hash.h> #include <Common/HashTable/Hash.h>
@ -130,7 +131,7 @@ StringRef ColumnSparse::getDataAt(size_t n) const
ColumnPtr ColumnSparse::convertToFullColumnIfSparse() const ColumnPtr ColumnSparse::convertToFullColumnIfSparse() const
{ {
return values->createWithOffsets(getOffsetsData(), (*values)[0], _size, /*shift=*/ 1); return values->createWithOffsets(getOffsetsData(), *createColumnConst(values, 0), _size, /*shift=*/ 1);
} }
void ColumnSparse::insertSingleValue(const Inserter & inserter) void ColumnSparse::insertSingleValue(const Inserter & inserter)

File diff suppressed because it is too large Load Diff

307
src/Columns/ColumnVariant.h Normal file
View File

@ -0,0 +1,307 @@
#pragma once
#include <Columns/IColumn.h>
#include <Columns/ColumnVector.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
/**
* Column for storing Variant(...) type values.
* Variant type represents a union of other data types.
* For example, type Variant(T1, T2, ..., TN) means that each row of this type
* has a value of either type T1 or T2 or ... or TN or none of them (NULL value)
*
* ColumnVariant stores:
* - The discriminators column, which determines which variant is stored in each row.
* - The offsets column, which determines the offset in the corresponding variant column in each row.
* - The list of variant columns with only real values (so the sizes of variant columns can be different).
* Discriminator is an index of a variant in the variants list, it also has special value called NULL_DISCRIMINATOR
* that indicates that the value in the row is NULL.
*
* We want to be able to extend Variant column for free without rewriting the data, but as we don't care about the
* order of variants during Variant creation (we want Variant(T1, T2) to be the same as Variant(T2, T1)), we support
* some global order of nested types inside Variant during type creation, so after extension the order of variant types
* (and so their discriminators) can change. For example: Variant(T1, T3) -> Variant(T1, T2, T3).
* To avoid full rewrite of discriminators column on Variant extension, we differentiate local order of variants
* inside a column and global order of variants created during type creation. So, ColumnVariant stores only local
* discriminators and additionally stores the mapping between global and local discriminators.
* So, when we need to extend Variant column with new variant, we can just append it to a list of variant columns
* with new local discriminator and update mapping from global to local orders.
*
* Note that two instances of ColumnVariant can have different local orders, so we should always use global
* discriminators during inter-column interactions.
*
* Let's take an example with type Variant(UInt32, String, Array(UInt32)):
* During type creation we will sort types by their names and get the global order: Array(UInt32), String, UInt32.
* So, type Array(UInt32) will have global discriminator 0, String - 1 and UInt32 - 2.
* Let's say we have a column with local order (String, UInt32, Array(UInt32)) and values:
* 'Hello', 42, NULL, 'World', 43, [1, 2, 3], NULL, 44
*
* Let's see how these values will be stored in ColumnVariant:
*
* local_to_global_discriminators: {0 : 1, 1 : 2, 2 : 0}
* global_to_local_discriminators: {0 : 2, 1 : 0, 2 : 1}
* local_discriminators offsets String UInt32 Array(UInt32)
* 0 0 'Hello' 42 [1, 2, 3]
* 1 0 'World' 43
* NULL_DISCRIMINATOR 0 44
* 0 1
* 1 1
* 2 0
* NULL_DISCRIMINATOR 0
* 1 2
*
*/
class ColumnVariant final : public COWHelper<IColumn, ColumnVariant>
{
public:
using Discriminator = UInt8;
using Discriminators = PaddedPODArray<Discriminator>;
using ColumnDiscriminators = ColumnVector<Discriminator>;
using ColumnOffsets = ColumnVector<Offset>;
static constexpr UInt8 NULL_DISCRIMINATOR = std::numeric_limits<Discriminator>::max(); /// 255
static constexpr size_t MAX_NESTED_COLUMNS = std::numeric_limits<Discriminator>::max(); /// 255
private:
friend class COWHelper<IColumn, ColumnVariant>;
using NestedColumns = std::vector<WrappedPtr>;
/// Create an empty column with provided variants.
/// Variants are in global order.
explicit ColumnVariant(MutableColumns && variants_);
/// Variants are in local order according to provided mapping.
explicit ColumnVariant(MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
/// Create column from discriminators column and list of variant columns.
/// Offsets column should be constructed according to the discriminators.
/// Variants are in global order.
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumns && variants_);
/// Variants are in local order according to provided mapping.
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
/// Create column from discriminators column, offsets column and list of variant columns.
/// Variants are in global order.
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_);
/// Variants are in local order according to provided mapping.
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
ColumnVariant(const ColumnVariant &) = default;
public:
/** Create immutable column using immutable arguments. This arguments may be shared with other variants.
* Use IColumn::mutate in order to make mutable column and mutate shared nested variants.
*/
using Base = COWHelper<IColumn, ColumnVariant>;
static Ptr create(const Columns & variants_) { return create(variants_, {}); }
static Ptr create(const Columns & variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
static Ptr create(const ColumnPtr & local_discriminators_, const Columns & variants_) { return create(local_discriminators_, variants_, {}); }
static Ptr create(const ColumnPtr & local_discriminators_, const Columns & variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
static Ptr create(const ColumnPtr & local_discriminators_, const DB::ColumnPtr & offsets_, const Columns & variants_) { return create(local_discriminators_, offsets_, variants_, {}); }
static Ptr create(const ColumnPtr & local_discriminators_, const DB::ColumnPtr & offsets_, const Columns & variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
static MutablePtr create(MutableColumns && variants_)
{
return Base::create(std::move(variants_));
}
static MutablePtr create(MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_)
{
return Base::create(std::move(variants_), local_to_global_discriminators_);
}
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumns && variants_)
{
return Base::create(std::move(local_discriminators_), std::move(variants_));
}
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_)
{
return Base::create(std::move(local_discriminators_), std::move(variants_), local_to_global_discriminators_);
}
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_)
{
return Base::create(std::move(local_discriminators_), std::move(offsets_), std::move(variants_));
}
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_)
{
return Base::create(std::move(local_discriminators_), std::move(offsets_), std::move(variants_), local_to_global_discriminators_);
}
std::string getName() const override;
const char * getFamilyName() const override { return "Variant"; }
TypeIndex getDataType() const override { return TypeIndex::Variant; }
MutableColumnPtr cloneEmpty() const override;
MutableColumnPtr cloneResized(size_t size) const override;
size_t ALWAYS_INLINE offsetAt(size_t i) const { return getOffsets()[i]; }
Discriminator ALWAYS_INLINE localDiscriminatorAt(size_t i) const { return getLocalDiscriminators()[i]; }
Discriminator ALWAYS_INLINE globalDiscriminatorAt(size_t i) const { return globalDiscriminatorByLocal(getLocalDiscriminators()[i]); }
Discriminator ALWAYS_INLINE globalDiscriminatorByLocal(Discriminator local_discr) const
{
/// NULL_DISCRIMINATOR is always the same in local and global orders.
return local_discr == NULL_DISCRIMINATOR ? NULL_DISCRIMINATOR : local_to_global_discriminators[local_discr];
}
Discriminator ALWAYS_INLINE localDiscriminatorByGlobal(Discriminator global_discr) const
{
/// NULL_DISCRIMINATOR is always the same in local and global orders.
return global_discr == NULL_DISCRIMINATOR ? NULL_DISCRIMINATOR : global_to_local_discriminators[global_discr];
}
size_t size() const override
{
return offsets->size();
}
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
bool isDefaultAt(size_t n) const override;
bool isNullAt(size_t n) const override;
StringRef getDataAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
void insert(const Field & x) override;
void insertIntoVariant(const Field & x, Discriminator global_discr);
void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
void insertDefault() override;
void insertManyDefaults(size_t length) override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type>
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
/// Variant type is not comparable.
int compareAt(size_t, size_t, const IColumn &, int) const override
{
return 0;
}
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method compareColumn is not supported for ColumnVariant");
}
bool hasEqualValues() const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;
void reserve(size_t n) override;
void ensureOwnership() override;
size_t byteSize() const override;
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
ColumnPtr compress() const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override;
bool isFinalized() const override;
const IColumn & getVariantByLocalDiscriminator(size_t discr) const { return *variants[discr]; }
const IColumn & getVariantByGlobalDiscriminator(size_t discr) const { return *variants[global_to_local_discriminators.at(discr)]; }
IColumn & getVariantByLocalDiscriminator(size_t discr) { return *variants[discr]; }
IColumn & getVariantByGlobalDiscriminator(size_t discr) { return *variants[global_to_local_discriminators.at(discr)]; }
const ColumnPtr & getVariantPtrByLocalDiscriminator(size_t discr) const { return variants[discr]; }
const ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) const { return variants[global_to_local_discriminators.at(discr)]; }
ColumnPtr & getVariantPtrByLocalDiscriminator(size_t discr) { return variants[discr]; }
ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) { return variants[global_to_local_discriminators.at(discr)]; }
const IColumn & getLocalDiscriminatorsColumn() const { return *local_discriminators; }
IColumn & getLocalDiscriminatorsColumn() { return *local_discriminators; }
const ColumnPtr & getLocalDiscriminatorsPtr() const { return local_discriminators; }
ColumnPtr & getLocalDiscriminatorsPtr() { return local_discriminators; }
const Discriminators & ALWAYS_INLINE getLocalDiscriminators() const { return assert_cast<const ColumnDiscriminators &>(*local_discriminators).getData(); }
Discriminators & ALWAYS_INLINE getLocalDiscriminators() { return assert_cast<ColumnDiscriminators &>(*local_discriminators).getData(); }
const IColumn & getOffsetsColumn() const { return *offsets; }
IColumn & getOffsetsColumn() { return *offsets; }
const ColumnPtr & getOffsetsPtr() const { return offsets; }
ColumnPtr & getOffsetsPtr() { return offsets; }
const Offsets & ALWAYS_INLINE getOffsets() const { return assert_cast<const ColumnOffsets &>(*offsets).getData(); }
Offsets & ALWAYS_INLINE getOffsets() { return assert_cast<ColumnOffsets &>(*offsets).getData(); }
size_t getNumVariants() const { return variants.size(); }
bool hasOnlyNulls() const
{
/// If all variants are empty, we have only NULL values.
return std::all_of(variants.begin(), variants.end(), [](const WrappedPtr & v){ return v->empty(); });
}
/// Check if local and global order is the same.
bool hasGlobalVariantsOrder() const
{
for (size_t i = 0; i != local_to_global_discriminators.size(); ++i)
{
if (local_to_global_discriminators[i] != i)
return false;
}
return true;
}
/// Check if we have only 1 non-empty variant and no NULL values,
/// and if so, return the discriminator of this non-empty column.
std::optional<Discriminator> getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls() const;
/// Apply null map to a Variant column.
/// Replace corresponding discriminators with NULL_DISCRIMINATOR
/// and filter out rows in variants if needed.
void applyNullMap(const ColumnVector<UInt8>::Container & null_map);
void applyNegatedNullMap(const ColumnVector<UInt8>::Container & null_map);
private:
void initIdentityGlobalToLocalDiscriminatorsMapping();
template <bool inverted>
void applyNullMapImpl(const ColumnVector<UInt8>::Container & null_map);
WrappedPtr local_discriminators;
WrappedPtr offsets;
NestedColumns variants;
std::vector<Discriminator> global_to_local_discriminators;
std::vector<Discriminator> local_to_global_discriminators;
};
}

View File

@ -2,6 +2,7 @@
#include <Columns/ColumnCompressed.h> #include <Columns/ColumnCompressed.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Columns/ColumnConst.h>
#include <Columns/MaskOperations.h> #include <Columns/MaskOperations.h>
#include <Columns/RadixSortHelper.h> #include <Columns/RadixSortHelper.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
@ -940,7 +941,7 @@ ColumnPtr ColumnVector<T>::compress() const
} }
template <typename T> template <typename T>
ColumnPtr ColumnVector<T>::createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const ColumnPtr ColumnVector<T>::createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const
{ {
if (offsets.size() + shift != size()) if (offsets.size() + shift != size())
throw Exception(ErrorCodes::LOGICAL_ERROR, throw Exception(ErrorCodes::LOGICAL_ERROR,
@ -949,7 +950,7 @@ ColumnPtr ColumnVector<T>::createWithOffsets(const IColumn::Offsets & offsets, c
auto res = this->create(); auto res = this->create();
auto & res_data = res->getData(); auto & res_data = res->getData();
T default_value = static_cast<T>(default_field.safeGet<T>()); T default_value = assert_cast<const ColumnVector<T> &>(column_with_default_value.getDataColumn()).getElement(0);
res_data.resize_fill(total_rows, default_value); res_data.resize_fill(total_rows, default_value);
for (size_t i = 0; i < offsets.size(); ++i) for (size_t i = 0; i < offsets.size(); ++i)
res_data[offsets[i]] = data[i + shift]; res_data[offsets[i]] = data[i + shift];

View File

@ -300,7 +300,7 @@ public:
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit); return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
} }
ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const override; ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const override;
ColumnPtr compress() const override; ColumnPtr compress() const override;

View File

@ -2,6 +2,7 @@
#include <IO/Operators.h> #include <IO/Operators.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h> #include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Core/Field.h> #include <Core/Field.h>
#include <DataTypes/Serializations/SerializationInfo.h> #include <DataTypes/Serializations/SerializationInfo.h>
@ -34,7 +35,7 @@ void IColumn::insertFrom(const IColumn & src, size_t n)
insert(src[n]); insert(src[n]);
} }
ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const
{ {
if (offsets.size() + shift != size()) if (offsets.size() + shift != size())
throw Exception(ErrorCodes::LOGICAL_ERROR, throw Exception(ErrorCodes::LOGICAL_ERROR,
@ -50,14 +51,14 @@ ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & defa
current_offset = offsets[i]; current_offset = offsets[i];
if (offsets_diff > 1) if (offsets_diff > 1)
res->insertMany(default_field, offsets_diff - 1); res->insertManyFrom(column_with_default_value.getDataColumn(), 0, offsets_diff - 1);
res->insertFrom(*this, i + shift); res->insertFrom(*this, i + shift);
} }
ssize_t offsets_diff = static_cast<ssize_t>(total_rows) - current_offset; ssize_t offsets_diff = static_cast<ssize_t>(total_rows) - current_offset;
if (offsets_diff > 1) if (offsets_diff > 1)
res->insertMany(default_field, offsets_diff - 1); res->insertManyFrom(column_with_default_value.getDataColumn(), 0, offsets_diff - 1);
return res; return res;
} }
@ -83,6 +84,11 @@ bool isColumnNullable(const IColumn & column)
return checkColumn<ColumnNullable>(column); return checkColumn<ColumnNullable>(column);
} }
bool isColumnNullableOrLowCardinalityNullable(const IColumn & column)
{
return isColumnNullable(column) || isColumnLowCardinalityNullable(column);
}
bool isColumnConst(const IColumn & column) bool isColumnConst(const IColumn & column)
{ {
return checkColumn<ColumnConst>(column); return checkColumn<ColumnConst>(column);

View File

@ -34,6 +34,7 @@ class Arena;
class ColumnGathererStream; class ColumnGathererStream;
class Field; class Field;
class WeakHash32; class WeakHash32;
class ColumnConst;
/* /*
* Represents a set of equal ranges in previous column to perform sorting in current column. * Represents a set of equal ranges in previous column to perform sorting in current column.
@ -459,10 +460,10 @@ public:
/// Returns column with @total_size elements. /// Returns column with @total_size elements.
/// In result column values from current column are at positions from @offsets. /// In result column values from current column are at positions from @offsets.
/// Other values are filled by @default_value. /// Other values are filled by value from @column_with_default_value.
/// @shift means how much rows to skip from the beginning of current column. /// @shift means how much rows to skip from the beginning of current column.
/// Used to create full column from sparse. /// Used to create full column from sparse.
[[nodiscard]] virtual Ptr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const; [[nodiscard]] virtual Ptr createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const;
/// Compress column in memory to some representation that allows to decompress it back. /// Compress column in memory to some representation that allows to decompress it back.
/// Return itself if compression is not applicable for this column type. /// Return itself if compression is not applicable for this column type.
@ -659,4 +660,7 @@ bool isColumnConst(const IColumn & column);
/// True if column's an ColumnNullable instance. It's just a syntax sugar for type check. /// True if column's an ColumnNullable instance. It's just a syntax sugar for type check.
bool isColumnNullable(const IColumn & column); bool isColumnNullable(const IColumn & column);
/// True if column's is ColumnNullable or ColumnLowCardinality with nullable nested column.
bool isColumnNullableOrLowCardinalityNullable(const IColumn & column);
} }

View File

@ -17,7 +17,7 @@ namespace ErrorCodes
} }
template <typename T> template <typename T>
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted) void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted, T default_value)
{ {
if (mask.size() < data.size()) if (mask.size() < data.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Mask size should be no less than data size."); throw Exception(ErrorCodes::LOGICAL_ERROR, "Mask size should be no less than data size.");
@ -38,7 +38,7 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
--from; --from;
} }
else else
data[index] = T(); data[index] = default_value;
--index; --index;
} }
@ -49,7 +49,7 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
/// Explicit instantiations - not to place the implementation of the function above in the header file. /// Explicit instantiations - not to place the implementation of the function above in the header file.
#define INSTANTIATE(TYPE) \ #define INSTANTIATE(TYPE) \
template void expandDataByMask<TYPE>(PaddedPODArray<TYPE> &, const PaddedPODArray<UInt8> &, bool); template void expandDataByMask<TYPE>(PaddedPODArray<TYPE> &, const PaddedPODArray<UInt8> &, bool, TYPE);
INSTANTIATE(UInt8) INSTANTIATE(UInt8)
INSTANTIATE(UInt16) INSTANTIATE(UInt16)

View File

@ -13,7 +13,7 @@ namespace DB
/// If inverted is true, we will work with inverted mask. This function is used in implementations of /// If inverted is true, we will work with inverted mask. This function is used in implementations of
/// expand() method in IColumn interface. /// expand() method in IColumn interface.
template <typename T> template <typename T>
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted); void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted, T default_value = T());
struct MaskInfo struct MaskInfo
{ {

View File

@ -0,0 +1,692 @@
#include <Columns/ColumnVariant.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <gtest/gtest.h>
using namespace DB;
TEST(ColumnVariant, CreateFromEmptyColumns)
{
MutableColumns columns;
columns.push_back(ColumnUInt32::create());
columns.push_back(ColumnString::create());
auto column = ColumnVariant::create(std::move(columns));
ASSERT_TRUE(column->empty() && column->getLocalDiscriminators().empty() && column->getOffsets().empty());
}
TEST(ColumnVariant, CreateFromEmptyColumnsWithLocalOrder)
{
MutableColumns columns;
columns.push_back(ColumnUInt32::create());
columns.push_back(ColumnString::create());
std::vector<ColumnVariant::Discriminator> local_to_global_discriminators;
local_to_global_discriminators.push_back(1);
local_to_global_discriminators.push_back(0);
auto column = ColumnVariant::create(std::move(columns), local_to_global_discriminators);
ASSERT_TRUE(column->empty() && column->getLocalDiscriminators().empty() && column->getOffsets().empty());
ASSERT_EQ(column->localDiscriminatorByGlobal(0), 0);
ASSERT_EQ(column->localDiscriminatorByGlobal(1), 1);
ASSERT_EQ(column->globalDiscriminatorByLocal(0), 0);
ASSERT_EQ(column->globalDiscriminatorByLocal(1), 1);
}
MutableColumns createColumns1()
{
MutableColumns columns;
auto column1 = ColumnUInt64::create();
column1->insertValue(42);
columns.push_back(std::move(column1));
auto column2 = ColumnString::create();
column2->insertData("Hello", 5);
column2->insertData("World", 5);
columns.push_back(std::move(column2));
auto column3 = ColumnUInt32::create();
columns.push_back(std::move(column3));
return columns;
}
MutableColumnPtr createDiscriminators1()
{
auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
discriminators_column->insertValue(0);
discriminators_column->insertValue(1);
discriminators_column->insertValue(ColumnVariant::NULL_DISCRIMINATOR);
discriminators_column->insertValue(1);
discriminators_column->insertValue(ColumnVariant::NULL_DISCRIMINATOR);
return discriminators_column;
}
void reorderColumns(const std::vector<ColumnVariant::Discriminator> & local_to_global_order, MutableColumns & columns)
{
MutableColumns res;
for (auto global_discr : local_to_global_order)
res.push_back(std::move(columns[global_discr]));
columns = std::move(res);
}
template <typename Ptr>
void reorderDiscriminators(const std::vector<ColumnVariant::Discriminator> & local_to_global_order, Ptr & discriminators)
{
std::vector<ColumnVariant::Discriminator> global_to_local_order(local_to_global_order.size());
for (size_t i = 0; i != local_to_global_order.size(); ++i)
global_to_local_order[local_to_global_order[i]] = i;
auto & discriminators_data = assert_cast<ColumnVariant::ColumnDiscriminators *>(discriminators.get())->getData();
for (auto & discr : discriminators_data)
{
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
discr = global_to_local_order[discr];
}
}
MutableColumnPtr createOffsets1()
{
auto offsets = ColumnVariant::ColumnOffsets::create();
offsets->insertValue(0);
offsets->insertValue(0);
offsets->insertValue(0);
offsets->insertValue(1);
offsets->insertValue(0);
return offsets;
}
std::vector<ColumnVariant::Discriminator> createLocalToGlobalOrder1()
{
std::vector<ColumnVariant::Discriminator> local_to_global_discriminators;
local_to_global_discriminators.push_back(1);
local_to_global_discriminators.push_back(2);
local_to_global_discriminators.push_back(0);
return local_to_global_discriminators;
}
void checkColumnVariant1(ColumnVariant * column)
{
const auto & offsets = column->getOffsets();
ASSERT_EQ(column->size(), 5);
ASSERT_EQ(offsets[0], 0);
ASSERT_EQ(offsets[1], 0);
ASSERT_EQ(offsets[3], 1);
ASSERT_TRUE(column->isDefaultAt(2) && column->isDefaultAt(4));
ASSERT_EQ((*column)[0].get<UInt32>(), 42);
ASSERT_EQ((*column)[1].get<String>(), "Hello");
ASSERT_TRUE((*column)[2].isNull());
ASSERT_EQ((*column)[3].get<String>(), "World");
ASSERT_TRUE((*column)[4].isNull());
}
void checkColumnVariant1Order(ColumnVariant * column)
{
ASSERT_EQ(column->localDiscriminatorByGlobal(0), 2);
ASSERT_EQ(column->localDiscriminatorByGlobal(1), 0);
ASSERT_EQ(column->localDiscriminatorByGlobal(2), 1);
ASSERT_EQ(column->globalDiscriminatorByLocal(0), 1);
ASSERT_EQ(column->globalDiscriminatorByLocal(1), 2);
ASSERT_EQ(column->globalDiscriminatorByLocal(2), 0);
ASSERT_EQ(column->localDiscriminatorAt(0), 2);
ASSERT_EQ(column->localDiscriminatorAt(1), 0);
ASSERT_EQ(column->localDiscriminatorAt(2), ColumnVariant::NULL_DISCRIMINATOR);
ASSERT_EQ(column->localDiscriminatorAt(3), 0);
ASSERT_EQ(column->localDiscriminatorAt(4), ColumnVariant::NULL_DISCRIMINATOR);
ASSERT_EQ(column->globalDiscriminatorAt(0), 0);
ASSERT_EQ(column->globalDiscriminatorAt(1), 1);
ASSERT_EQ(column->globalDiscriminatorAt(2), ColumnVariant::NULL_DISCRIMINATOR);
ASSERT_EQ(column->globalDiscriminatorAt(3), 1);
ASSERT_EQ(column->globalDiscriminatorAt(4), ColumnVariant::NULL_DISCRIMINATOR);
}
TEST(ColumnVariant, CreateFromDiscriminatorsAndColumns)
{
auto columns = createColumns1();
auto discriminators = createDiscriminators1();
auto column = ColumnVariant::create(std::move(discriminators), std::move(columns));
checkColumnVariant1(column.get());
}
TEST(ColumnVariant, CreateFromDiscriminatorsAndColumnsWithLocalOrder)
{
auto local_to_global_order = createLocalToGlobalOrder1();
auto columns = createColumns1();
reorderColumns(local_to_global_order, columns);
auto discriminators = createDiscriminators1();
reorderDiscriminators(local_to_global_order, discriminators);
auto column = ColumnVariant::create(std::move(discriminators), std::move(columns), createLocalToGlobalOrder1());
checkColumnVariant1(column.get());
checkColumnVariant1Order(column.get());
}
TEST(ColumnVariant, CreateFromDiscriminatorsOffsetsAndColumns)
{
auto columns = createColumns1();
auto discriminators = createDiscriminators1();
auto offsets = createOffsets1();
auto column = ColumnVariant::create(std::move(discriminators), std::move(offsets), std::move(columns));
checkColumnVariant1(column.get());
}
TEST(ColumnVariant, CreateFromDiscriminatorsOffsetsAndColumnsWithLocalOrder)
{
auto local_to_global_order = createLocalToGlobalOrder1();
auto columns = createColumns1();
reorderColumns(local_to_global_order, columns);
auto discriminators = createDiscriminators1();
reorderDiscriminators(local_to_global_order, discriminators);
auto offsets = createOffsets1();
auto column = ColumnVariant::create(std::move(discriminators), std::move(offsets), std::move(columns), createLocalToGlobalOrder1());
checkColumnVariant1(column.get());
checkColumnVariant1Order(column.get());
}
ColumnVariant::MutablePtr createVariantWithOneFullColumNoNulls(size_t size, bool change_order)
{
MutableColumns columns;
auto column1 = ColumnUInt64::create();
for (size_t i = 0; i != size; ++i)
column1->insertValue(i);
columns.push_back(std::move(column1));
auto column2 = ColumnString::create();
columns.push_back(std::move(column2));
auto column3 = ColumnUInt32::create();
columns.push_back(std::move(column3));
auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
for (size_t i = 0; i != size; ++i)
discriminators_column->insertValue(0);
if (change_order)
{
auto local_to_global_order = createLocalToGlobalOrder1();
reorderColumns(local_to_global_order, columns);
reorderDiscriminators(local_to_global_order, discriminators_column);
return ColumnVariant::create(std::move(discriminators_column), std::move(columns), createLocalToGlobalOrder1());
}
return ColumnVariant::create(std::move(discriminators_column), std::move(columns));
}
TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNulls)
{
auto column = createVariantWithOneFullColumNoNulls(3, false);
const auto & offsets = column->getOffsets();
ASSERT_EQ(column->size(), 3);
ASSERT_EQ(offsets[0], 0);
ASSERT_EQ(offsets[1], 1);
ASSERT_EQ(offsets[2], 2);
ASSERT_EQ((*column)[0].get<UInt64>(), 0);
ASSERT_EQ((*column)[1].get<UInt64>(), 1);
ASSERT_EQ((*column)[2].get<UInt64>(), 2);
}
TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNullsWithLocalOrder)
{
auto column = createVariantWithOneFullColumNoNulls(3, true);
const auto & offsets = column->getOffsets();
ASSERT_EQ(column->size(), 3);
ASSERT_EQ(offsets[0], 0);
ASSERT_EQ(offsets[1], 1);
ASSERT_EQ(offsets[2], 2);
ASSERT_EQ((*column)[0].get<UInt64>(), 0);
ASSERT_EQ((*column)[1].get<UInt64>(), 1);
ASSERT_EQ((*column)[2].get<UInt64>(), 2);
ASSERT_EQ(column->localDiscriminatorAt(0), 2);
ASSERT_EQ(column->localDiscriminatorAt(1), 2);
ASSERT_EQ(column->localDiscriminatorAt(2), 2);
ASSERT_EQ(column->globalDiscriminatorAt(0), 0);
ASSERT_EQ(column->globalDiscriminatorAt(0), 0);
ASSERT_EQ(column->globalDiscriminatorAt(0), 0);
}
TEST(ColumnVariant, CloneResizedToEmpty)
{
auto column = ColumnVariant::create(createDiscriminators1(), createOffsets1(), createColumns1());
auto resized_column = column->cloneResized(0);
ASSERT_TRUE(resized_column->empty());
}
TEST(ColumnVariant, CloneResizedToLarge)
{
auto column = ColumnVariant::create(createDiscriminators1(), createOffsets1(), createColumns1());
auto resized_column = column->cloneResized(7);
const auto * resized_column_variant = assert_cast<const ColumnVariant *>(resized_column.get());
ASSERT_EQ(resized_column_variant->size(), 7);
const auto & offsets = resized_column_variant->getOffsets();
for (size_t i = 0; i != 7; ++i)
{
if (i == 3)
ASSERT_EQ(offsets[i], 1);
else
ASSERT_EQ(offsets[i], 0);
}
const auto & discriminators = resized_column_variant->getLocalDiscriminators();
std::vector<size_t> null_indexes = {2, 4, 5, 6};
for (size_t i : null_indexes)
ASSERT_EQ(discriminators[i], ColumnVariant::NULL_DISCRIMINATOR);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(0).size(), 1);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(1).size(), 2);
}
TEST(ColumnVariant, CloneResizedWithOneFullColumnNoNulls)
{
auto column = createVariantWithOneFullColumNoNulls(5, false);
auto resized_column = column->cloneResized(3);
const auto * resized_column_variant = assert_cast<const ColumnVariant *>(resized_column.get());
ASSERT_EQ(resized_column_variant->size(), 3);
const auto & offsets = resized_column_variant->getOffsets();
for (size_t i = 0; i != 3; ++i)
ASSERT_EQ(offsets[i], i);
const auto & discriminators = resized_column_variant->getLocalDiscriminators();
for (size_t i = 0; i != 3; ++i)
ASSERT_EQ(discriminators[i], 0);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(0).size(), 3);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(1).size(), 0);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(2).size(), 0);
}
MutableColumns createColumns2()
{
MutableColumns columns;
auto column1 = ColumnUInt64::create();
column1->insertValue(42);
column1->insertValue(43);
column1->insertValue(44);
columns.push_back(std::move(column1));
auto column2 = ColumnString::create();
column2->insertData("Hello", 5);
column2->insertData("World", 5);
columns.push_back(std::move(column2));
auto column3 = ColumnUInt8::create();
columns.push_back(std::move(column3));
return columns;
}
TEST(ColumnVariant, CloneResizedGeneral1)
{
/// D c1 c2 c3
/// 0 42 Hello
/// 1 43 World
/// NULL 44
/// 0
/// 1
/// NULL
/// 0
auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
discriminators_column->insertValue(0);
discriminators_column->insertValue(1);
discriminators_column->insertValue(ColumnVariant::NULL_DISCRIMINATOR);
discriminators_column->insertValue(0);
discriminators_column->insertValue(1);
discriminators_column->insertValue(ColumnVariant::NULL_DISCRIMINATOR);
discriminators_column->insertValue(0);
auto column = ColumnVariant::create(std::move(discriminators_column), createColumns2());
auto resized_column = column->cloneResized(4);
const auto * resized_column_variant = assert_cast<const ColumnVariant *>(resized_column.get());
ASSERT_EQ(resized_column_variant->size(), 4);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(0).size(), 2);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(1).size(), 1);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(2).size(), 0);
const auto & discriminators = resized_column_variant->getLocalDiscriminators();
ASSERT_EQ(discriminators[0], 0);
ASSERT_EQ(discriminators[1], 1);
ASSERT_EQ(discriminators[2], ColumnVariant::NULL_DISCRIMINATOR);
ASSERT_EQ(discriminators[3], 0);
const auto & offsets = resized_column_variant->getOffsets();
ASSERT_EQ(offsets[0], 0);
ASSERT_EQ(offsets[1], 0);
ASSERT_EQ(offsets[3], 1);
ASSERT_EQ((*resized_column_variant)[0].get<UInt64>(), 42);
ASSERT_EQ((*resized_column_variant)[1].get<String>(), "Hello");
ASSERT_EQ((*resized_column_variant)[3].get<UInt64>(), 43);
}
TEST(ColumnVariant, CloneResizedGeneral2)
{
/// D c1 c2 c3
/// 0 42 Hello
/// NULL 43 World
/// NULL 44
/// 0
/// 1
/// 1
/// 0
auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
discriminators_column->insertValue(0);
discriminators_column->insertValue(ColumnVariant::NULL_DISCRIMINATOR);
discriminators_column->insertValue(ColumnVariant::NULL_DISCRIMINATOR);
discriminators_column->insertValue(0);
discriminators_column->insertValue(1);
discriminators_column->insertValue(1);
discriminators_column->insertValue(0);
auto column = ColumnVariant::create(std::move(discriminators_column), createColumns2());
auto resized_column = column->cloneResized(3);
const auto * resized_column_variant = assert_cast<const ColumnVariant *>(resized_column.get());
ASSERT_EQ(resized_column_variant->size(), 3);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(0).size(), 1);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(1).size(), 0);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(2).size(), 0);
const auto & discriminators = resized_column_variant->getLocalDiscriminators();
ASSERT_EQ(discriminators[0], 0);
ASSERT_EQ(discriminators[1], ColumnVariant::NULL_DISCRIMINATOR);
ASSERT_EQ(discriminators[2], ColumnVariant::NULL_DISCRIMINATOR);
const auto & offsets = resized_column_variant->getOffsets();
ASSERT_EQ(offsets[0], 0);
ASSERT_EQ((*resized_column_variant)[0].get<UInt64>(), 42);
}
TEST(ColumnVariant, CloneResizedGeneral3)
{
/// D c1 c2 c3
/// 0 42 Hello
/// 1 43 World
/// 1 44
/// 0
/// NULL
/// NULL
/// 0
auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
discriminators_column->insertValue(0);
discriminators_column->insertValue(1);
discriminators_column->insertValue(1);
discriminators_column->insertValue(0);
discriminators_column->insertValue(ColumnVariant::NULL_DISCRIMINATOR);
discriminators_column->insertValue(ColumnVariant::NULL_DISCRIMINATOR);
discriminators_column->insertValue(0);
auto column = ColumnVariant::create(std::move(discriminators_column), createColumns2());
auto resized_column = column->cloneResized(5);
const auto * resized_column_variant = assert_cast<const ColumnVariant *>(resized_column.get());
ASSERT_EQ(resized_column_variant->size(), 5);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(0).size(), 2);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(1).size(), 2);
ASSERT_EQ(resized_column_variant->getVariantByLocalDiscriminator(2).size(), 0);
const auto & discriminators = resized_column_variant->getLocalDiscriminators();
ASSERT_EQ(discriminators[0], 0);
ASSERT_EQ(discriminators[1], 1);
ASSERT_EQ(discriminators[2], 1);
ASSERT_EQ(discriminators[3], 0);
const auto & offsets = resized_column_variant->getOffsets();
ASSERT_EQ(offsets[0], 0);
ASSERT_EQ(offsets[1], 0);
ASSERT_EQ(offsets[2], 1);
ASSERT_EQ(offsets[3], 1);
ASSERT_EQ((*resized_column_variant)[0].get<UInt64>(), 42);
ASSERT_EQ((*resized_column_variant)[1].get<String>(), "Hello");
ASSERT_EQ((*resized_column_variant)[2].get<String>(), "World");
ASSERT_EQ((*resized_column_variant)[3].get<UInt64>(), 43);
}
MutableColumnPtr createDiscriminators2()
{
auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
discriminators_column->insertValue(0);
discriminators_column->insertValue(1);
discriminators_column->insertValue(ColumnVariant::NULL_DISCRIMINATOR);
discriminators_column->insertValue(0);
discriminators_column->insertValue(1);
discriminators_column->insertValue(ColumnVariant::NULL_DISCRIMINATOR);
discriminators_column->insertValue(0);
return discriminators_column;
}
std::vector<ColumnVariant::Discriminator> createLocalToGlobalOrder2()
{
std::vector<ColumnVariant::Discriminator> local_to_global_discriminators;
local_to_global_discriminators.push_back(2);
local_to_global_discriminators.push_back(0);
local_to_global_discriminators.push_back(1);
return local_to_global_discriminators;
}
ColumnVariant::MutablePtr createVariantColumn1(bool reorder)
{
auto columns = createColumns1();
auto discriminators = createDiscriminators1();
if (!reorder)
return ColumnVariant::create(std::move(discriminators), std::move(columns));
auto local_to_global_order = createLocalToGlobalOrder1();
reorderColumns(local_to_global_order, columns);
reorderDiscriminators(local_to_global_order, discriminators);
return ColumnVariant::create(std::move(discriminators), std::move(columns), local_to_global_order);
}
ColumnVariant::MutablePtr createVariantColumn2(bool reorder)
{
auto columns = createColumns2();
auto discriminators = createDiscriminators2();
if (!reorder)
return ColumnVariant::create(std::move(discriminators), std::move(columns));
auto local_to_global_order = createLocalToGlobalOrder2();
reorderColumns(local_to_global_order, columns);
reorderDiscriminators(local_to_global_order, discriminators);
return ColumnVariant::create(std::move(discriminators), std::move(columns), local_to_global_order);
}
TEST(ColumnVariant, InsertFrom)
{
for (bool change_order : {false, true})
{
auto column_to = createVariantColumn1(change_order);
auto column_from = createVariantColumn2(change_order);
column_to->insertFrom(*column_from, 3);
ASSERT_EQ(column_to->globalDiscriminatorAt(5), 0);
ASSERT_EQ((*column_to)[5].get<UInt64>(), 43);
}
}
TEST(ColumnVariant, InsertRangeFromOneColumnNoNulls)
{
for (bool change_order : {false, true})
{
auto column_to = createVariantColumn2(change_order);
auto column_from = createVariantWithOneFullColumNoNulls(5, change_order);
column_to->insertRangeFrom(*column_from, 2, 2);
ASSERT_EQ(column_to->globalDiscriminatorAt(7), 0);
ASSERT_EQ(column_to->globalDiscriminatorAt(8), 0);
ASSERT_EQ((*column_to)[7].get<UInt64>(), 2);
ASSERT_EQ((*column_to)[8].get<UInt64>(), 3);
}
}
TEST(ColumnVariant, InsertRangeFromGeneral)
{
for (bool change_order : {false, true})
{
auto column_to = createVariantColumn1(change_order);
auto column_from = createVariantColumn2(change_order);
column_to->insertRangeFrom(*column_from, 1, 4);
ASSERT_EQ(column_to->globalDiscriminatorAt(5), 1);
ASSERT_EQ(column_to->globalDiscriminatorAt(6), ColumnVariant::NULL_DISCRIMINATOR);
ASSERT_EQ(column_to->globalDiscriminatorAt(7), 0);
ASSERT_EQ(column_to->globalDiscriminatorAt(8), 1);
ASSERT_EQ((*column_to)[5].get<String>(), "Hello");
ASSERT_EQ((*column_to)[7].get<UInt64>(), 43);
ASSERT_EQ((*column_to)[8].get<String>(), "World");
}
}
TEST(ColumnVariant, InsertManyFrom)
{
for (bool change_order : {false, true})
{
auto column_to = createVariantColumn1(change_order);
auto column_from = createVariantColumn2(change_order);
column_to->insertManyFrom(*column_from, 3, 2);
ASSERT_EQ(column_to->globalDiscriminatorAt(5), 0);
ASSERT_EQ(column_to->globalDiscriminatorAt(6), 0);
ASSERT_EQ((*column_to)[5].get<UInt64>(), 43);
ASSERT_EQ((*column_to)[6].get<UInt64>(), 43);
}
}
TEST(ColumnVariant, PopBackOneColumnNoNulls)
{
auto column = createVariantWithOneFullColumNoNulls(5, false);
column->popBack(3);
ASSERT_EQ(column->size(), 2);
ASSERT_EQ(column->getVariantByLocalDiscriminator(0).size(), 2);
ASSERT_EQ((*column)[0].get<UInt64>(), 0);
ASSERT_EQ((*column)[1].get<UInt64>(), 1);
}
TEST(ColumnVariant, PopBackGeneral)
{
auto column = ColumnVariant::create(createDiscriminators2(), createColumns2());
column->popBack(4);
ASSERT_EQ(column->size(), 3);
ASSERT_EQ(column->getVariantByLocalDiscriminator(0).size(), 1);
ASSERT_EQ(column->getVariantByLocalDiscriminator(1).size(), 1);
ASSERT_EQ((*column)[0].get<UInt64>(), 42);
ASSERT_EQ((*column)[1].get<String>(), "Hello");
ASSERT_TRUE((*column)[2].isNull());
}
TEST(ColumnVariant, FilterOneColumnNoNulls)
{
auto column = createVariantWithOneFullColumNoNulls(3, false);
IColumn::Filter filter;
filter.push_back(1);
filter.push_back(0);
filter.push_back(1);
auto filtered_column = column->filter(filter, -1);
ASSERT_EQ(filtered_column->size(), 2);
ASSERT_EQ((*filtered_column)[0].get<UInt64>(), 0);
ASSERT_EQ((*filtered_column)[1].get<UInt64>(), 2);
}
TEST(ColumnVariant, FilterGeneral)
{
auto column = ColumnVariant::create(createDiscriminators2(), createColumns2());
IColumn::Filter filter;
filter.push_back(0);
filter.push_back(1);
filter.push_back(1);
filter.push_back(0);
filter.push_back(0);
filter.push_back(1);
filter.push_back(0);
auto filtered_column = column->filter(filter, -1);
ASSERT_EQ(filtered_column->size(), 3);
ASSERT_EQ((*filtered_column)[0].get<String>(), "Hello");
ASSERT_TRUE((*filtered_column)[1].isNull());
ASSERT_TRUE((*filtered_column)[2].isNull());
}
TEST(ColumnVariant, PermuteAndIndexOneColumnNoNulls)
{
auto column = createVariantWithOneFullColumNoNulls(4, false);
IColumn::Permutation permutation;
permutation.push_back(1);
permutation.push_back(3);
permutation.push_back(2);
permutation.push_back(0);
auto permuted_column = column->permute(permutation, 3);
ASSERT_EQ(permuted_column->size(), 3);
ASSERT_EQ((*permuted_column)[0].get<UInt64>(), 1);
ASSERT_EQ((*permuted_column)[1].get<UInt64>(), 3);
ASSERT_EQ((*permuted_column)[2].get<UInt64>(), 2);
auto index = ColumnUInt64::create();
index->getData().push_back(1);
index->getData().push_back(3);
index->getData().push_back(2);
index->getData().push_back(0);
auto indexed_column = column->index(*index, 3);
ASSERT_EQ(indexed_column->size(), 3);
ASSERT_EQ((*indexed_column)[0].get<UInt64>(), 1);
ASSERT_EQ((*indexed_column)[1].get<UInt64>(), 3);
ASSERT_EQ((*indexed_column)[2].get<UInt64>(), 2);
}
TEST(ColumnVariant, PermuteGeneral)
{
auto column = ColumnVariant::create(createDiscriminators2(), createColumns2());
IColumn::Permutation permutation;
permutation.push_back(3);
permutation.push_back(4);
permutation.push_back(1);
permutation.push_back(5);
auto permuted_column = column->permute(permutation, 4);
ASSERT_EQ(permuted_column->size(), 4);
ASSERT_EQ((*permuted_column)[0].get<UInt64>(), 43);
ASSERT_EQ((*permuted_column)[1].get<String>(), "World");
ASSERT_EQ((*permuted_column)[2].get<String>(), "Hello");
ASSERT_TRUE((*permuted_column)[3].isNull());
}
TEST(ColumnVariant, ReplicateOneColumnNoNull)
{
auto column = createVariantWithOneFullColumNoNulls(3, false);
IColumn::Offsets offsets;
offsets.push_back(0);
offsets.push_back(3);
offsets.push_back(6);
auto replicated_column = column->replicate(offsets);
ASSERT_EQ(replicated_column->size(), 6);
ASSERT_EQ((*replicated_column)[0].get<UInt64>(), 1);
ASSERT_EQ((*replicated_column)[1].get<UInt64>(), 1);
ASSERT_EQ((*replicated_column)[2].get<UInt64>(), 1);
ASSERT_EQ((*replicated_column)[3].get<UInt64>(), 2);
ASSERT_EQ((*replicated_column)[4].get<UInt64>(), 2);
ASSERT_EQ((*replicated_column)[5].get<UInt64>(), 2);
}
TEST(ColumnVariant, ReplicateGeneral)
{
auto column = ColumnVariant::create(createDiscriminators1(), createColumns1());
IColumn::Offsets offsets;
offsets.push_back(1);
offsets.push_back(3);
offsets.push_back(5);
offsets.push_back(5);
offsets.push_back(7);
auto replicated_column = column->replicate(offsets);
ASSERT_EQ(replicated_column->size(), 7);
ASSERT_EQ((*replicated_column)[0].get<UInt64>(), 42);
ASSERT_EQ((*replicated_column)[1].get<String>(), "Hello");
ASSERT_EQ((*replicated_column)[2].get<String>(), "Hello");
ASSERT_TRUE((*replicated_column)[3].isNull());
ASSERT_TRUE((*replicated_column)[4].isNull());
ASSERT_TRUE((*replicated_column)[5].isNull());
ASSERT_TRUE((*replicated_column)[6].isNull());
}
TEST(ColumnVariant, ScatterOneColumnNoNulls)
{
auto column = createVariantWithOneFullColumNoNulls(5, false);
IColumn::Selector selector;
selector.push_back(0);
selector.push_back(1);
selector.push_back(2);
selector.push_back(0);
selector.push_back(1);
auto columns = column->scatter(3, selector);
ASSERT_EQ(columns[0]->size(), 2);
ASSERT_EQ((*columns[0])[0].get<UInt64>(), 0);
ASSERT_EQ((*columns[0])[1].get<UInt64>(), 3);
ASSERT_EQ(columns[1]->size(), 2);
ASSERT_EQ((*columns[1])[0].get<UInt64>(), 1);
ASSERT_EQ((*columns[1])[1].get<UInt64>(), 4);
ASSERT_EQ(columns[2]->size(), 1);
ASSERT_EQ((*columns[2])[0].get<UInt64>(), 2);
}
TEST(ColumnVariant, ScatterGeneral)
{
auto column = ColumnVariant::create(createDiscriminators2(), createColumns2());
IColumn::Selector selector;
selector.push_back(0);
selector.push_back(0);
selector.push_back(2);
selector.push_back(0);
selector.push_back(1);
selector.push_back(2);
selector.push_back(1);
auto columns = column->scatter(3, selector);
ASSERT_EQ(columns[0]->size(), 3);
ASSERT_EQ((*columns[0])[0].get<UInt64>(), 42);
ASSERT_EQ((*columns[0])[1].get<String>(), "Hello");
ASSERT_EQ((*columns[0])[2].get<UInt64>(), 43);
ASSERT_EQ(columns[1]->size(), 2);
ASSERT_EQ((*columns[1])[0].get<String>(), "World");
ASSERT_EQ((*columns[1])[1].get<UInt64>(), 44);
ASSERT_EQ(columns[2]->size(), 2);
ASSERT_TRUE((*columns[2])[0].isNull());
ASSERT_TRUE((*columns[2])[1].isNull());
}

51
src/Common/AtomicLogger.h Normal file
View File

@ -0,0 +1,51 @@
#pragma once
#include <memory>
#include <Common/Logger.h>
#include <Common/SharedMutex.h>
#include <Common/SharedLockGuard.h>
/** AtomicLogger allows to atomically change logger.
* Standard library does not have atomic_shared_ptr, and we do not use std::atomic* operations,
* because standard library implementation uses fixed table of mutexes, and it is better to avoid contention here.
*/
class AtomicLogger
{
public:
explicit AtomicLogger(LoggerPtr logger_)
: logger(std::move(logger_))
{}
explicit AtomicLogger(const std::string & log_name)
: AtomicLogger(::getLogger(log_name))
{}
void store(LoggerPtr new_logger)
{
std::lock_guard lock(log_mutex);
logger = std::move(new_logger);
}
void store(const std::string & new_log_name)
{
auto new_logger = ::getLogger(new_log_name);
store(std::move(new_logger));
}
LoggerPtr load() const
{
DB::SharedLockGuard lock(log_mutex);
return logger;
}
String loadName() const
{
DB::SharedLockGuard lock(log_mutex);
return logger->name();
}
private:
mutable DB::SharedMutex log_mutex;
LoggerPtr logger;
};

View File

@ -87,7 +87,7 @@
M(MarksLoaderThreads, "Number of threads in thread pool for loading marks.") \ M(MarksLoaderThreads, "Number of threads in thread pool for loading marks.") \
M(MarksLoaderThreadsActive, "Number of threads in the thread pool for loading marks running a task.") \ M(MarksLoaderThreadsActive, "Number of threads in the thread pool for loading marks running a task.") \
M(MarksLoaderThreadsScheduled, "Number of queued or active jobs in the thread pool for loading marks.") \ M(MarksLoaderThreadsScheduled, "Number of queued or active jobs in the thread pool for loading marks.") \
M(IOPrefetchThreads, "Number of threads in the IO prefertch thread pool.") \ M(IOPrefetchThreads, "Number of threads in the IO prefetch thread pool.") \
M(IOPrefetchThreadsActive, "Number of threads in the IO prefetch thread pool running a task.") \ M(IOPrefetchThreadsActive, "Number of threads in the IO prefetch thread pool running a task.") \
M(IOPrefetchThreadsScheduled, "Number of queued or active jobs in the IO prefetch thread pool.") \ M(IOPrefetchThreadsScheduled, "Number of queued or active jobs in the IO prefetch thread pool.") \
M(IOWriterThreads, "Number of threads in the IO writer thread pool.") \ M(IOWriterThreads, "Number of threads in the IO writer thread pool.") \

View File

@ -592,6 +592,7 @@
M(710, FAULT_INJECTED) \ M(710, FAULT_INJECTED) \
M(711, FILECACHE_ACCESS_DENIED) \ M(711, FILECACHE_ACCESS_DENIED) \
M(712, TOO_MANY_MATERIALIZED_VIEWS) \ M(712, TOO_MANY_MATERIALIZED_VIEWS) \
M(713, BROKEN_PROJECTION) \
\ \
M(999, KEEPER_EXCEPTION) \ M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \ M(1000, POCO_EXCEPTION) \

View File

@ -257,6 +257,11 @@ void tryLogCurrentException(LoggerPtr logger, const std::string & start_of_messa
tryLogCurrentException(logger.get(), start_of_message); tryLogCurrentException(logger.get(), start_of_message);
} }
void tryLogCurrentException(const AtomicLogger & logger, const std::string & start_of_message)
{
tryLogCurrentException(logger.load(), start_of_message);
}
static void getNoSpaceLeftInfoMessage(std::filesystem::path path, String & msg) static void getNoSpaceLeftInfoMessage(std::filesystem::path path, String & msg)
{ {
path = std::filesystem::absolute(path); path = std::filesystem::absolute(path);
@ -529,6 +534,11 @@ void tryLogException(std::exception_ptr e, LoggerPtr logger, const std::string &
} }
} }
void tryLogException(std::exception_ptr e, const AtomicLogger & logger, const std::string & start_of_message)
{
tryLogException(e, logger.load(), start_of_message);
}
std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace) std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace)
{ {
return getExceptionMessageAndPattern(e, with_stacktrace, check_embedded_stacktrace).text; return getExceptionMessageAndPattern(e, with_stacktrace, check_embedded_stacktrace).text;

View File

@ -11,6 +11,7 @@
#include <base/scope_guard.h> #include <base/scope_guard.h>
#include <Common/LoggingFormatStringHelpers.h> #include <Common/LoggingFormatStringHelpers.h>
#include <Common/Logger.h> #include <Common/Logger.h>
#include <Common/AtomicLogger.h>
#include <Common/StackTrace.h> #include <Common/StackTrace.h>
#include <fmt/format.h> #include <fmt/format.h>
@ -245,6 +246,7 @@ using Exceptions = std::vector<std::exception_ptr>;
void tryLogCurrentException(const char * log_name, const std::string & start_of_message = ""); void tryLogCurrentException(const char * log_name, const std::string & start_of_message = "");
void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message = ""); void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message = "");
void tryLogCurrentException(LoggerPtr logger, const std::string & start_of_message = ""); void tryLogCurrentException(LoggerPtr logger, const std::string & start_of_message = "");
void tryLogCurrentException(const AtomicLogger & logger, const std::string & start_of_message = "");
/** Prints current exception in canonical format. /** Prints current exception in canonical format.
@ -290,6 +292,7 @@ struct ExecutionStatus
/// TODO: Logger leak constexpr overload /// TODO: Logger leak constexpr overload
void tryLogException(std::exception_ptr e, const char * log_name, const std::string & start_of_message = ""); void tryLogException(std::exception_ptr e, const char * log_name, const std::string & start_of_message = "");
void tryLogException(std::exception_ptr e, LoggerPtr logger, const std::string & start_of_message = ""); void tryLogException(std::exception_ptr e, LoggerPtr logger, const std::string & start_of_message = "");
void tryLogException(std::exception_ptr e, const AtomicLogger & logger, const std::string & start_of_message = "");
std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace = false); std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace = false);
PreformattedMessage getExceptionMessageAndPattern(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace = false); PreformattedMessage getExceptionMessageAndPattern(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace = false);

View File

@ -71,6 +71,28 @@ struct StringHashTableHash
res = _mm_crc32_u64(res, key.c); res = _mm_crc32_u64(res, key.c);
return res; return res;
} }
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
size_t ALWAYS_INLINE operator()(StringKey8 key) const
{
size_t res = -1ULL;
res = __crc32cd(static_cast<UInt32>(res), key);
return res;
}
size_t ALWAYS_INLINE operator()(StringKey16 key) const
{
size_t res = -1ULL;
res = __crc32cd(static_cast<UInt32>(res), key.items[0]);
res = __crc32cd(static_cast<UInt32>(res), key.items[1]);
return res;
}
size_t ALWAYS_INLINE operator()(StringKey24 key) const
{
size_t res = -1ULL;
res = __crc32cd(static_cast<UInt32>(res), key.a);
res = __crc32cd(static_cast<UInt32>(res), key.b);
res = __crc32cd(static_cast<UInt32>(res), key.c);
return res;
}
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
size_t ALWAYS_INLINE operator()(StringKey8 key) const size_t ALWAYS_INLINE operator()(StringKey8 key) const
{ {

View File

@ -43,7 +43,6 @@ LoggerRawPtr getRawLogger(const std::string & name);
LoggerRawPtr createRawLogger(const std::string & name, Poco::Channel * channel, Poco::Message::Priority level = Poco::Message::PRIO_INFORMATION); LoggerRawPtr createRawLogger(const std::string & name, Poco::Channel * channel, Poco::Message::Priority level = Poco::Message::PRIO_INFORMATION);
/** Returns true, if currently Logger with specified name is created. /** Returns true, if currently Logger with specified name is created.
* Otherwise, returns false. * Otherwise, returns false.
*/ */

View File

@ -1,6 +1,7 @@
#if defined(__ELF__) && !defined(OS_FREEBSD) #if defined(__ELF__) && !defined(OS_FREEBSD)
#include <Common/SymbolIndex.h> #include <Common/SymbolIndex.h>
#include <Common/MemorySanitizer.h>
#include <algorithm> #include <algorithm>
#include <optional> #include <optional>
@ -55,21 +56,6 @@ Otherwise you will get only exported symbols from program headers.
*/ */
#if defined(__clang__)
# pragma clang diagnostic ignored "-Wreserved-id-macro"
# pragma clang diagnostic ignored "-Wunused-macros"
#endif
#define __msan_unpoison_string(X) // NOLINT
#define __msan_unpoison(X, Y) // NOLINT
#if defined(ch_has_feature)
# if ch_has_feature(memory_sanitizer)
# undef __msan_unpoison_string
# undef __msan_unpoison
# include <sanitizer/msan_interface.h>
# endif
#endif
namespace DB namespace DB
{ {

View File

@ -9,6 +9,7 @@
#include <Common/ProfileEvents.h> #include <Common/ProfileEvents.h>
#include <Common/LoggingFormatStringHelpers.h> #include <Common/LoggingFormatStringHelpers.h>
#include <Common/Logger.h> #include <Common/Logger.h>
#include <Common/AtomicLogger.h>
namespace Poco { class Logger; } namespace Poco { class Logger; }
@ -20,7 +21,8 @@ using LogSeriesLimiterPtr = std::shared_ptr<LogSeriesLimiter>;
namespace namespace
{ {
[[maybe_unused]] const ::Poco::Logger * getLoggerHelper(const LoggerPtr & logger) { return logger.get(); } [[maybe_unused]] LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; }
[[maybe_unused]] LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); }
[[maybe_unused]] const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; } [[maybe_unused]] const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; }
[[maybe_unused]] std::unique_ptr<LogToStrImpl> getLoggerHelper(std::unique_ptr<LogToStrImpl> && logger) { return logger; } [[maybe_unused]] std::unique_ptr<LogToStrImpl> getLoggerHelper(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
[[maybe_unused]] std::unique_ptr<LogFrequencyLimiterIml> getLoggerHelper(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; } [[maybe_unused]] std::unique_ptr<LogFrequencyLimiterIml> getLoggerHelper(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }

View File

@ -32,7 +32,6 @@ TEST(Logger, TestLog)
LOG_TEST(log, "Hello World"); LOG_TEST(log, "Hello World");
EXPECT_EQ(oss.str(), "Hello World\n"); EXPECT_EQ(oss.str(), "Hello World\n");
Poco::Logger::destroy("TestLogger");
} }
{ /// Test logs invisible for other levels { /// Test logs invisible for other levels
@ -45,8 +44,6 @@ TEST(Logger, TestLog)
LOG_TEST(log, "Hello World"); LOG_TEST(log, "Hello World");
EXPECT_EQ(oss.str(), ""); EXPECT_EQ(oss.str(), "");
Poco::Logger::destroy(std::string{level} + "_Logger");
} }
} }

View File

@ -6,14 +6,15 @@
#include <Compression/CompressionFactory.h> #include <Compression/CompressionFactory.h>
#include <Compression/CompressionInfo.h> #include <Compression/CompressionInfo.h>
#include <Poco/Logger.h> #include <Poco/Logger.h>
#include <Common/randomSeed.h>
#include <Common/logger_useful.h>
#include "libaccel_config.h"
#include <Common/MemorySanitizer.h> #include <Common/MemorySanitizer.h>
#include <Common/logger_useful.h>
#include <Common/randomSeed.h>
#include <base/scope_guard.h> #include <base/scope_guard.h>
#include <base/getPageSize.h> #include <base/getPageSize.h>
#include <immintrin.h>
#include "libaccel_config.h"
#include <immintrin.h>
namespace DB namespace DB
{ {
@ -416,9 +417,7 @@ UInt32 CompressionCodecDeflateQpl::doCompressData(const char * source, UInt32 so
{ {
/// QPL library is using AVX-512 with some shuffle operations. /// QPL library is using AVX-512 with some shuffle operations.
/// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. /// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle.
#if defined(MEMORY_SANITIZER)
__msan_unpoison(dest, getMaxCompressedDataSize(source_size)); __msan_unpoison(dest, getMaxCompressedDataSize(source_size));
#endif
Int32 res = HardwareCodecDeflateQpl::RET_ERROR; Int32 res = HardwareCodecDeflateQpl::RET_ERROR;
if (DeflateQplJobHWPool::instance().isJobPoolReady()) if (DeflateQplJobHWPool::instance().isJobPoolReady())
res = hw_codec->doCompressData(source, source_size, dest, getMaxCompressedDataSize(source_size)); res = hw_codec->doCompressData(source, source_size, dest, getMaxCompressedDataSize(source_size));
@ -439,9 +438,7 @@ void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 so
{ {
/// QPL library is using AVX-512 with some shuffle operations. /// QPL library is using AVX-512 with some shuffle operations.
/// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. /// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle.
#if defined(MEMORY_SANITIZER)
__msan_unpoison(dest, uncompressed_size); __msan_unpoison(dest, uncompressed_size);
#endif
/// Device IOTLB miss has big perf. impact for IAA accelerators. /// Device IOTLB miss has big perf. impact for IAA accelerators.
/// To avoid page fault, we need touch buffers related to accelerator in advance. /// To avoid page fault, we need touch buffers related to accelerator in advance.
touchBufferWithZeroFilling(dest, uncompressed_size); touchBufferWithZeroFilling(dest, uncompressed_size);

View File

@ -85,6 +85,7 @@ class IColumn;
M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \
M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \ M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \
M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \
M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \ M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
@ -153,7 +154,7 @@ class IColumn;
M(Float, totals_auto_threshold, 0.5, "The threshold for totals_mode = 'auto'.", 0) \ M(Float, totals_auto_threshold, 0.5, "The threshold for totals_mode = 'auto'.", 0) \
\ \
M(Bool, allow_suspicious_low_cardinality_types, false, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \ M(Bool, allow_suspicious_low_cardinality_types, false, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \
M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \ M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misuse", 0) \
M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \ M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \
M(Bool, allow_suspicious_ttl_expressions, false, "Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.", 0) \ M(Bool, allow_suspicious_ttl_expressions, false, "Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.", 0) \
M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \ M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \
@ -252,7 +253,7 @@ class IColumn;
M(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "Minimal type in query_log to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \ M(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "Minimal type in query_log to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \
M(Milliseconds, log_queries_min_query_duration_ms, 0, "Minimal time for the query to run, to get to the query_log/query_thread_log/query_views_log.", 0) \ M(Milliseconds, log_queries_min_query_duration_ms, 0, "Minimal time for the query to run, to get to the query_log/query_thread_log/query_views_log.", 0) \
M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \ M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \
M(Float, log_queries_probability, 1., "Log queries with the specified probabality.", 0) \ M(Float, log_queries_probability, 1., "Log queries with the specified probability.", 0) \
\ \
M(Bool, log_processors_profiles, false, "Log Processors profile events.", 0) \ M(Bool, log_processors_profiles, false, "Log Processors profile events.", 0) \
M(DistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, "How are distributed subqueries performed inside IN or JOIN sections?", IMPORTANT) \ M(DistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, "How are distributed subqueries performed inside IN or JOIN sections?", IMPORTANT) \
@ -260,8 +261,8 @@ class IColumn;
M(UInt64, max_concurrent_queries_for_all_users, 0, "The maximum number of concurrent requests for all users.", 0) \ M(UInt64, max_concurrent_queries_for_all_users, 0, "The maximum number of concurrent requests for all users.", 0) \
M(UInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.", 0) \ M(UInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.", 0) \
\ \
M(Bool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be performed", 0) \ M(Bool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of inserting blocks should be performed", 0) \
M(Bool, async_insert_deduplicate, false, "For async INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be performed", 0) \ M(Bool, async_insert_deduplicate, false, "For async INSERT queries in the replicated table, specifies that deduplication of inserting blocks should be performed", 0) \
\ \
M(UInt64Auto, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 0 - disabled, 'auto' - use majority", 0) \ M(UInt64Auto, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 0 - disabled, 'auto' - use majority", 0) \
M(Milliseconds, insert_quorum_timeout, 600000, "If the quorum of replicas did not meet in specified time (in milliseconds), exception will be thrown and insertion is aborted.", 0) \ M(Milliseconds, insert_quorum_timeout, 600000, "If the quorum of replicas did not meet in specified time (in milliseconds), exception will be thrown and insertion is aborted.", 0) \
@ -586,10 +587,11 @@ class IColumn;
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \ M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \
M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped taraget table during pushing to views", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \
M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \
M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW <name> afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \ M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW <name> afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
@ -763,8 +765,8 @@ class IColumn;
\ \
M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \ M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \
M(Bool, enable_filesystem_read_prefetches_log, false, "Log to system.filesystem prefetch_log during query. Should be used only for testing or debugging, not recommended to be turned on by default", 0) \ M(Bool, enable_filesystem_read_prefetches_log, false, "Log to system.filesystem prefetch_log during query. Should be used only for testing or debugging, not recommended to be turned on by default", 0) \
M(Bool, allow_prefetched_read_pool_for_remote_filesystem, true, "Prefer prefethed threadpool if all parts are on remote filesystem", 0) \ M(Bool, allow_prefetched_read_pool_for_remote_filesystem, true, "Prefer prefetched threadpool if all parts are on remote filesystem", 0) \
M(Bool, allow_prefetched_read_pool_for_local_filesystem, false, "Prefer prefethed threadpool if all parts are on remote filesystem", 0) \ M(Bool, allow_prefetched_read_pool_for_local_filesystem, false, "Prefer prefetched threadpool if all parts are on remote filesystem", 0) \
\ \
M(UInt64, prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the prefetch buffer to read from the filesystem.", 0) \ M(UInt64, prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the prefetch buffer to read from the filesystem.", 0) \
M(UInt64, filesystem_prefetch_step_bytes, 0, "Prefetch step in bytes. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \ M(UInt64, filesystem_prefetch_step_bytes, 0, "Prefetch step in bytes. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \
@ -809,7 +811,7 @@ class IColumn;
\ \
M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \ M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \
\ \
M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \ M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelization of query processing right after reading from storage if possible", 0) \
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \ M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
M(Bool, count_distinct_optimization, false, "Rewrite count distinct to subquery of group by", 0) \ M(Bool, count_distinct_optimization, false, "Rewrite count distinct to subquery of group by", 0) \
M(Bool, throw_if_no_data_to_insert, true, "Allows or forbids empty INSERTs, enabled by default (throws an error on an empty insert)", 0) \ M(Bool, throw_if_no_data_to_insert, true, "Allows or forbids empty INSERTs, enabled by default (throws an error on an empty insert)", 0) \
@ -827,6 +829,7 @@ class IColumn;
M(Bool, function_json_value_return_type_allow_complex, false, "Allow function JSON_VALUE to return complex type, such as: struct, array, map.", 0) \ M(Bool, function_json_value_return_type_allow_complex, false, "Allow function JSON_VALUE to return complex type, such as: struct, array, map.", 0) \
M(Bool, use_with_fill_by_sorting_prefix, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently", 0) \ M(Bool, use_with_fill_by_sorting_prefix, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently", 0) \
M(Bool, optimize_uniq_to_count, true, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \ M(Bool, optimize_uniq_to_count, true, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \
M(Bool, use_variant_as_common_type, false, "Use Variant as a result type for if/multiIf in case when there is no common type for arguments", 0) \
\ \
/** Experimental functions */ \ /** Experimental functions */ \
M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
@ -834,6 +837,7 @@ class IColumn;
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \
M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
@ -1101,7 +1105,7 @@ class IColumn;
M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \ M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \
\ \
M(Bool, exact_rows_before_limit, false, "When enabled, ClickHouse will provide exact value for rows_before_limit_at_least statistic, but with the cost that the data before limit will have to be read completely", 0) \ M(Bool, exact_rows_before_limit, false, "When enabled, ClickHouse will provide exact value for rows_before_limit_at_least statistic, but with the cost that the data before limit will have to be read completely", 0) \
M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if there're joining expressions in the WHERE section. Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible", 0) \ M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if there are joining expressions in the WHERE section. Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible", 0) \
\ \
M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \
M(Bool, output_format_arrow_use_signed_indexes_for_dictionary, true, "Use signed integers for dictionary indexes in Arrow format", 0) \ M(Bool, output_format_arrow_use_signed_indexes_for_dictionary, true, "Use signed integers for dictionary indexes in Arrow format", 0) \

View File

@ -87,6 +87,8 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."},
{"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"},
{"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"},
{"allow_experimental_variant_type", false, false, "Add new experimental Variant type"},
{"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"},
{"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"},
{"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"},
{"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"},
@ -100,7 +102,9 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"},
{"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"},
{"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"},
{"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}}}, {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"},
{"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"},
{"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}}},
{"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."},
{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
{"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},

View File

@ -49,6 +49,7 @@ enum class TypeIndex
IPv4, IPv4,
IPv6, IPv6,
JSONPaths, JSONPaths,
Variant,
}; };
/** /**

View File

@ -12,6 +12,7 @@ public:
static constexpr auto family_name = "Date"; static constexpr auto family_name = "Date";
TypeIndex getTypeId() const override { return TypeIndex::Date; } TypeIndex getTypeId() const override { return TypeIndex::Date; }
TypeIndex getColumnType() const override { return TypeIndex::UInt16; }
const char * getFamilyName() const override { return family_name; } const char * getFamilyName() const override { return family_name; }
bool canBeUsedAsVersion() const override { return true; } bool canBeUsedAsVersion() const override { return true; }

View File

@ -12,6 +12,7 @@ public:
static constexpr auto family_name = "Date32"; static constexpr auto family_name = "Date32";
TypeIndex getTypeId() const override { return TypeIndex::Date32; } TypeIndex getTypeId() const override { return TypeIndex::Date32; }
TypeIndex getColumnType() const override { return TypeIndex::Int32; }
const char * getFamilyName() const override { return family_name; } const char * getFamilyName() const override { return family_name; }
Field getDefault() const override Field getDefault() const override

View File

@ -40,6 +40,7 @@ public:
const char * getFamilyName() const override { return family_name; } const char * getFamilyName() const override { return family_name; }
String doGetName() const override; String doGetName() const override;
TypeIndex getTypeId() const override { return TypeIndex::DateTime; } TypeIndex getTypeId() const override { return TypeIndex::DateTime; }
TypeIndex getColumnType() const override { return TypeIndex::UInt32; }
bool canBeUsedAsVersion() const override { return true; } bool canBeUsedAsVersion() const override { return true; }
bool canBeInsideNullable() const override { return true; } bool canBeInsideNullable() const override { return true; }

Some files were not shown because too many files have changed in this diff Show More