Merge remote-tracking branch 'rschu1ze/master' into cgroup-observing-memory-tracker

This commit is contained in:
Robert Schulze 2024-02-10 13:53:19 +00:00
commit 3a4b6a2379
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
128 changed files with 3127 additions and 658 deletions

View File

@ -318,15 +318,19 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
MarkReleaseReady:
needs: [RunConfig, BuilderBinDarwin, BuilderBinDarwinAarch64, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Mark Commit Release Ready
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 mark_release_ready.py
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
- BuilderDebRelease
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Mark Commit Release Ready
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
@ -385,6 +389,22 @@ jobs:
test_name: Stateless tests (release, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestS3Debug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (debug, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestS3Tsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (tsan, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
@ -493,6 +513,55 @@ jobs:
test_name: Stateful tests (debug)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
# Parallel replicas
FunctionalStatefulTestDebugParallelReplicas:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (debug, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestUBsanParallelReplicas:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (ubsan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestMsanParallelReplicas:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (msan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestTsanParallelReplicas:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (tsan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestAsanParallelReplicas:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (asan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestReleaseParallelReplicas:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (release, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
########################### ClickBench #######################################################
##############################################################################################
@ -700,6 +769,28 @@ jobs:
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
############################ SQLLOGIC TEST ###################################################
##############################################################################################
SQLLogicTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Sqllogic test (release)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
##################################### SQL TEST ###############################################
##############################################################################################
SQLTest:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: SQLTest
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
###################################### SQLANCER FUZZERS ######################################
##############################################################################################
SQLancerTestRelease:
@ -732,6 +823,8 @@ jobs:
- FunctionalStatelessTestTsan
- FunctionalStatelessTestMsan
- FunctionalStatelessTestUBsan
- FunctionalStatelessTestS3Debug
- FunctionalStatelessTestS3Tsan
- FunctionalStatefulTestDebug
- FunctionalStatefulTestRelease
- FunctionalStatefulTestAarch64
@ -739,6 +832,12 @@ jobs:
- FunctionalStatefulTestTsan
- FunctionalStatefulTestMsan
- FunctionalStatefulTestUBsan
- FunctionalStatefulTestDebugParallelReplicas
- FunctionalStatefulTestUBsanParallelReplicas
- FunctionalStatefulTestMsanParallelReplicas
- FunctionalStatefulTestTsanParallelReplicas
- FunctionalStatefulTestAsanParallelReplicas
- FunctionalStatefulTestReleaseParallelReplicas
- StressTestDebug
- StressTestAsan
- StressTestTsan
@ -764,6 +863,8 @@ jobs:
- UnitTestsReleaseClang
- SQLancerTestRelease
- SQLancerTestDebug
- SQLLogicTestRelease
- SQLTest
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code

View File

@ -1002,7 +1002,7 @@ jobs:
####################################### libFuzzer ###########################################
#############################################################################################
libFuzzer:
if: ${{ !failure() && !cancelled() && contains(github.event.pull_request.labels.*.name, 'libFuzzer') }}
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, StyleCheck]
uses: ./.github/workflows/libfuzzer.yml
with:

View File

@ -91,6 +91,8 @@ jobs:
build_name: package_release
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
BuilderDebAarch64:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
@ -99,6 +101,8 @@ jobs:
build_name: package_aarch64
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
BuilderDebAsan:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
@ -142,6 +146,8 @@ jobs:
build_name: binary_darwin
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
BuilderBinDarwinAarch64:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
@ -150,6 +156,8 @@ jobs:
build_name: binary_darwin_aarch64
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
############################################################################################
##################################### Docker images #######################################
############################################################################################
@ -206,13 +214,8 @@ jobs:
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderDebRelease
- BuilderDebAarch64
- BuilderDebAsan
- BuilderDebTsan
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
- BuilderBinDarwin
- BuilderBinDarwinAarch64
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
@ -225,7 +228,6 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
MarkReleaseReady:
if: ${{ !failure() && !cancelled() }}
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
@ -235,8 +237,6 @@ jobs:
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Mark Commit Release Ready
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -26,6 +26,10 @@ name: Build ClickHouse
description: json ci data
type: string
required: true
force:
description: disallow job skipping
type: boolean
default: false
additional_envs:
description: additional ENV variables to setup the job
type: string
@ -33,7 +37,7 @@ name: Build ClickHouse
jobs:
Build:
name: Build-${{inputs.build_name}}
if: contains(fromJson(inputs.data).jobs_data.jobs_to_do, inputs.build_name)
if: ${{ contains(fromJson(inputs.data).jobs_data.jobs_to_do, inputs.build_name) || inputs.force }}
env:
GITHUB_JOB_OVERRIDDEN: Build-${{inputs.build_name}}
runs-on: [self-hosted, '${{inputs.runner_type}}']
@ -78,13 +82,15 @@ jobs:
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" \
--infile ${{ toJson(inputs.data) }} \
--job-name "$BUILD_NAME" \
--run
--run \
${{ inputs.force && '--force' || '' }}
- name: Post
# there may still be a build report to upload for a failed build job
if: ${{ !cancelled() }}
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.build_name}}'
- name: Mark as done
if: ${{ !cancelled() }}
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --mark-success --job-name '${{inputs.build_name}}'
- name: Clean

View File

@ -107,6 +107,7 @@ jobs:
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.test_name}}'
- name: Mark as done
if: ${{ !cancelled() }}
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --mark-success --job-name '${{inputs.test_name}}' --batch ${{matrix.batch}}
- name: Clean

View File

@ -254,10 +254,17 @@ endif()
include(cmake/cpu_features.cmake)
# Asynchronous unwind tables are needed for Query Profiler.
# They are already by default on some platforms but possibly not on all platforms.
# Enable it explicitly.
set (COMPILER_FLAGS "${COMPILER_FLAGS} -fasynchronous-unwind-tables")
# Query Profiler doesn't work on MacOS for several reasons
# - PHDR cache is not available
# - We use native functionality to get stacktraces which is not async signal safe
# and thus we don't need to generate asynchronous unwind tables
if (NOT OS_DARWIN)
# Asynchronous unwind tables are needed for Query Profiler.
# They are already by default on some platforms but possibly not on all platforms.
# Enable it explicitly.
set (COMPILER_FLAGS "${COMPILER_FLAGS} -fasynchronous-unwind-tables")
endif()
# Reproducible builds.
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")

View File

@ -22,6 +22,7 @@
#include <cstddef>
#include <map>
#include <vector>
#include "Poco/Channel.h"
#include "Poco/Format.h"
#include "Poco/Foundation.h"
@ -871,21 +872,11 @@ public:
/// If the Logger does not yet exist, it is created, based
/// on its parent logger.
static LoggerPtr getShared(const std::string & name);
static LoggerPtr getShared(const std::string & name, bool should_be_owned_by_shared_ptr_if_created = true);
/// Returns a shared pointer to the Logger with the given name.
/// If the Logger does not yet exist, it is created, based
/// on its parent logger.
static Logger & unsafeGet(const std::string & name);
/// Returns a reference to the Logger with the given name.
/// If the Logger does not yet exist, it is created, based
/// on its parent logger.
///
/// WARNING: This method is not thread safe. You should
/// probably use get() instead.
/// The only time this method should be used is during
/// program initialization, when only one thread is running.
static Logger & create(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION);
/// Creates and returns a reference to a Logger with the
/// given name. The Logger's Channel and log level as set as
@ -932,6 +923,16 @@ public:
static const std::string ROOT; /// The name of the root logger ("").
public:
struct LoggerEntry
{
Poco::Logger * logger;
bool owned_by_shared_ptr = false;
};
using LoggerMap = std::unordered_map<std::string, LoggerEntry>;
using LoggerMapIterator = LoggerMap::iterator;
protected:
Logger(const std::string & name, Channel * pChannel, int level);
~Logger();
@ -940,12 +941,16 @@ protected:
void log(const std::string & text, Message::Priority prio, const char * file, int line);
static std::string format(const std::string & fmt, int argc, std::string argv[]);
static Logger & unsafeCreate(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION);
static Logger & parent(const std::string & name);
static void add(Logger * pLogger);
static Logger * find(const std::string & name);
private:
static std::pair<Logger::LoggerMapIterator, bool> unsafeGet(const std::string & name, bool get_shared);
static Logger * unsafeGetRawPtr(const std::string & name);
static std::pair<LoggerMapIterator, bool> unsafeCreate(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION);
static Logger & parent(const std::string & name);
static std::pair<LoggerMapIterator, bool> add(Logger * pLogger);
static std::optional<LoggerMapIterator> find(const std::string & name);
static Logger * findRawPtr(const std::string & name);
Logger();
Logger(const Logger &);
Logger & operator=(const Logger &);

View File

@ -38,14 +38,7 @@ std::mutex & getLoggerMutex()
return *logger_mutex;
}
struct LoggerEntry
{
Poco::Logger * logger;
bool owned_by_shared_ptr = false;
};
using LoggerMap = std::unordered_map<std::string, LoggerEntry>;
LoggerMap * _pLoggerMap = nullptr;
Poco::Logger::LoggerMap * _pLoggerMap = nullptr;
}
@ -337,10 +330,12 @@ struct LoggerDeleter
}
};
inline LoggerPtr makeLoggerPtr(Logger & logger)
inline LoggerPtr makeLoggerPtr(Logger & logger, bool owned_by_shared_ptr)
{
return std::shared_ptr<Logger>(&logger, LoggerDeleter());
if (owned_by_shared_ptr)
return LoggerPtr(&logger, LoggerDeleter());
return LoggerPtr(std::shared_ptr<void>{}, &logger);
}
}
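The non-owning branch of makeLoggerPtr above relies on the aliasing constructor of std::shared_ptr. A minimal standalone sketch of that pattern, assuming nothing from this patch (Widget and registry_owned are illustrative names):

```cpp
#include <cassert>
#include <memory>

struct Widget { int value = 42; };

int main()
{
    static Widget registry_owned;  /// lives for the whole program, like a logger kept alive by the registry

    /// Aliasing constructor: the shared_ptr stores &registry_owned but shares
    /// ownership with an empty shared_ptr<void>, so no deleter ever runs on it.
    std::shared_ptr<Widget> non_owning(std::shared_ptr<void>{}, &registry_owned);

    assert(non_owning.use_count() == 0);  /// empty control block: owns nothing
    assert(non_owning->value == 42);      /// but the stored pointer is still usable
    return 0;
}
```

This is why owned_by_shared_ptr can be false while the returned LoggerPtr still dereferences correctly: ownership stays with the logger registry, not with the pointer handed out.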
@ -350,64 +345,67 @@ Logger& Logger::get(const std::string& name)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
Logger & logger = unsafeGet(name);
/** If there are already shared pointer created for this logger
* we need to increment Logger reference count and now logger
* is owned by logger infrastructure.
*/
auto it = _pLoggerMap->find(name);
if (it->second.owned_by_shared_ptr)
{
it->second.logger->duplicate();
it->second.owned_by_shared_ptr = false;
}
return logger;
auto [it, inserted] = unsafeGet(name, false /*get_shared*/);
return *it->second.logger;
}
LoggerPtr Logger::getShared(const std::string & name)
LoggerPtr Logger::getShared(const std::string & name, bool should_be_owned_by_shared_ptr_if_created)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
bool logger_exists = _pLoggerMap && _pLoggerMap->contains(name);
auto [it, inserted] = unsafeGet(name, true /*get_shared*/);
Logger & logger = unsafeGet(name);
/** If logger already exists, then this shared pointer does not own it.
* If logger does not exists, logger infrastructure could be already destroyed
* or logger was created.
/** If the logger was created during `unsafeGet`, then this shared pointer owns it.
* If the logger already existed, then this shared pointer does not own it.
*/
if (logger_exists)
{
logger.duplicate();
}
else if (_pLoggerMap)
{
_pLoggerMap->find(name)->second.owned_by_shared_ptr = true;
}
if (inserted && should_be_owned_by_shared_ptr_if_created)
it->second.owned_by_shared_ptr = true;
return makeLoggerPtr(logger);
return makeLoggerPtr(*it->second.logger, it->second.owned_by_shared_ptr);
}
Logger& Logger::unsafeGet(const std::string& name)
std::pair<Logger::LoggerMapIterator, bool> Logger::unsafeGet(const std::string& name, bool get_shared)
{
Logger* pLogger = find(name);
if (!pLogger)
std::optional<Logger::LoggerMapIterator> optional_logger_it = find(name);
if (optional_logger_it)
{
auto & logger_it = *optional_logger_it;
if (logger_it->second.owned_by_shared_ptr)
{
logger_it->second.logger->duplicate();
if (!get_shared)
logger_it->second.owned_by_shared_ptr = false;
}
}
if (!optional_logger_it)
{
Logger * logger = nullptr;
if (name == ROOT)
{
pLogger = new Logger(name, 0, Message::PRIO_INFORMATION);
logger = new Logger(name, nullptr, Message::PRIO_INFORMATION);
}
else
{
Logger& par = parent(name);
pLogger = new Logger(name, par.getChannel(), par.getLevel());
logger = new Logger(name, par.getChannel(), par.getLevel());
}
add(pLogger);
return add(logger);
}
return *pLogger;
return std::make_pair(*optional_logger_it, false);
}
Logger * Logger::unsafeGetRawPtr(const std::string & name)
{
return unsafeGet(name, false /*get_shared*/).first->second.logger;
}
@ -415,24 +413,24 @@ Logger& Logger::create(const std::string& name, Channel* pChannel, int level)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
return unsafeCreate(name, pChannel, level);
return *unsafeCreate(name, pChannel, level).first->second.logger;
}
LoggerPtr Logger::createShared(const std::string & name, Channel * pChannel, int level)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
Logger & logger = unsafeCreate(name, pChannel, level);
_pLoggerMap->find(name)->second.owned_by_shared_ptr = true;
auto [it, inserted] = unsafeCreate(name, pChannel, level);
it->second.owned_by_shared_ptr = true;
return makeLoggerPtr(logger);
return makeLoggerPtr(*it->second.logger, it->second.owned_by_shared_ptr);
}
Logger& Logger::root()
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
return unsafeGet(ROOT);
return *unsafeGetRawPtr(ROOT);
}
@ -440,7 +438,11 @@ Logger* Logger::has(const std::string& name)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
return find(name);
auto optional_it = find(name);
if (!optional_it)
return nullptr;
return (*optional_it)->second.logger;
}
@ -459,20 +461,32 @@ void Logger::shutdown()
}
delete _pLoggerMap;
_pLoggerMap = 0;
_pLoggerMap = nullptr;
}
}
Logger* Logger::find(const std::string& name)
std::optional<Logger::LoggerMapIterator> Logger::find(const std::string& name)
{
if (_pLoggerMap)
{
LoggerMap::iterator it = _pLoggerMap->find(name);
if (it != _pLoggerMap->end())
return it->second.logger;
return it;
return {};
}
return 0;
return {};
}
Logger * Logger::findRawPtr(const std::string & name)
{
auto optional_it = find(name);
if (!optional_it)
return nullptr;
return (*optional_it)->second.logger;
}
@ -490,28 +504,28 @@ void Logger::names(std::vector<std::string>& names)
}
}
Logger& Logger::unsafeCreate(const std::string & name, Channel * pChannel, int level)
std::pair<Logger::LoggerMapIterator, bool> Logger::unsafeCreate(const std::string & name, Channel * pChannel, int level)
{
if (find(name)) throw ExistsException();
Logger* pLogger = new Logger(name, pChannel, level);
add(pLogger);
return *pLogger;
return add(pLogger);
}
Logger& Logger::parent(const std::string& name)
{
std::string::size_type pos = name.rfind('.');
if (pos != std::string::npos)
{
std::string pname = name.substr(0, pos);
Logger* pParent = find(pname);
Logger* pParent = findRawPtr(pname);
if (pParent)
return *pParent;
else
return parent(pname);
}
else return unsafeGet(ROOT);
else return *unsafeGetRawPtr(ROOT);
}
@ -579,12 +593,14 @@ namespace
}
void Logger::add(Logger* pLogger)
std::pair<Logger::LoggerMapIterator, bool> Logger::add(Logger* pLogger)
{
if (!_pLoggerMap)
_pLoggerMap = new LoggerMap;
_pLoggerMap = new Logger::LoggerMap;
_pLoggerMap->emplace(pLogger->name(), LoggerEntry{pLogger, false /*owned_by_shared_ptr*/});
auto result = _pLoggerMap->emplace(pLogger->name(), LoggerEntry{pLogger, false /*owned_by_shared_ptr*/});
assert(result.second);
return result;
}

View File

@ -1,6 +1,10 @@
#include <libunwind.h>
/// On MacOS this function will be replaced with a dynamic symbol
/// from the system library.
#if !defined(OS_DARWIN)
int backtrace(void ** buffer, int size)
{
return unw_backtrace(buffer, size);
}
#endif
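For context, a hedged sketch of how a caller typically consumes this backtrace symbol through the standard <execinfo.h> interface; the usage below is illustrative and not taken from the patch:

```cpp
#include <execinfo.h>
#include <cstdio>
#include <cstdlib>

int main()
{
    void * frames[64];
    /// On macOS this resolves to the system library's implementation;
    /// elsewhere the libunwind-backed definition above is used.
    int count = backtrace(frames, 64);

    char ** symbols = backtrace_symbols(frames, count);
    for (int i = 0; i < count; ++i)
        std::printf("%s\n", symbols[i]);
    std::free(symbols);
    return 0;
}
```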

contrib/libuv vendored

@ -1 +1 @@
Subproject commit 3a85b2eb3d83f369b8a8cafd329d7e9dc28f60cf
Subproject commit 4482964660c77eec1166cd7d14fb915e3dbd774a

@ -1 +1 @@
Subproject commit 2568a7cd1297c7c3044b0f3cc0c23a6f6444d856
Subproject commit d2142eed98046a47ff7112e3cc1e197c8a5cd80f

View File

@ -451,3 +451,24 @@ To disallow concurrent backup/restore, you can use these settings respectively.
The default value for both is true, so by default concurrent backup/restores are allowed.
When these settings are false on a cluster, only 1 backup/restore is allowed to run on a cluster at a time.
## Configuring BACKUP/RESTORE to use an AzureBlobStorage Endpoint
To write backups to an AzureBlobStorage container you need the following pieces of information:
- AzureBlobStorage endpoint connection string / url,
- Container,
- Path,
- Account name (if url is specified),
- Account key (if url is specified).
The destination for a backup will be specified like this:
```
AzureBlobStorage('<connection string>/<url>', '<container>', '<path>', '<account name>', '<account key>')
```
```sql
BACKUP TABLE data TO AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;',
'test_container', 'data_backup');
RESTORE TABLE data AS data_restored FROM AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;',
'test_container', 'data_backup');
```

View File

@ -28,6 +28,8 @@ The maximum amount of RAM to use for running a query on a single server.
The default setting is unlimited (set to `0`).
Cloud default value: depends on the amount of RAM on the replica.
The setting does not consider the volume of available memory or the total volume of memory on the machine.
The restriction applies to a single query within a single server.
You can use `SHOW PROCESSLIST` to see the current memory consumption for each query.
@ -104,7 +106,9 @@ Possible values:
- Maximum volume of RAM (in bytes) that can be used by the single [GROUP BY](../../sql-reference/statements/select/group-by.md#select-group-by-clause) operation.
- 0 — `GROUP BY` in external memory disabled.
Default value: 0.
Default value: `0`.
Cloud default value: half the memory amount per replica.
## max_bytes_before_external_sort {#settings-max_bytes_before_external_sort}
@ -115,6 +119,8 @@ Enables or disables execution of `ORDER BY` clauses in external memory. See [ORD
Default value: 0.
Cloud default value: half the memory amount per replica.
## max_rows_to_sort {#max-rows-to-sort}
A maximum number of rows before sorting. This allows you to limit memory consumption when sorting.
@ -129,7 +135,11 @@ What to do if the number of rows received before sorting exceeds one of the limi
## max_result_rows {#setting-max_result_rows}
Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query.
Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query. No limit is applied when value is `0`.
Default value: `0`.
Cloud default value: `0`.
## max_result_bytes {#max-result-bytes}
@ -137,10 +147,14 @@ Limit on the number of bytes in the result. The same as the previous setting.
## result_overflow_mode {#result-overflow-mode}
What to do if the volume of the result exceeds one of the limits: throw or break. By default, throw.
What to do if the volume of the result exceeds one of the limits: throw or break.
Using `break` is similar to using `LIMIT`. `break` interrupts execution only at the block level. This means that the number of returned rows can be greater than [max_result_rows](#setting-max_result_rows), is a multiple of [max_block_size](../../operations/settings/settings.md#setting-max_block_size), and depends on [max_threads](../../operations/settings/settings.md#max_threads).
Default value: `throw`.
Cloud default value: `throw`.
Example:
``` sql

View File

@ -212,6 +212,8 @@ Possible values:
Default value: `'basic'`.
Cloud default value: `'best_effort'`.
See also:
- [DateTime data type.](../../sql-reference/data-types/datetime.md)

View File

@ -508,7 +508,9 @@ Possible values:
- Any positive integer number of hops.
- 0 — No hops allowed.
Default value: 0.
Default value: `0`.
Cloud default value: `10`.
## insert_null_as_default {#insert_null_as_default}
@ -1126,7 +1128,9 @@ Possible values:
- 0 (or 1) — `INSERT SELECT` no parallel execution.
- Positive integer. Bigger than 1.
Default value: 0.
Default value: `0`.
Cloud default value: from `2` to `4`, depending on the service size.
Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#max_threads) setting.
Higher values will lead to higher memory usage.
@ -1207,7 +1211,9 @@ Default value: 10000.
Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response.
Default value: 0
Default value: `0`.
Cloud default value: `1`.
## poll_interval {#poll-interval}
@ -1946,6 +1952,8 @@ Possible values:
Default value: `200`.
Cloud default value: `1000`.
### async_insert_poll_timeout_ms {#async-insert-poll-timeout-ms}
Timeout in milliseconds for polling data from asynchronous insert queue.
@ -2130,7 +2138,9 @@ Possible values:
- Positive integer.
- 0 — Retries are disabled
Default value: 0
Default value: 20
Cloud default value: `20`.
Keeper request retries are done after some timeout. The timeout is controlled by the following settings: `insert_keeper_retry_initial_backoff_ms`, `insert_keeper_retry_max_backoff_ms`.
The first retry is done after `insert_keeper_retry_initial_backoff_ms` timeout. The consequent timeouts will be calculated as follows:
@ -2660,6 +2670,8 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md).
Default value: 1000000000 nanoseconds (once a second).
**Temporarily disabled in ClickHouse Cloud.**
See also:
- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
@ -2683,6 +2695,8 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md).
Default value: 1000000000 nanoseconds.
**Temporarily disabled in ClickHouse Cloud.**
See also:
- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
@ -2804,6 +2818,8 @@ Possible values:
Default value: `0`.
Cloud default value: `1`.
**See Also**
- [Distributed Table Engine](../../engines/table-engines/special/distributed.md/#distributed)
@ -3319,7 +3335,9 @@ Possible values:
- a string representing any valid table engine name
Default value: `None`
Default value: `MergeTree`.
Cloud default value: `SharedMergeTree`.
**Example**
@ -3895,6 +3913,8 @@ Possible values:
Default value: `0`.
Cloud default value: `1`.
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds.
@ -3933,6 +3953,8 @@ Possible values:
Default value: `throw`.
Cloud default value: `none`.
## flatten_nested {#flatten-nested}
Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/index.md) columns.
@ -4068,6 +4090,8 @@ Possible values:
Default value: `1`.
Cloud default value: `0`.
:::note
`alter_sync` is applicable to `Replicated` tables only, it does nothing to alters of not `Replicated` tables.
:::
@ -4723,6 +4747,8 @@ other connections are cancelled. Queries with `max_parallel_replicas > 1` are su
Enabled by default.
Disabled by default on Cloud.
## hedged_connection_timeout {#hedged_connection_timeout}
If we can't establish connection with replica after this timeout in hedged requests, we start working with the next replica without cancelling connection to the previous.
@ -5348,10 +5374,11 @@ Default value: `false`.
## max_partition_size_to_drop
Restriction on dropping partitions in query time.
Restriction on dropping partitions in query time. The value 0 means that you can drop partitions without any restrictions.
Default value: 50 GB.
The value 0 means that you can drop partitions without any restrictions.
Cloud default value: 1 TB.
:::note
This query setting overwrites its server setting equivalent, see [max_partition_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-partition-size-to-drop)
@ -5359,10 +5386,11 @@ This query setting overwrites its server setting equivalent, see [max_partition_
## max_table_size_to_drop
Restriction on deleting tables in query time.
Restriction on deleting tables in query time. The value 0 means that you can delete all tables without any restrictions.
Default value: 50 GB.
The value 0 means that you can delete all tables without any restrictions.
Cloud default value: 1 TB.
:::note
This query setting overwrites its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop)

View File

@ -206,7 +206,7 @@ Some of these settings will disable cache features per query/profile that are en
- `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` - allows to use cache in query only if it already exists, otherwise query data will not be written to local cache storage. Default: `false`.
- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on. Default: `false`.
- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on. Default: `false`. Cloud default value: `true`.
- `enable_filesystem_cache_log` - turn on logging to the `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. It can be turned on for specific queries or enabled in a profile. Default: `false`.

View File

@ -20,6 +20,6 @@ sidebar_position: 11
If you have not seen our T-shirts, watch a video about ClickHouse. For example, this one:
![iframe](https://www.youtube.com/embed/bSyQahMVZ7w)
<iframe width="675" height="380" src="https://www.youtube.com/embed/bSyQahMVZ7w" frameborder="0" allow="accelerometer; autoplay; gyroscope; picture-in-picture" allowfullscreen></iframe>
P.S. These T-shirts are not for sale; they are given away for free at most [ClickHouse](https://clickhouse.com/#meet) meetups, usually as a reward for the most interesting questions or other kinds of active participation.

View File

@ -19,7 +19,7 @@ slug: /ru/operations/system-tables/grants
- `column` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of the column to which access is granted.
- `is_partial_revoke` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. Shows whether some privileges have been revoked. Possible values:
- `0` — The row describes a partial revoke.
- `1` — The row describes a grant.
- `0` — The row describes a grant.
- `1` — The row describes a partial revoke.
- `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The privilege was granted with the `WITH GRANT OPTION` option; for details see [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax).

View File

@ -527,10 +527,11 @@ let queries = [];
/// Query parameters with predefined default values.
/// All other parameters will be automatically found in the queries.
let params = {
let default_params = {
'rounding': '60',
'seconds': '86400'
};
let params = default_params;
/// Palette generation for charts
function generatePalette(baseColor, numColors) {
@ -594,13 +595,19 @@ let plots = [];
let charts = document.getElementById('charts');
/// This is not quite correct (we cannot really parse SQL with regexp) but tolerable.
const query_param_regexp = /\{(\w+):[^}]+\}/g;
const query_param_regexp = /\{(\w+):([^}]+)\}/g;
/// Automatically parse more parameters from the queries.
function findParamsInQuery(query, new_params) {
const typeDefault = (type) => type.includes('Int') ? '0'
: (type.includes('Float') ? '0.0'
: (type.includes('Bool') ? 'false'
: (type.includes('Date') ? new Date().toISOString().slice(0, 10)
: (type.includes('UUID') ? '00000000-0000-0000-0000-000000000000'
: ''))));
for (let match of query.matchAll(query_param_regexp)) {
const name = match[1];
new_params[name] = params[name] || '';
new_params[name] = params[name] || default_params[name] || typeDefault(match[2]);
}
}

View File

@ -404,12 +404,12 @@ private:
auto operand_type = and_operands[0]->getResultType();
auto function_type = function_node.getResultType();
assert(!function_type->isNullable());
chassert(!function_type->isNullable());
if (!function_type->equals(*operand_type))
{
/// Result of equality operator can be low cardinality, while AND always returns UInt8.
/// In that case we replace `(lc = 1) AND (lc = 1)` with `(lc = 1) AS UInt8`
assert(function_type->equals(*removeLowCardinality(operand_type)));
chassert(function_type->equals(*removeLowCardinality(operand_type)));
node = createCastFunction(std::move(and_operands[0]), function_type, getContext());
}
else
@ -427,7 +427,7 @@ private:
void tryReplaceOrEqualsChainWithIn(QueryTreeNodePtr & node)
{
auto & function_node = node->as<FunctionNode &>();
assert(function_node.getFunctionName() == "or");
chassert(function_node.getFunctionName() == "or");
QueryTreeNodes or_operands;
@ -486,7 +486,7 @@ private:
/// first we create tuple from RHS of equals functions
for (const auto & equals : equals_functions)
{
is_any_nullable |= equals->getResultType()->isNullable();
is_any_nullable |= removeLowCardinality(equals->getResultType())->isNullable();
const auto * equals_function = equals->as<FunctionNode>();
assert(equals_function && equals_function->getFunctionName() == "equals");

View File

@ -33,11 +33,13 @@ void BackupFactory::registerBackupEngine(const String & engine_name, const Creat
void registerBackupEnginesFileAndDisk(BackupFactory &);
void registerBackupEngineS3(BackupFactory &);
void registerBackupEngineAzureBlobStorage(BackupFactory &);
void registerBackupEngines(BackupFactory & factory)
{
registerBackupEnginesFileAndDisk(factory);
registerBackupEngineS3(factory);
registerBackupEngineAzureBlobStorage(factory);
}
BackupFactory::BackupFactory()

View File

@ -0,0 +1,320 @@
#include <Backups/BackupIO_AzureBlobStorage.h>
#if USE_AZURE_BLOB_STORAGE
#include <Common/quoteString.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <Interpreters/Context.h>
#include <IO/SharedThreadPools.h>
#include <IO/HTTPHeaderEntries.h>
#include <Storages/StorageAzureBlobCluster.h>
#include <Disks/IO/ReadBufferFromAzureBlobStorage.h>
#include <Disks/IO/WriteBufferFromAzureBlobStorage.h>
#include <IO/AzureBlobStorage/copyAzureBlobStorageFile.h>
#include <Disks/IDisk.h>
#include <Disks/DiskType.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int AZURE_BLOB_STORAGE_ERROR;
extern const int LOGICAL_ERROR;
}
BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
: BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage"))
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
object_storage = std::make_unique<AzureObjectStorage>("BackupReaderAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),
configuration_.container);
client = object_storage->getAzureBlobStorageClient();
settings = object_storage->getSettings();
}
BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default;
bool BackupReaderAzureBlobStorage::fileExists(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return object_storage->exists(StoredObject(key));
}
UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
ObjectMetadata object_metadata = object_storage->getObjectMetadata(key);
return object_metadata.size_bytes;
}
std::unique_ptr<SeekableReadBuffer> BackupReaderAzureBlobStorage::readFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return std::make_unique<ReadBufferFromAzureBlobStorage>(
client, key, read_settings, settings->max_single_read_retries,
settings->max_single_download_retries);
}
void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{
auto destination_data_source_description = destination_disk->getDataSourceDescription();
if ((destination_data_source_description.type == DataSourceType::ObjectStorage)
&& (destination_data_source_description.object_storage_type == ObjectStorageType::Azure)
&& (destination_data_source_description.is_encrypted == encrypted_in_backup))
{
LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName());
auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> &) -> size_t
{
/// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files.
if (blob_path.size() != 2 || mode != WriteMode::Rewrite)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Blob writing function called with unexpected blob_path.size={} or mode={}",
blob_path.size(), mode);
copyAzureBlobStorageFile(
client,
destination_disk->getObjectStorage()->getAzureBlobStorageClient(),
configuration.container,
fs::path(configuration.blob_path) / path_in_backup,
0,
file_size,
/* dest_container */ blob_path[1],
/* dest_path */ blob_path[0],
settings,
read_settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupRDAzure"),
/* for_disk_azure_blob_storage= */ true);
return file_size;
};
destination_disk->writeFileUsingBlobWritingFunction(destination_path, write_mode, write_blob_function);
return; /// copied!
}
/// Fallback to copy through buffers.
BackupReaderDefault::copyFileToDisk(path_in_backup, file_size, encrypted_in_backup, destination_disk, destination_path, write_mode);
}
BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage"))
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),
configuration_.container);
client = object_storage->getAzureBlobStorageClient();
settings = object_storage->getSettings();
}
void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
{
/// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible.
auto source_data_source_description = src_disk->getDataSourceDescription();
if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted))
{
/// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container.
/// In this case we can't use the native copy.
if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2)
{
LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName());
copyAzureBlobStorageFile(
src_disk->getObjectStorage()->getAzureBlobStorageClient(),
client,
/* src_container */ blob_path[1],
/* src_path */ blob_path[0],
start_pos,
length,
configuration.container,
fs::path(configuration.blob_path) / path_in_backup,
settings,
read_settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
return; /// copied!
}
}
/// Fallback to copy through buffers.
BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
}
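The control flow above — attempt a native server-side copy when the source and destination are the same kind of object storage and encryption settings match, otherwise fall back to streaming through buffers — can be sketched generically. Everything below (StorageKind, CopyRequest, copyFile) is an illustrative assumption, not code from this patch:

```cpp
#include <functional>

enum class StorageKind { Local, S3, Azure };

struct CopyRequest
{
    StorageKind source_kind;
    StorageKind destination_kind;
    bool source_encrypted = false;
    bool destination_encrypted = false;
};

/// Prefer a server-side copy when both ends are the same kind of object
/// storage and encryption matches; otherwise stream the bytes.
void copyFile(const CopyRequest & request,
              const std::function<void()> & native_copy,
              const std::function<void()> & buffered_copy)
{
    bool same_kind = request.source_kind == request.destination_kind
        && request.source_kind != StorageKind::Local;
    bool encryption_matches = request.source_encrypted == request.destination_encrypted;

    if (same_kind && encryption_matches)
        native_copy();      /// e.g. copyAzureBlobStorageFile in this patch
    else
        buffered_copy();    /// read from the source and re-upload
}

int main()
{
    CopyRequest request{StorageKind::Azure, StorageKind::Azure};
    copyFile(request, [] { /* native server-side copy */ }, [] { /* buffered fallback */ });
    return 0;
}
```

The real methods additionally require that the source file maps to a single blob (blob_path.size() == 2) before taking the native path.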
void BackupWriterAzureBlobStorage::copyFile(const String & destination, const String & source, size_t size)
{
LOG_TRACE(log, "Copying file inside backup from {} to {} ", source, destination);
copyAzureBlobStorageFile(
client,
client,
configuration.container,
fs::path(source),
0,
size,
/* dest_container */ configuration.container,
/* dest_path */ destination,
settings,
read_settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"),
/* for_disk_azure_blob_storage= */ true);
}
void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
{
copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
}
BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default;
bool BackupWriterAzureBlobStorage::fileExists(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return object_storage->exists(StoredObject(key));
}
UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
RelativePathsWithMetadata children;
object_storage->listObjects(key,children,/*max_keys*/0);
if (children.empty())
throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object must exist");
return children[0].metadata.size_bytes;
}
std::unique_ptr<ReadBuffer> BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return std::make_unique<ReadBufferFromAzureBlobStorage>(
client, key, read_settings, settings->max_single_read_retries,
settings->max_single_download_retries);
}
std::unique_ptr<WriteBuffer> BackupWriterAzureBlobStorage::writeFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return std::make_unique<WriteBufferFromAzureBlobStorage>(
client,
key,
settings->max_single_part_upload_size,
settings->max_unexpected_write_error_retries,
DBMS_DEFAULT_BUFFER_SIZE,
write_settings);
}
void BackupWriterAzureBlobStorage::removeFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
StoredObject object(key);
object_storage->removeObjectIfExists(object);
}
void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names)
{
StoredObjects objects;
for (const auto & file_name : file_names)
objects.emplace_back(file_name);
object_storage->removeObjectsIfExist(objects);
}
void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names)
{
StoredObjects objects;
for (const auto & file_name : file_names)
objects.emplace_back(file_name);
object_storage->removeObjectsIfExist(objects);
}
}
#endif

View File

@ -0,0 +1,68 @@
#pragma once
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
#include <Backups/BackupIO_Default.h>
#include <Disks/DiskType.h>
#include <Storages/StorageAzureBlobCluster.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
/// Represents a backup stored to Azure
class BackupReaderAzureBlobStorage : public BackupReaderDefault
{
public:
BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
~BackupReaderAzureBlobStorage() override;
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;
private:
const DataSourceDescription data_source_description;
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
StorageAzureBlob::Configuration configuration;
std::unique_ptr<AzureObjectStorage> object_storage;
std::shared_ptr<const AzureObjectStorageSettings> settings;
};
class BackupWriterAzureBlobStorage : public BackupWriterDefault
{
public:
BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
~BackupWriterAzureBlobStorage() override;
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;
void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override;
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
void copyFile(const String & destination, const String & source, size_t size) override;
void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;
private:
std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
void removeFilesBatch(const Strings & file_names);
const DataSourceDescription data_source_description;
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
StorageAzureBlob::Configuration configuration;
std::unique_ptr<AzureObjectStorage> object_storage;
std::shared_ptr<const AzureObjectStorageSettings> settings;
};
}
#endif

View File

@ -939,12 +939,12 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
}
else if (src_disk && from_immutable_file)
{
LOG_TRACE(log, "Writing backup for file {} from {} (disk {}): data file #{}", info.data_file_name, src_file_desc, src_disk->getName(), info.data_file_index);
LOG_INFO(log, "Writing backup for file {} from {} (disk {}): data file #{}", info.data_file_name, src_file_desc, src_disk->getName(), info.data_file_index);
writer->copyFileFromDisk(info.data_file_name, src_disk, src_file_path, info.encrypted_by_disk, info.base_size, info.size - info.base_size);
}
else
{
LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, src_file_desc, info.data_file_index);
LOG_INFO(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, src_file_desc, info.data_file_index);
auto create_read_buffer = [entry, read_settings = writer->getReadSettings()] { return entry->getReadBuffer(read_settings); };
writer->copyDataToFile(info.data_file_name, create_read_buffer, info.base_size, info.size - info.base_size);
}

View File

@ -0,0 +1,172 @@
#include "config.h"
#include <Backups/BackupFactory.h>
#include <Common/Exception.h>
#if USE_AZURE_BLOB_STORAGE
#include <Backups/BackupIO_AzureBlobStorage.h>
#include <Storages/StorageAzureBlob.h>
#include <Backups/BackupImpl.h>
#include <IO/Archives/hasRegisteredArchiveFileExtension.h>
#include <Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <filesystem>
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int SUPPORT_IS_DISABLED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
#if USE_AZURE_BLOB_STORAGE
namespace
{
String removeFileNameFromURL(String & url)
{
Poco::URI url2{url};
String path = url2.getPath();
size_t slash_pos = path.find_last_of('/');
String file_name = path.substr(slash_pos + 1);
path.resize(slash_pos + 1);
url2.setPath(path);
url = url2.toString();
return file_name;
}
}
#endif
void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
{
auto creator_fn = []([[maybe_unused]] const BackupFactory::CreateParams & params) -> std::unique_ptr<IBackup>
{
#if USE_AZURE_BLOB_STORAGE
const String & id_arg = params.backup_info.id_arg;
const auto & args = params.backup_info.args;
StorageAzureBlob::Configuration configuration;
if (!id_arg.empty())
{
const auto & config = params.context->getConfigRef();
auto config_prefix = "named_collections." + id_arg;
if (!config.has(config_prefix))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg);
if (config.has(config_prefix + ".connection_string"))
{
configuration.connection_url = config.getString(config_prefix + ".connection_string");
configuration.is_connection_string = true;
configuration.container = config.getString(config_prefix + ".container");
}
else
{
configuration.connection_url = config.getString(config_prefix + ".storage_account_url");
configuration.is_connection_string = false;
configuration.container = config.getString(config_prefix + ".container");
configuration.account_name = config.getString(config_prefix + ".account_name");
configuration.account_key = config.getString(config_prefix + ".account_key");
}
if (args.size() > 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]");
if (args.size() == 1)
configuration.blob_path = args[0].safeGet<String>();
}
else
{
if (args.size() == 3)
{
configuration.connection_url = args[0].safeGet<String>();
configuration.is_connection_string = true;
configuration.container = args[1].safeGet<String>();
configuration.blob_path = args[2].safeGet<String>();
}
else if (args.size() == 5)
{
configuration.connection_url = args[0].safeGet<String>();
configuration.is_connection_string = false;
configuration.container = args[1].safeGet<String>();
configuration.blob_path = args[2].safeGet<String>();
configuration.account_name = args[3].safeGet<String>();
configuration.account_key = args[4].safeGet<String>();
}
else
{
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Backup AzureBlobStorage requires 3 or 5 arguments: connection string>/<url, container, path, [account name], [account key]");
}
}
BackupImpl::ArchiveParams archive_params;
if (hasRegisteredArchiveFileExtension(configuration.blob_path))
{
if (params.is_internal_backup)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled");
archive_params.archive_name = removeFileNameFromURL(configuration.blob_path);
archive_params.compression_method = params.compression_method;
archive_params.compression_level = params.compression_level;
archive_params.password = params.password;
}
else
{
if (!params.password.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Password is not applicable, backup cannot be encrypted");
}
if (params.open_mode == IBackup::OpenMode::READ)
{
auto reader = std::make_shared<BackupReaderAzureBlobStorage>(configuration,
params.read_settings,
params.write_settings,
params.context);
return std::make_unique<BackupImpl>(
params.backup_info,
archive_params,
params.base_backup_info,
reader,
params.context,
/* use_same_s3_credentials_for_base_backup*/ false);
}
else
{
auto writer = std::make_shared<BackupWriterAzureBlobStorage>(configuration,
params.read_settings,
params.write_settings,
params.context);
return std::make_unique<BackupImpl>(
params.backup_info,
archive_params,
params.base_backup_info,
writer,
params.context,
params.is_internal_backup,
params.backup_coordination,
params.backup_uuid,
params.deduplicate_files,
/* use_same_s3_credentials_for_base_backup */ false);
}
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "AzureBlobStorage support is disabled");
#endif
};
factory.registerBackupEngine("AzureBlobStorage", creator_fn);
}
}

View File

@ -89,6 +89,7 @@ add_headers_and_sources(clickhouse_common_io Common/SSH)
add_headers_and_sources(clickhouse_common_io IO)
add_headers_and_sources(clickhouse_common_io IO/Archives)
add_headers_and_sources(clickhouse_common_io IO/S3)
add_headers_and_sources(clickhouse_common_io IO/AzureBlobStorage)
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)
@ -141,6 +142,7 @@ endif()
if (TARGET ch_contrib::azure_sdk)
add_headers_and_sources(dbms Disks/ObjectStorages/AzureBlobStorage)
add_headers_and_sources(dbms IO/AzureBlobStorage)
endif()
if (TARGET ch_contrib::hdfs)
@ -496,6 +498,7 @@ if (TARGET ch_contrib::aws_s3)
endif()
if (TARGET ch_contrib::azure_sdk)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::azure_sdk)
dbms_target_link_libraries (PRIVATE ch_contrib::azure_sdk)
endif()

View File

@ -19,7 +19,6 @@
#include <Storages/MergeTree/RequestResponse.h>
#include <atomic>
#include <optional>
#include "config.h"

View File

@ -27,6 +27,9 @@ class IConnectionPool : private boost::noncopyable
public:
using Entry = PoolBase<Connection>::Entry;
IConnectionPool() = default;
IConnectionPool(String host_, UInt16 port_) : host(host_), port(port_), address(host + ":" + toString(port_)) {}
virtual ~IConnectionPool() = default;
/// Selects the connection to work.
@ -36,7 +39,15 @@ public:
const Settings & settings,
bool force_connected = true) = 0;
const std::string & getHost() const { return host; }
UInt16 getPort() const { return port; }
const String & getAddress() const { return address; }
virtual Priority getPriority() const { return Priority{1}; }
protected:
const String host;
const UInt16 port = 0;
const String address;
};
using ConnectionPoolPtr = std::shared_ptr<IConnectionPool>;
@ -63,10 +74,9 @@ public:
Protocol::Compression compression_,
Protocol::Secure secure_,
Priority priority_ = Priority{1})
: Base(max_connections_,
: IConnectionPool(host_, port_),
Base(max_connections_,
getLogger("ConnectionPool (" + host_ + ":" + toString(port_) + ")")),
host(host_),
port(port_),
default_database(default_database_),
user(user_),
password(password_),
@ -99,10 +109,6 @@ public:
return entry;
}
const std::string & getHost() const
{
return host;
}
std::string getDescription() const
{
return host + ":" + toString(port);
@ -125,8 +131,6 @@ protected:
}
private:
String host;
UInt16 port;
String default_database;
String user;
String password;

View File

@ -1,7 +1,5 @@
#pragma once
#include <compare>
#include <Client/Connection.h>
#include <Storages/MergeTree/RequestResponse.h>

View File

@ -554,6 +554,21 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng
}
MutableColumnPtr ColumnArray::getDataInRange(size_t start, size_t length) const
{
if (start + length > getOffsets().size())
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnArray::getDataInRange method. "
"[start({}) + length({}) > offsets.size({})]", start, length, getOffsets().size());
size_t start_offset = offsetAt(start);
size_t end_offset = offsetAt(start + length);
auto res = getData().cloneEmpty();
res->insertRangeFrom(getData(), start_offset, end_offset - start_offset);
return res;
}
ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const
{
if (typeid_cast<const ColumnUInt8 *>(data.get()))

View File

@ -143,6 +143,10 @@ public:
const ColumnPtr & getOffsetsPtr() const { return offsets; }
ColumnPtr & getOffsetsPtr() { return offsets; }
/// Returns a copy of the data column's part corresponding to a specified range of rows.
/// For example, `getDataInRange(0, size())` is the same as `getDataPtr()->clone()`.
MutableColumnPtr getDataInRange(size_t start, size_t length) const;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
return scatterImpl<ColumnArray>(num_columns, selector);
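For illustration, a self-contained sketch (plain C++, not ClickHouse's IColumn machinery) of what getDataInRange() does for an array column stored as a flat data vector plus cumulative per-row offsets; offsetAt mirrors the helper used in the hunk above:

#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

/// offsets[i] is the end position of row i in the flat data vector.
size_t offsetAt(const std::vector<size_t> & offsets, size_t i)
{
    return i == 0 ? 0 : offsets[i - 1];
}

std::vector<int> getDataInRange(const std::vector<int> & data, const std::vector<size_t> & offsets, size_t start, size_t length)
{
    if (start + length > offsets.size())
        throw std::out_of_range("start + length > offsets.size()");
    size_t start_offset = offsetAt(offsets, start);
    size_t end_offset = offsetAt(offsets, start + length);
    return {data.begin() + start_offset, data.begin() + end_offset};
}

int main()
{
    /// Three array rows: [1, 2], [3], [4, 5, 6]
    std::vector<int> data = {1, 2, 3, 4, 5, 6};
    std::vector<size_t> offsets = {2, 3, 6};
    for (int value : getDataInRange(data, offsets, 1, 2)) /// rows 1..2
        std::cout << value << ' ';                        /// prints 3 4 5 6
}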

View File

@ -593,6 +593,7 @@
M(711, FILECACHE_ACCESS_DENIED) \
M(712, TOO_MANY_MATERIALIZED_VIEWS) \
M(713, BROKEN_PROJECTION) \
M(714, UNEXPECTED_CLUSTER) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -2,6 +2,8 @@
#include <memory>
#include <base/defines.h>
#include <Poco/Channel.h>
#include <Poco/Logger.h>
#include <Poco/Message.h>
@ -24,6 +26,16 @@ using LoggerRawPtr = Poco::Logger *;
*/
LoggerPtr getLogger(const std::string & name);
/** Get Logger with the specified name. If the Logger does not exist, it is created.
  * This overload was added for a specific purpose: when the logger is constructed from a constexpr string.
  * Such a Logger is destroyed only during program shutdown.
*/
template <size_t n>
ALWAYS_INLINE LoggerPtr getLogger(const char (&name)[n])
{
return Poco::Logger::getShared(name, false /*should_be_owned_by_shared_ptr_if_created*/);
}
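A standalone sketch of the overload-resolution behaviour the char-array overload above relies on (illustrative function names, not the real getLogger): a string literal binds to the char-array template, while a runtime std::string binds to the std::string overload:

#include <cstddef>
#include <iostream>
#include <string>

void getLoggerSketch(const std::string &)
{
    std::cout << "std::string overload (shared ownership)\n";
}

template <size_t n>
void getLoggerSketch(const char (&)[n])
{
    std::cout << "char[" << n << "] overload (owned until shutdown)\n";
}

int main()
{
    getLoggerSketch("QueryPipeline");            /// literal -> char-array template, exact match
    getLoggerSketch(std::string("Session_42"));  /// dynamic name -> std::string overload
}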
/** Create Logger with specified name, channel and logging level.
* If Logger already exists, throws exception.
* Logger is destroyed when the last shared_ptr that refers to the Logger with the specified name is destroyed.

View File

@ -384,6 +384,10 @@ The server successfully detected this situation and will download merged part fr
M(S3PutObject, "Number of S3 API PutObject calls.") \
M(S3GetObject, "Number of S3 API GetObject calls.") \
\
M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \
M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \
M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \
M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \
M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \
M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \
\
@ -625,6 +629,8 @@ The server successfully detected this situation and will download merged part fr
M(InterfacePostgreSQLReceiveBytes, "Number of bytes received through PostgreSQL interfaces") \
\
M(ParallelReplicasUsedCount, "Number of replicas used to execute a query with task-based parallel replicas") \
M(ParallelReplicasAvailableCount, "Number of replicas available to execute a query with task-based parallel replicas") \
M(ParallelReplicasUnavailableCount, "Number of replicas which were chosen, but were found to be unavailable during query execution with task-based parallel replicas") \
#ifdef APPLY_FOR_EXTERNAL_EVENTS
#define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M)

View File

@ -4,6 +4,7 @@
#include <base/constexpr_helpers.h>
#include <base/demangle.h>
#include <Common/scope_guard_safe.h>
#include <Common/Dwarf.h>
#include <Common/Elf.h>
#include <Common/MemorySanitizer.h>
@ -24,6 +25,15 @@
#include "config.h"
#include <boost/algorithm/string/split.hpp>
#if defined(OS_DARWIN)
/// This header contains functions like `backtrace` and `backtrace_symbols`,
/// which will be used for stack unwinding on Mac.
/// Read: https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/backtrace.3.html
#include "execinfo.h"
#endif
namespace
{
/// Currently this variable is set up once on server startup.
@ -262,6 +272,33 @@ void StackTrace::forEachFrame(
callback(current_inline_frame);
}
callback(current_frame);
}
#elif defined(OS_DARWIN)
UNUSED(fatal);
/// This function returns an array of strings in a special (slightly weird) format:
/// the frame number, library name, address in hex, mangled symbol name, `+` sign, and the offset.
char** strs = ::backtrace_symbols(frame_pointers.data(), static_cast<int>(size));
SCOPE_EXIT_SAFE({free(strs);});
for (size_t i = offset; i < size; ++i)
{
StackTrace::Frame current_frame;
std::vector<std::string> split;
boost::split(split, strs[i], isWhitespaceASCII);
split.erase(
std::remove_if(
split.begin(), split.end(),
[](const std::string & x) { return x.empty(); }),
split.end());
assert(split.size() == 6);
current_frame.virtual_addr = frame_pointers[i];
current_frame.physical_addr = frame_pointers[i];
current_frame.object = split[1];
current_frame.symbol = split[3];
callback(current_frame);
}
#else
@ -306,7 +343,11 @@ StackTrace::StackTrace(const ucontext_t & signal_context)
void StackTrace::tryCapture()
{
#if defined(OS_DARWIN)
size = backtrace(frame_pointers.data(), capacity);
#else
size = unw_backtrace(frame_pointers.data(), capacity);
#endif
__msan_unpoison(frame_pointers.data(), size * sizeof(frame_pointers[0]));
}
@ -376,7 +417,7 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
return callback("<Empty trace>");
size_t frame_index = stack_trace.offset;
#if defined(__ELF__) && !defined(OS_FREEBSD)
#if (defined(__ELF__) && !defined(OS_FREEBSD)) || defined(OS_DARWIN)
size_t inline_frame_index = 0;
auto callback_wrapper = [&](const StackTrace::Frame & frame)
{
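For reference, a standalone sketch of parsing one backtrace_symbols() line in the format described in the hunk above; the sample line is illustrative, and plain stream extraction stands in for the boost::split + isWhitespaceASCII combination used in the real code:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main()
{
    /// <frame #> <image name> <address> <mangled symbol> + <offset>
    std::string line = "3   clickhouse   0x0000000104a1b2c4  _ZN2DB10StackTrace10tryCaptureEv + 40";

    std::vector<std::string> tokens;
    std::istringstream stream(line);
    for (std::string token; stream >> token;) /// operator>> skips runs of whitespace
        tokens.push_back(token);

    /// tokens: {frame number, image, address, symbol, "+", offset}
    if (tokens.size() == 6)
        std::cout << "object=" << tokens[1] << " symbol=" << tokens[3] << '\n';
}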

View File

@ -9,6 +9,7 @@
#include <Poco/NullChannel.h>
#include <Poco/StreamChannel.h>
#include <sstream>
#include <thread>
TEST(Logger, Log)
@ -100,3 +101,75 @@ TEST(Logger, SideEffects)
LOG_TRACE(log, "test no throw {}", getLogMessageParamOrThrow());
}
TEST(Logger, SharedRawLogger)
{
{
std::ostringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
auto stream_channel = Poco::AutoPtr<Poco::StreamChannel>(new Poco::StreamChannel(stream));
auto shared_logger = getLogger("Logger_1");
shared_logger->setChannel(stream_channel.get());
shared_logger->setLevel("trace");
LOG_TRACE(shared_logger, "SharedLogger1Log1");
LOG_TRACE(getRawLogger("Logger_1"), "RawLogger1Log");
LOG_TRACE(shared_logger, "SharedLogger1Log2");
auto actual = stream.str();
EXPECT_EQ(actual, "SharedLogger1Log1\nRawLogger1Log\nSharedLogger1Log2\n");
}
{
std::ostringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
auto stream_channel = Poco::AutoPtr<Poco::StreamChannel>(new Poco::StreamChannel(stream));
auto * raw_logger = getRawLogger("Logger_2");
raw_logger->setChannel(stream_channel.get());
raw_logger->setLevel("trace");
LOG_TRACE(getLogger("Logger_2"), "SharedLogger2Log1");
LOG_TRACE(raw_logger, "RawLogger2Log");
LOG_TRACE(getLogger("Logger_2"), "SharedLogger2Log2");
auto actual = stream.str();
EXPECT_EQ(actual, "SharedLogger2Log1\nRawLogger2Log\nSharedLogger2Log2\n");
}
}
TEST(Logger, SharedLoggersThreadSafety)
{
static size_t threads_count = std::thread::hardware_concurrency();
static constexpr size_t loggers_count = 10;
static constexpr size_t logger_get_count = 1000;
Poco::Logger::root();
std::vector<std::string> names;
Poco::Logger::names(names);
size_t loggers_size_before = names.size();
std::vector<std::thread> threads;
for (size_t thread_index = 0; thread_index < threads_count; ++thread_index)
{
threads.emplace_back([]()
{
for (size_t logger_index = 0; logger_index < loggers_count; ++logger_index)
{
for (size_t iteration = 0; iteration < logger_get_count; ++iteration)
{
getLogger("Logger_" + std::to_string(logger_index));
}
}
});
}
for (auto & thread : threads)
thread.join();
Poco::Logger::names(names);
size_t loggers_size_after = names.size();
EXPECT_EQ(loggers_size_before, loggers_size_after);
}

View File

@ -82,7 +82,8 @@ class IColumn;
M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \
M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of concurrently loaded parts in a multipart upload request. 0 means unlimited.", 0) \
M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \
M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \
M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \
M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \

View File

@ -92,7 +92,8 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"},
{"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"},
{"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"},
{"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}},
{"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"},
{"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}}},
{"24.1", {{"print_pretty_type_names", false, true, "Better user experience."},
{"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"},
{"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"},

View File

@ -166,6 +166,9 @@ std::unique_ptr<AzureObjectStorageSettings> getAzureBlobStorageSettings(const Po
config.getInt(config_prefix + ".max_single_read_retries", 3),
config.getInt(config_prefix + ".max_single_download_retries", 3),
config.getInt(config_prefix + ".list_object_keys_size", 1000),
config.getUInt64(config_prefix + ".max_upload_part_size", 5ULL * 1024 * 1024 * 1024),
config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size),
config.getBool(config_prefix + ".use_native_copy", false),
config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries)
);
}

View File

@ -92,10 +92,12 @@ private:
AzureObjectStorage::AzureObjectStorage(
const String & name_,
AzureClientPtr && client_,
SettingsPtr && settings_)
SettingsPtr && settings_,
const String & container_)
: name(name_)
, client(std::move(client_))
, settings(std::move(settings_))
, container(container_)
, log(getLogger("AzureObjectStorage"))
{
}
@ -376,7 +378,8 @@ std::unique_ptr<IObjectStorage> AzureObjectStorage::cloneObjectStorage(const std
return std::make_unique<AzureObjectStorage>(
name,
getAzureBlobContainerClient(config, config_prefix),
getAzureBlobStorageSettings(config, config_prefix, context)
getAzureBlobStorageSettings(config, config_prefix, context),
container
);
}

View File

@ -24,12 +24,18 @@ struct AzureObjectStorageSettings
int max_single_read_retries_,
int max_single_download_retries_,
int list_object_keys_size_,
size_t max_upload_part_size_,
size_t max_single_part_copy_size_,
bool use_native_copy_,
size_t max_unexpected_write_error_retries_)
: max_single_part_upload_size(max_single_part_upload_size_)
, min_bytes_for_seek(min_bytes_for_seek_)
, max_single_read_retries(max_single_read_retries_)
, max_single_download_retries(max_single_download_retries_)
, list_object_keys_size(list_object_keys_size_)
, max_upload_part_size(max_upload_part_size_)
, max_single_part_copy_size(max_single_part_copy_size_)
, use_native_copy(use_native_copy_)
, max_unexpected_write_error_retries (max_unexpected_write_error_retries_)
{
}
@ -41,6 +47,10 @@ struct AzureObjectStorageSettings
size_t max_single_read_retries = 3;
size_t max_single_download_retries = 3;
int list_object_keys_size = 1000;
size_t min_upload_part_size = 16 * 1024 * 1024;
size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024;
size_t max_single_part_copy_size = 256 * 1024 * 1024;
bool use_native_copy = false;
size_t max_unexpected_write_error_retries = 4;
};
@ -56,7 +66,8 @@ public:
AzureObjectStorage(
const String & name_,
AzureClientPtr && client_,
SettingsPtr && settings_);
SettingsPtr && settings_,
const String & container_);
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
@ -119,7 +130,7 @@ public:
const std::string & config_prefix,
ContextPtr context) override;
String getObjectsNamespace() const override { return ""; }
String getObjectsNamespace() const override { return container; }
std::unique_ptr<IObjectStorage> cloneObjectStorage(
const std::string & new_namespace,
@ -131,11 +142,19 @@ public:
bool isRemote() const override { return true; }
std::shared_ptr<const AzureObjectStorageSettings> getSettings() { return settings.get(); }
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> getAzureBlobStorageClient() override
{
return client.get();
}
private:
const String name;
/// client used to access the files in the Blob Storage cloud
MultiVersion<Azure::Storage::Blobs::BlobContainerClient> client;
MultiVersion<AzureObjectStorageSettings> settings;
const String container;
LoggerPtr log;
};

View File

@ -3,6 +3,7 @@
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <Interpreters/Cache/FileCacheKey.h>
#include <Interpreters/Cache/FileCacheSettings.h>
#include "config.h"
namespace Poco
{
@ -120,6 +121,13 @@ public:
static bool canUseReadThroughCache(const ReadSettings & settings);
#if USE_AZURE_BLOB_STORAGE
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> getAzureBlobStorageClient() override
{
return object_storage->getAzureBlobStorageClient();
}
#endif
private:
FileCacheKey getCacheKey(const std::string & path) const;

View File

@ -23,11 +23,22 @@
#include <Disks/DirectoryIterator.h>
#include <Common/ThreadPool.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <Common/Exception.h>
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
#include <Common/MultiVersion.h>
#include <azure/storage/blobs.hpp>
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
class ReadBufferFromFileBase;
class WriteBufferFromFileBase;
@ -214,6 +225,14 @@ public:
virtual WriteSettings patchSettings(const WriteSettings & write_settings) const;
#if USE_AZURE_BLOB_STORAGE
virtual std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> getAzureBlobStorageClient()
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for AzureBlobStorage");
}
#endif
private:
mutable std::mutex throttlers_mutex;
ThrottlerPtr remote_read_throttler;

View File

@ -102,7 +102,7 @@ void checkS3Capabilities(
if (s3_capabilities.support_batch_delete && !checkBatchRemove(storage, key_with_trailing_slash))
{
LOG_WARNING(
&Poco::Logger::get("S3ObjectStorage"),
getLogger("S3ObjectStorage"),
"Storage for disk {} does not support batch delete operations, "
"so `s3_capabilities.support_batch_delete` was automatically turned off during the access check. "
"To remove this message set `s3_capabilities.support_batch_delete` for the disk to `false`.",
@ -213,10 +213,12 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory)
const ContextPtr & context,
bool /* skip_access_check */) -> ObjectStoragePtr
{
String container_name = config.getString(config_prefix + ".container_name", "default-container");
return std::make_unique<AzureObjectStorage>(
name,
getAzureBlobContainerClient(config, config_prefix),
getAzureBlobStorageSettings(config, config_prefix, context));
getAzureBlobStorageSettings(config, config_prefix, context),
container_name);
});
}

View File

@ -82,7 +82,7 @@ WebObjectStorage::loadFiles(const String & path, const std::unique_lock<std::sha
if (!inserted)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Loading data for {} more than once", file_path);
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Adding file: {}, size: {}", file_path, size);
LOG_TRACE(getLogger("DiskWeb"), "Adding file: {}, size: {}", file_path, size);
loaded_files.emplace_back(file_path);
}

View File

@ -1,6 +1,7 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
#include <Common/TargetSpecific.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/IDataType.h>
@ -9,6 +10,10 @@
#include <Functions/FunctionHelpers.h>
#include <base/range.h>
#if USE_MULTITARGET_CODE
#include <immintrin.h>
#endif
namespace DB
{
namespace ErrorCodes
@ -75,6 +80,49 @@ struct L2Distance
state.sum += other_state.sum;
}
#if USE_MULTITARGET_CODE
template <typename ResultType>
AVX512_FUNCTION_SPECIFIC_ATTRIBUTE static void accumulateCombine(
const ResultType * __restrict data_x,
const ResultType * __restrict data_y,
size_t i_max,
size_t & i_x,
size_t & i_y,
State<ResultType> & state)
{
__m512 sums;
if constexpr (std::is_same_v<ResultType, Float32>)
sums = _mm512_setzero_ps();
else
sums = _mm512_setzero_pd();
const size_t n = (std::is_same_v<ResultType, Float32>) ? 16 : 8;
for (; i_x + n < i_max; i_x += n, i_y += n)
{
if constexpr (std::is_same_v<ResultType, Float32>)
{
__m512 x = _mm512_loadu_ps(data_x + i_x);
__m512 y = _mm512_loadu_ps(data_y + i_y);
__m512 differences = _mm512_sub_ps(x, y);
sums = _mm512_fmadd_ps(differences, differences, sums);
}
else
{
__m512 x = _mm512_loadu_pd(data_x + i_x);
__m512 y = _mm512_loadu_pd(data_y + i_y);
__m512 differences = _mm512_sub_pd(x, y);
sums = _mm512_fmadd_pd(differences, differences, sums);
}
}
if constexpr (std::is_same_v<ResultType, Float32>)
state.sum = _mm512_reduce_add_ps(sums);
else
state.sum = _mm512_reduce_add_pd(sums);
}
#endif
template <typename ResultType>
static ResultType finalize(const State<ResultType> & state, const ConstParams &)
{
@ -189,6 +237,70 @@ struct CosineDistance
state.y_squared += other_state.y_squared;
}
#if USE_MULTITARGET_CODE
template <typename ResultType>
AVX512_FUNCTION_SPECIFIC_ATTRIBUTE static void accumulateCombine(
const ResultType * __restrict data_x,
const ResultType * __restrict data_y,
size_t i_max,
size_t & i_x,
size_t & i_y,
State<ResultType> & state)
{
__m512 dot_products;
__m512 x_squareds;
__m512 y_squareds;
if constexpr (std::is_same_v<ResultType, Float32>)
{
dot_products = _mm512_setzero_ps();
x_squareds = _mm512_setzero_ps();
y_squareds = _mm512_setzero_ps();
}
else
{
dot_products = _mm512_setzero_pd();
x_squareds = _mm512_setzero_pd();
y_squareds = _mm512_setzero_pd();
}
const size_t n = (std::is_same_v<ResultType, Float32>) ? 16 : 8;
for (; i_x + n < i_max; i_x += n, i_y += n)
{
if constexpr (std::is_same_v<ResultType, Float32>)
{
__m512 x = _mm512_loadu_ps(data_x + i_x);
__m512 y = _mm512_loadu_ps(data_y + i_y);
dot_products = _mm512_fmadd_ps(x, y, dot_products);
x_squareds = _mm512_fmadd_ps(x, x, x_squareds);
y_squareds = _mm512_fmadd_ps(y, y, y_squareds);
}
else
{
__m512 x = _mm512_loadu_pd(data_x + i_x);
__m512 y = _mm512_loadu_pd(data_y + i_y);
dot_products = _mm512_fmadd_pd(x, y, dot_products);
x_squareds = _mm512_fmadd_pd(x, x, x_squareds);
y_squareds = _mm512_fmadd_pd(y, y, y_squareds);
}
}
if constexpr (std::is_same_v<ResultType, Float32>)
{
state.dot_prod = _mm512_reduce_add_ps(dot_products);
state.x_squared = _mm512_reduce_add_ps(x_squareds);
state.y_squared = _mm512_reduce_add_ps(y_squareds);
}
else
{
state.dot_prod = _mm512_reduce_add_pd(dot_products);
state.x_squared = _mm512_reduce_add_pd(x_squareds);
state.y_squared = _mm512_reduce_add_pd(y_squareds);
}
}
#endif
template <typename ResultType>
static ResultType finalize(const State<ResultType> & state, const ConstParams &)
{
@ -352,7 +464,7 @@ private:
/// Check that arrays in both columns are the same size
for (size_t row = 0; row < offsets_x.size(); ++row)
{
if (unlikely(offsets_x[row] != offsets_y[row]))
if (offsets_x[row] != offsets_y[row]) [[unlikely]]
{
ColumnArray::Offset prev_offset = row > 0 ? offsets_x[row] : 0;
throw Exception(
@ -420,7 +532,7 @@ private:
ColumnArray::Offset prev_offset = 0;
for (size_t row : collections::range(0, offsets_y.size()))
{
if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset))
if (offsets_x[0] != offsets_y[row] - prev_offset) [[unlikely]]
{
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
@ -438,14 +550,35 @@ private:
auto & result_data = result->getData();
/// Do the actual computation
ColumnArray::Offset prev = 0;
size_t prev = 0;
size_t row = 0;
for (auto off : offsets_y)
{
size_t i = 0;
typename Kernel::template State<ResultType> state;
/// SIMD optimization: process multiple elements in both input arrays at once.
/// To avoid combinatorial explosion of SIMD kernels, focus on
/// - the two most common input/output types (Float32 x Float32) --> Float32 and (Float64 x Float64) --> Float64 instead of 10 x
/// 10 input types x 2 output types,
/// - const/non-const inputs instead of non-const/non-const inputs
/// - the two most common metrics L2 and cosine distance,
/// - the most powerful SIMD instruction set (AVX-512F).
#if USE_MULTITARGET_CODE
if constexpr (std::is_same_v<ResultType, FirstArgType> && std::is_same_v<ResultType, SecondArgType>) /// ResultType is Float32 or Float64
{
if constexpr (std::is_same_v<Kernel, L2Distance>
|| std::is_same_v<Kernel, CosineDistance>)
{
if (isArchSupported(TargetArch::AVX512F))
Kernel::template accumulateCombine<ResultType>(data_x.data(), data_y.data(), i + offsets_x[0], i, prev, state);
}
}
#else
/// Process chunks in vectorized manner
static constexpr size_t VEC_SIZE = 4;
typename Kernel::template State<ResultType> states[VEC_SIZE];
size_t i = 0;
for (; prev + VEC_SIZE < off; i += VEC_SIZE, prev += VEC_SIZE)
{
for (size_t s = 0; s < VEC_SIZE; ++s)
@ -453,10 +586,9 @@ private:
states[s], static_cast<ResultType>(data_x[i + s]), static_cast<ResultType>(data_y[prev + s]), kernel_params);
}
typename Kernel::template State<ResultType> state;
for (const auto & other_state : states)
Kernel::template combine<ResultType>(state, other_state, kernel_params);
#endif
/// Process the tail
for (; prev < off; ++i, ++prev)
{
@ -466,6 +598,7 @@ private:
result_data[row] = Kernel::finalize(state, kernel_params);
row++;
}
return result;
}

View File

@ -0,0 +1,340 @@
#include <IO/AzureBlobStorage/copyAzureBlobStorageFile.h>
#if USE_AZURE_BLOB_STORAGE
#include <Common/ProfileEvents.h>
#include <Common/typeid_cast.h>
#include <Interpreters/Context.h>
#include <IO/LimitSeekableReadBuffer.h>
#include <IO/SeekableReadBuffer.h>
#include <IO/StdStreamFromReadBuffer.h>
#include <Disks/IO/ReadBufferFromAzureBlobStorage.h>
#include <Disks/IO/WriteBufferFromAzureBlobStorage.h>
#include <Common/getRandomASCIIString.h>
#include <IO/SharedThreadPools.h>
namespace ProfileEvents
{
extern const Event AzureCopyObject;
extern const Event AzureUploadPart;
extern const Event DiskAzureCopyObject;
extern const Event DiskAzureUploadPart;
}
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_CONFIG_PARAMETER;
extern const int AZURE_BLOB_STORAGE_ERROR;
}
namespace
{
class UploadHelper
{
public:
UploadHelper(
const CreateReadBuffer & create_read_buffer_,
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client_,
size_t offset_,
size_t total_size_,
const String & dest_container_for_logging_,
const String & dest_blob_,
std::shared_ptr<const AzureObjectStorageSettings> settings_,
ThreadPoolCallbackRunner<void> schedule_,
bool for_disk_azure_blob_storage_,
const Poco::Logger * log_)
: create_read_buffer(create_read_buffer_)
, client(client_)
, offset (offset_)
, total_size (total_size_)
, dest_container_for_logging(dest_container_for_logging_)
, dest_blob(dest_blob_)
, settings(settings_)
, schedule(schedule_)
, for_disk_azure_blob_storage(for_disk_azure_blob_storage_)
, log(log_)
, max_single_part_upload_size(settings_->max_single_part_upload_size)
{
}
virtual ~UploadHelper() = default;
protected:
std::function<std::unique_ptr<SeekableReadBuffer>()> create_read_buffer;
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
size_t offset;
size_t total_size;
const String & dest_container_for_logging;
const String & dest_blob;
std::shared_ptr<const AzureObjectStorageSettings> settings;
ThreadPoolCallbackRunner<void> schedule;
bool for_disk_azure_blob_storage;
const Poco::Logger * log;
size_t max_single_part_upload_size;
struct UploadPartTask
{
size_t part_offset;
size_t part_size;
std::vector<std::string> block_ids;
bool is_finished = false;
std::exception_ptr exception;
};
size_t normal_part_size;
std::vector<std::string> block_ids;
std::list<UploadPartTask> TSA_GUARDED_BY(bg_tasks_mutex) bg_tasks;
int num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
int num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
std::mutex bg_tasks_mutex;
std::condition_variable bg_tasks_condvar;
void calculatePartSize()
{
auto max_upload_part_size = settings->max_upload_part_size;
if (!max_upload_part_size)
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_upload_part_size must not be 0");
/// We've calculated the size of a normal part (the final part can be smaller).
normal_part_size = max_upload_part_size;
}
public:
void performCopy()
{
performMultipartUpload();
}
void completeMultipartUpload()
{
auto block_blob_client = client->GetBlockBlobClient(dest_blob);
block_blob_client.CommitBlockList(block_ids);
}
void performMultipartUpload()
{
calculatePartSize();
size_t position = offset;
size_t end_position = offset + total_size;
try
{
while (position < end_position)
{
size_t next_position = std::min(position + normal_part_size, end_position);
size_t part_size = next_position - position; /// `part_size` is either `normal_part_size` or smaller if it's the final part.
uploadPart(position, part_size);
position = next_position;
}
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
waitForAllBackgroundTasks();
throw;
}
waitForAllBackgroundTasks();
completeMultipartUpload();
}
void uploadPart(size_t part_offset, size_t part_size)
{
LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, Size: {}", dest_container_for_logging, dest_blob, part_size);
if (!part_size)
{
LOG_TRACE(log, "Skipping writing an empty part.");
return;
}
if (schedule)
{
UploadPartTask * task = nullptr;
{
std::lock_guard lock(bg_tasks_mutex);
task = &bg_tasks.emplace_back();
++num_added_bg_tasks;
}
/// Notify waiting thread when task finished
auto task_finish_notify = [this, task]()
{
std::lock_guard lock(bg_tasks_mutex);
task->is_finished = true;
++num_finished_bg_tasks;
/// Notification under mutex is important here.
/// Otherwise, the WriteBuffer could be destroyed in between
/// releasing the lock and the condvar notification.
bg_tasks_condvar.notify_one();
};
try
{
task->part_offset = part_offset;
task->part_size = part_size;
schedule([this, task, task_finish_notify]()
{
try
{
processUploadPartRequest(*task);
}
catch (...)
{
task->exception = std::current_exception();
}
task_finish_notify();
}, Priority{});
}
catch (...)
{
task_finish_notify();
throw;
}
}
else
{
UploadPartTask task;
task.part_offset = part_offset;
task.part_size = part_size;
processUploadPartRequest(task);
block_ids.insert(block_ids.end(), task.block_ids.begin(), task.block_ids.end());
}
}
void processUploadPartRequest(UploadPartTask & task)
{
ProfileEvents::increment(ProfileEvents::AzureUploadPart);
if (for_disk_azure_blob_storage)
ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart);
auto block_blob_client = client->GetBlockBlobClient(dest_blob);
auto read_buffer = std::make_unique<LimitSeekableReadBuffer>(create_read_buffer(), task.part_offset, task.part_size);
while (!read_buffer->eof())
{
auto size = read_buffer->available();
if (size > 0)
{
auto block_id = getRandomASCIIString(64);
Azure::Core::IO::MemoryBodyStream memory(reinterpret_cast<const uint8_t *>(read_buffer->position()), size);
block_blob_client.StageBlock(block_id, memory);
task.block_ids.emplace_back(block_id);
read_buffer->ignore(size);
LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, block_id: {}", dest_container_for_logging, dest_blob, block_id);
}
}
std::lock_guard lock(bg_tasks_mutex); /// Protect bg_tasks from race
LOG_TRACE(log, "Writing part finished. Container: {}, Blob: {}, Parts: {}", dest_container_for_logging, dest_blob, bg_tasks.size());
}
void waitForAllBackgroundTasks()
{
if (!schedule)
return;
std::unique_lock lock(bg_tasks_mutex);
/// Suppress warnings because bg_tasks_mutex is actually held, but TSA annotations do not understand std::unique_lock
bg_tasks_condvar.wait(lock, [this]() {return TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); });
auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(bg_tasks);
for (auto & task : tasks)
{
if (task.exception)
std::rethrow_exception(task.exception);
block_ids.insert(block_ids.end(), task.block_ids.begin(), task.block_ids.end());
}
}
};
}
void copyDataToAzureBlobStorageFile(
const std::function<std::unique_ptr<SeekableReadBuffer>()> & create_read_buffer,
size_t offset,
size_t size,
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> dest_client,
const String & dest_container_for_logging,
const String & dest_blob,
std::shared_ptr<const AzureObjectStorageSettings> settings,
ThreadPoolCallbackRunner<void> schedule,
bool for_disk_azure_blob_storage)
{
UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyDataToAzureBlobStorageFile")};
helper.performCopy();
}
void copyAzureBlobStorageFile(
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> src_client,
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> dest_client,
const String & src_container_for_logging,
const String & src_blob,
size_t offset,
size_t size,
const String & dest_container_for_logging,
const String & dest_blob,
std::shared_ptr<const AzureObjectStorageSettings> settings,
const ReadSettings & read_settings,
ThreadPoolCallbackRunner<void> schedule,
bool for_disk_azure_blob_storage)
{
if (settings->use_native_copy)
{
ProfileEvents::increment(ProfileEvents::AzureCopyObject);
if (for_disk_azure_blob_storage)
ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject);
auto block_blob_client_src = src_client->GetBlockBlobClient(src_blob);
auto block_blob_client_dest = dest_client->GetBlockBlobClient(dest_blob);
auto source_uri = block_blob_client_src.GetUrl();
if (size < settings->max_single_part_copy_size)
{
block_blob_client_dest.CopyFromUri(source_uri);
}
else
{
Azure::Storage::Blobs::StartBlobCopyOperation operation = block_blob_client_dest.StartCopyFromUri(source_uri);
// Wait for the operation to finish, checking the status every 100 milliseconds.
auto copy_response = operation.PollUntilDone(std::chrono::milliseconds(100));
auto properties_model = copy_response.Value;
if (properties_model.CopySource.HasValue())
{
throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy failed");
}
}
}
else
{
LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob);
auto create_read_buffer = [&]
{
return std::make_unique<ReadBufferFromAzureBlobStorage>(src_client, src_blob, read_settings, settings->max_single_read_retries,
settings->max_single_download_retries);
};
UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyAzureBlobStorageFile")};
helper.performCopy();
}
}
}
#endif
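A minimal sketch of how performMultipartUpload() above walks the source range: parts of normal_part_size bytes are uploaded until only the (possibly smaller) tail remains. Plain C++ with illustrative sizes, no Azure SDK calls:

#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    const size_t offset = 0;
    const size_t total_size = 22 * 1024 * 1024;      /// 22 MiB source range (illustrative)
    const size_t normal_part_size = 8 * 1024 * 1024; /// would come from max_upload_part_size

    size_t position = offset;
    const size_t end_position = offset + total_size;
    while (position < end_position)
    {
        size_t next_position = std::min(position + normal_part_size, end_position);
        size_t part_size = next_position - position; /// the final part may be smaller
        std::cout << "upload part at " << position << " of size " << part_size << '\n';
        position = next_position;
    }
}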

View File

@ -0,0 +1,56 @@
#pragma once
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
#include <Storages/StorageAzureBlobCluster.h>
#include <Storages/StorageAzureBlob.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <base/types.h>
#include <functional>
#include <memory>
namespace DB
{
class SeekableReadBuffer;
using CreateReadBuffer = std::function<std::unique_ptr<SeekableReadBuffer>()>;
/// Copies a file from AzureBlobStorage to AzureBlobStorage.
/// The parameters `src_offset` and `src_size` specify a part in the source to copy.
void copyAzureBlobStorageFile(
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> src_client,
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> dest_client,
const String & src_container_for_logging,
const String & src_blob,
size_t src_offset,
size_t src_size,
const String & dest_container_for_logging,
const String & dest_blob,
std::shared_ptr<const AzureObjectStorageSettings> settings,
const ReadSettings & read_settings,
ThreadPoolCallbackRunner<void> schedule_ = {},
bool for_disk_azure_blob_storage = false);
/// Copies data from any seekable source to AzureBlobStorage.
/// The same functionality can be achieved with the function copyData() and the class WriteBufferFromAzureBlobStorage,
/// however copyDataToAzureBlobStorageFile() is faster and uses less memory.
/// The callback `create_read_buffer` can be called from multiple threads in parallel, so it should be thread-safe.
/// The parameters `offset` and `size` specify a part in the source to copy.
void copyDataToAzureBlobStorageFile(
const std::function<std::unique_ptr<SeekableReadBuffer>()> & create_read_buffer,
size_t offset,
size_t size,
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client,
const String & dest_container_for_logging,
const String & dest_blob,
std::shared_ptr<const AzureObjectStorageSettings> settings,
ThreadPoolCallbackRunner<void> schedule_ = {},
bool for_disk_azure_blob_storage = false);
}
#endif

View File

@ -605,7 +605,7 @@ ActionsDAGPtr ActionsDAG::cloneSubDAG(const NodeRawConstPtrs & outputs, bool rem
return actions;
}
static ColumnWithTypeAndName executeActionForHeader(const ActionsDAG::Node * node, ColumnsWithTypeAndName arguments)
static ColumnWithTypeAndName executeActionForPartialResult(const ActionsDAG::Node * node, ColumnsWithTypeAndName arguments, size_t input_rows_count)
{
ColumnWithTypeAndName res_column;
res_column.type = node->result_type;
@ -615,7 +615,7 @@ static ColumnWithTypeAndName executeActionForHeader(const ActionsDAG::Node * nod
{
case ActionsDAG::ActionType::FUNCTION:
{
res_column.column = node->function->execute(arguments, res_column.type, 0, true);
res_column.column = node->function->execute(arguments, res_column.type, input_rows_count, true);
break;
}
@ -628,13 +628,24 @@ static ColumnWithTypeAndName executeActionForHeader(const ActionsDAG::Node * nod
if (!array)
throw Exception(ErrorCodes::TYPE_MISMATCH,
"ARRAY JOIN of not array nor map: {}", node->result_name);
res_column.column = array->getDataPtr()->cloneEmpty();
ColumnPtr data;
if (input_rows_count < array->size())
data = array->getDataInRange(0, input_rows_count);
else
data = array->getDataPtr();
res_column.column = data;
break;
}
case ActionsDAG::ActionType::COLUMN:
{
res_column.column = node->column->cloneResized(0);
auto column = node->column;
if (input_rows_count < column->size())
column = column->cloneResized(input_rows_count);
res_column.column = column;
break;
}
@ -681,7 +692,7 @@ Block ActionsDAG::updateHeader(Block header) const
ColumnsWithTypeAndName result_columns;
try
{
result_columns = evaluatePartialResult(node_to_column, outputs, true);
result_columns = evaluatePartialResult(node_to_column, outputs, /* input_rows_count= */ 0, /* throw_on_error= */ true);
}
catch (Exception & e)
{
@ -710,8 +721,11 @@ Block ActionsDAG::updateHeader(Block header) const
ColumnsWithTypeAndName ActionsDAG::evaluatePartialResult(
IntermediateExecutionResult & node_to_column,
const NodeRawConstPtrs & outputs,
size_t input_rows_count,
bool throw_on_error)
{
chassert(input_rows_count <= 1); /// evaluatePartialResult() should be used only to evaluate headers or constants
ColumnsWithTypeAndName result_columns;
result_columns.reserve(outputs.size());
@ -768,7 +782,7 @@ ColumnsWithTypeAndName ActionsDAG::evaluatePartialResult(
node->result_name);
if (node->type != ActionsDAG::ActionType::INPUT && has_all_arguments)
node_to_column[node] = executeActionForHeader(node, std::move(arguments));
node_to_column[node] = executeActionForPartialResult(node, std::move(arguments), input_rows_count);
}
}

View File

@ -278,6 +278,7 @@ public:
static ColumnsWithTypeAndName evaluatePartialResult(
IntermediateExecutionResult & node_to_column,
const NodeRawConstPtrs & outputs,
size_t input_rows_count,
bool throw_on_error);
/// For apply materialize() function for every output.

View File

@ -353,7 +353,18 @@ AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr
auto [it, inserted] = shard.iterators.try_emplace(key.hash);
auto now = std::chrono::steady_clock::now();
auto timeout_ms = getBusyWaitTimeoutMs(settings, shard, shard_num, flush_time_points, now);
auto timeout_ms = getBusyWaitTimeoutMs(settings, shard, flush_time_points, now);
if (timeout_ms != shard.busy_timeout_ms)
{
LOG_TRACE(
log,
"Asynchronous timeout {} from {} to {} for queue shard {}.",
timeout_ms < shard.busy_timeout_ms ? "decreased" : "increased",
shard.busy_timeout_ms.count(),
timeout_ms.count(),
size_t(shard_num));
}
if (inserted)
it->second = shard.queue.emplace(now + timeout_ms, Container{key, std::make_unique<InsertData>(timeout_ms)}).first;
@ -431,7 +442,6 @@ AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr
AsynchronousInsertQueue::Milliseconds AsynchronousInsertQueue::getBusyWaitTimeoutMs(
const Settings & settings,
const QueueShard & shard,
size_t shard_num,
const QueueShardFlushTimeHistory::TimePoints & flush_time_points,
std::chrono::steady_clock::time_point now) const
{
@ -460,13 +470,6 @@ AsynchronousInsertQueue::Milliseconds AsynchronousInsertQueue::getBusyWaitTimeou
auto timeout_ms = std::max(
std::chrono::duration_cast<Milliseconds>(shard.busy_timeout_ms * (1.0 + increase_rate)),
shard.busy_timeout_ms + Milliseconds(1));
if (timeout_ms != shard.busy_timeout_ms)
LOG_TRACE(
log,
"Async timeout increased from {} to {} for queue shard {}.",
shard.busy_timeout_ms.count(),
timeout_ms.count(),
shard_num);
return normalize(timeout_ms);
}
@ -475,18 +478,7 @@ AsynchronousInsertQueue::Milliseconds AsynchronousInsertQueue::getBusyWaitTimeou
/// long enough (exceeding the adjusted timeout).
/// This ensures the timeout value converges to the minimum over time for non-frequent inserts.
else if (last_insert_time + decreased_timeout_ms < now && t1 + decreased_timeout_ms < t2)
{
auto timeout_ms = decreased_timeout_ms;
if (timeout_ms != shard.busy_timeout_ms)
LOG_TRACE(
log,
"Async timeout decreased from {} to {} for queue shard {}.",
shard.busy_timeout_ms.count(),
timeout_ms.count(),
shard_num);
return normalize(timeout_ms);
}
return normalize(decreased_timeout_ms);
return normalize(shard.busy_timeout_ms);
}
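A standalone sketch of the timeout-increase arithmetic retained above (illustrative values): the new busy timeout is the current one scaled by (1 + increase_rate), but always at least 1 ms larger so that very small timeouts still grow:

#include <algorithm>
#include <chrono>
#include <iostream>

using Milliseconds = std::chrono::milliseconds;

Milliseconds increaseTimeout(Milliseconds busy_timeout_ms, double increase_rate)
{
    auto scaled = std::chrono::duration_cast<Milliseconds>(busy_timeout_ms * (1.0 + increase_rate));
    return std::max(scaled, busy_timeout_ms + Milliseconds(1));
}

int main()
{
    std::cout << increaseTimeout(Milliseconds(200), 0.2).count() << " ms\n"; /// 240 ms
    std::cout << increaseTimeout(Milliseconds(3), 0.2).count() << " ms\n";   /// 4 ms (the +1 ms floor applies)
}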

View File

@ -248,7 +248,6 @@ private:
Milliseconds getBusyWaitTimeoutMs(
const Settings & settings,
const QueueShard & shard,
size_t shard_num,
const QueueShardFlushTimeHistory::TimePoints & flush_time_points,
std::chrono::steady_clock::time_point now) const;

View File

@ -32,6 +32,7 @@ namespace ErrorCodes
extern const int TOO_LARGE_DISTRIBUTED_DEPTH;
extern const int LOGICAL_ERROR;
extern const int CLUSTER_DOESNT_EXIST;
extern const int UNEXPECTED_CLUSTER;
}
namespace ClusterProxy
@ -374,12 +375,12 @@ void executeQueryWithParallelReplicas(
shard_num = column->getUInt(0);
}
ClusterPtr new_cluster;
const auto shard_count = not_optimized_cluster->getShardCount();
ClusterPtr new_cluster = not_optimized_cluster;
/// If we got a valid shard_num from the query initiator, then the parallel replicas scope is the specified shard.
/// shards are numbered in order of appearance in the cluster config
if (shard_num > 0)
{
const auto shard_count = not_optimized_cluster->getShardCount();
if (shard_num > shard_count)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
@ -395,17 +396,18 @@ void executeQueryWithParallelReplicas(
// get cluster for shard specified by shard_num
// shard_num is 1-based, but getClusterWithSingleShard expects 0-based index
auto single_shard_cluster = not_optimized_cluster->getClusterWithSingleShard(shard_num - 1);
// convert cluster to representation expected by parallel replicas
new_cluster = single_shard_cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas);
new_cluster = not_optimized_cluster->getClusterWithSingleShard(shard_num - 1);
}
else
{
new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas);
if (not_optimized_cluster->getShardCount() > 1)
throw DB::Exception(
ErrorCodes::UNEXPECTED_CLUSTER,
"`cluster_for_parallel_replicas` setting refers to cluster with several shards. Expected a cluster with one shard");
}
auto coordinator
= std::make_shared<ParallelReplicasReadingCoordinator>(new_cluster->getShardCount(), settings.parallel_replicas_mark_segment_size);
auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(
new_cluster->getShardsInfo().begin()->getAllNodeCount(), settings.parallel_replicas_mark_segment_size);
auto external_tables = new_context->getExternalTables();
auto read_from_remote = std::make_unique<ReadFromParallelRemoteReplicasStep>(
query_ast,

View File

@ -1076,15 +1076,22 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
{
const auto * kind = create.is_dictionary ? "Dictionary" : "Table";
const auto * kind_upper = create.is_dictionary ? "DICTIONARY" : "TABLE";
bool is_replicated_database_internal = database->getEngineName() == "Replicated" && getContext()->getClientInfo().is_replicated_database_internal;
bool from_path = create.attach_from_path.has_value();
bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY;
if (database->getEngineName() == "Replicated" && getContext()->getClientInfo().is_replicated_database_internal
&& !internal)
if (is_replicated_database_internal && !internal)
{
if (create.uuid == UUIDHelpers::Nil)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table UUID is not specified in DDL log");
}
bool from_path = create.attach_from_path.has_value();
if (create.refresh_strategy && database->getEngineName() != "Atomic")
throw Exception(ErrorCodes::INCORRECT_QUERY,
"Refreshable materialized view requires Atomic database engine, but database {} has engine {}", create.getDatabase(), database->getEngineName());
/// TODO: Support Replicated databases, only with Shared/ReplicatedMergeTree.
/// Figure out how to make the refreshed data appear all at once on other
/// replicas; maybe a replicated SYSTEM SYNC REPLICA query before the rename?
if (database->getUUID() != UUIDHelpers::Nil)
{
@ -1108,7 +1115,6 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
}
else
{
bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY;
bool has_uuid = create.uuid != UUIDHelpers::Nil || create.to_inner_uuid != UUIDHelpers::Nil;
if (has_uuid && !is_on_cluster && !internal)
{
@ -1121,13 +1127,6 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
"{} UUID specified, but engine of database {} is not Atomic", kind, create.getDatabase());
}
if (create.refresh_strategy && database->getEngineName() != "Atomic")
throw Exception(ErrorCodes::INCORRECT_QUERY,
"Refreshable materialized view requires Atomic database engine, but database {} has engine {}", create.getDatabase(), database->getEngineName());
/// TODO: Support Replicated databases, only with Shared/ReplicatedMergeTree.
/// Figure out how to make the refreshed data appear all at once on other
/// replicas; maybe a replicated SYSTEM SYNC REPLICA query before the rename?
/// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either
/// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or
/// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts.

View File

@ -77,6 +77,7 @@ public:
const DataTypes & getElementsTypes() const { return set_elements_types; }
bool hasExplicitSetElements() const { return fill_set_elements || (!set_elements.empty() && set_elements.front()->size() == data.getTotalRowCount()); }
bool hasSetElements() const { return !set_elements.empty(); }
Columns getSetElements() const { checkIsCreated(); return { set_elements.begin(), set_elements.end() }; }
void checkColumnsNumber(size_t num_key_columns) const;

View File

@ -661,7 +661,7 @@ namespace
const ActionsDAG::NodeRawConstPtrs & target_expr,
ConjunctionMap && conjunction)
{
auto columns = ActionsDAG::evaluatePartialResult(conjunction, target_expr, false);
auto columns = ActionsDAG::evaluatePartialResult(conjunction, target_expr, /* input_rows_count= */ 1, /* throw_on_error= */ false);
for (const auto & column : columns)
if (!column.column)
return {};

View File

@ -663,15 +663,17 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
if (query_span && query_span->trace_id != UUID{})
LOG_TRACE(getLogger("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id);
/// Used for logging query start time in system.query_log
auto query_start_time = std::chrono::system_clock::now();
/// Used to set the watch in QueryStatus and the output formats. It is not based on query_start_time as that might be based on
/// the value passed by the client
/// Used for:
/// * Setting the watch in QueryStatus (controls timeouts and progress) and the output formats
/// * Logging query duration (system.query_log)
Stopwatch start_watch{CLOCK_MONOTONIC};
const auto & client_info = context->getClientInfo();
if (!internal)
if (!internal && client_info.initial_query_start_time == 0)
{
// If it's not an internal query and we don't see an initial_query_start_time yet, initialize it
// to current time. Internal queries are those executed without an independent client context,
@ -679,15 +681,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
// possible to have unset initial_query_start_time for non-internal and non-initial queries. For
// example, the query is from an initiator that is running an old version of clickhouse.
// On the other hand, if it's initialized then take it as the start of the query
if (client_info.initial_query_start_time == 0)
{
context->setInitialQueryStartTime(query_start_time);
}
else
{
query_start_time = std::chrono::time_point<std::chrono::system_clock>(
std::chrono::microseconds{client_info.initial_query_start_time_microseconds});
}
context->setInitialQueryStartTime(query_start_time);
}
assert(internal || CurrentThread::get().getQueryContext());
@ -933,6 +927,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
reason = "asynchronous insert queue is not configured";
else if (insert_query->select)
reason = "insert query has select";
else if (settings.deduplicate_blocks_in_dependent_materialized_views)
reason = "dependent materialized views block deduplication is enabled";
else if (insert_query->hasInlinedData())
async_insert = true;

View File

@ -71,6 +71,13 @@ namespace
size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`).
/// In all known cases secret arguments are consecutive
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
std::vector<std::string> nested_maps;
bool hasSecrets() const
{
return count != 0 || !nested_maps.empty();
}
};
Result getResult() const { return result; }
@ -127,6 +134,10 @@ namespace
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (function.name == "url")
{
findURLSecretArguments();
}
}
void findMySQLFunctionSecretArguments()
@ -143,6 +154,25 @@ namespace
}
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
size_t count = arguments->size();
while (count > 0)
{
const ASTFunction * f = arguments->at(count - 1)->as<ASTFunction>();
if (!f)
break;
if (f->name == "headers")
result.nested_maps.push_back(f->name);
else if (f->name != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
@ -156,9 +186,10 @@ namespace
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'])
/// s3('url', 'format', 'structure' [, 'compression'])
if ((url_arg_idx + 3 <= arguments->size()) && (arguments->size() <= url_arg_idx + 4))
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
@ -174,7 +205,14 @@ namespace
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
markSecretArgument(url_arg_idx + 2);
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
excludeS3OrURLNestedMaps();
}
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
@ -347,6 +385,10 @@ namespace
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
findS3TableEngineSecretArguments();
}
else if (engine_name == "URL")
{
findURLSecretArguments();
}
}
void findExternalDistributedTableEngineSecretArguments()
@ -373,9 +415,10 @@ namespace
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// S3('url', NOSIGN, 'format' [, 'compression'])
/// S3('url', 'format', 'compression')
if ((3 <= arguments->size()) && (arguments->size() <= 4))
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((3 <= count) && (count <= 4))
{
String second_arg;
if (tryGetStringFromArgument(1, &second_arg))
@ -383,7 +426,7 @@ namespace
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (arguments->size() == 3)
if (count == 3)
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
@ -391,11 +434,12 @@ namespace
}
}
/// We replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures:
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
markSecretArgument(2);
if (2 < count)
markSecretArgument(2);
}
void findDatabaseEngineSecretArguments()
@ -724,6 +768,25 @@ ASTSelectWithUnionQuery * ASTFunction::tryGetQueryArgument() const
}
static bool formatNamedArgWithHiddenValue(IAST * arg, const IAST::FormatSettings & settings, IAST::FormatState & state, IAST::FormatStateStacked frame)
{
const auto * equals_func = arg->as<ASTFunction>();
if (!equals_func || (equals_func->name != "equals"))
return false;
const auto * expr_list = equals_func->arguments->as<ASTExpressionList>();
if (!expr_list)
return false;
const auto & equal_args = expr_list->children;
if (equal_args.size() != 2)
return false;
equal_args[0]->formatImpl(settings, state, frame);
settings.ostr << (settings.hilite ? IAST::hilite_operator : "") << " = " << (settings.hilite ? IAST::hilite_none : "");
settings.ostr << "'[HIDDEN]'";
return true;
}
void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
frame.expression_list_prepend_whitespace = false;
@ -1133,17 +1196,37 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
if (argument->as<ASTSetQuery>())
settings.ostr << "SETTINGS ";
if (!settings.show_secrets && (secret_arguments.start <= i) && (i < secret_arguments.start + secret_arguments.count))
if (!settings.show_secrets)
{
if (secret_arguments.are_named)
if (secret_arguments.start <= i && i < secret_arguments.start + secret_arguments.count)
{
assert_cast<const ASTFunction *>(argument.get())->arguments->children[0]->formatImpl(settings, state, nested_dont_need_parens);
settings.ostr << (settings.hilite ? hilite_operator : "") << " = " << (settings.hilite ? hilite_none : "");
if (secret_arguments.are_named)
{
assert_cast<const ASTFunction *>(argument.get())->arguments->children[0]->formatImpl(settings, state, nested_dont_need_parens);
settings.ostr << (settings.hilite ? hilite_operator : "") << " = " << (settings.hilite ? hilite_none : "");
}
settings.ostr << "'[HIDDEN]'";
if (size <= secret_arguments.start + secret_arguments.count && !secret_arguments.are_named)
break; /// All other arguments should also be hidden.
continue;
}
const ASTFunction * function = argument->as<ASTFunction>();
if (function && function->arguments && std::count(secret_arguments.nested_maps.begin(), secret_arguments.nested_maps.end(), function->name) != 0)
{
/// headers('foo' = '[HIDDEN]', 'bar' = '[HIDDEN]')
settings.ostr << (settings.hilite ? hilite_function : "") << function->name << (settings.hilite ? hilite_none : "") << "(";
for (size_t j = 0; j < function->arguments->children.size(); ++j)
{
if (j != 0)
settings.ostr << ", ";
auto inner_arg = function->arguments->children[j];
if (!formatNamedArgWithHiddenValue(inner_arg.get(), settings, state, nested_dont_need_parens))
inner_arg->formatImpl(settings, state, nested_dont_need_parens);
}
settings.ostr << ")";
continue;
}
settings.ostr << "'[HIDDEN]'";
if (size <= secret_arguments.start + secret_arguments.count && !secret_arguments.are_named)
break; /// All other arguments should also be hidden.
continue;
}
if ((i == 1) && special_hilite_regexp
@ -1166,7 +1249,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
bool ASTFunction::hasSecretParts() const
{
return (FunctionSecretArgumentsFinder{*this}.getResult().count > 0) || childrenHaveSecretParts();
return (FunctionSecretArgumentsFinder{*this}.getResult().hasSecrets()) || childrenHaveSecretParts();
}
String getFunctionName(const IAST * ast)
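A standalone sketch of the scan performed by excludeS3OrURLNestedMaps() above: walk the argument list from the end, skip trailing headers(...) / extra_credentials(...) arguments, and report how many positional arguments remain. The real function also records "headers" in nested_maps so its values can be masked; this simplified version only counts:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

size_t excludeTrailingNestedMaps(const std::vector<std::string> & argument_names)
{
    size_t count = argument_names.size();
    while (count > 0)
    {
        const std::string & name = argument_names[count - 1];
        if (name != "headers" && name != "extra_credentials")
            break;
        --count; /// trailing headers(...)/extra_credentials(...) are not positional arguments
    }
    return count;
}

int main()
{
    /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', headers(...))
    std::vector<std::string> args = {"url", "aws_access_key_id", "aws_secret_access_key", "format", "headers"};
    std::cout << excludeTrailingNestedMaps(args) << '\n'; /// prints 4, so argument index 2 is still marked secret
}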

View File

@ -100,7 +100,7 @@ public:
merged_rows = 0;
sum_blocks_granularity = 0;
++total_chunks;
total_allocated_bytes += chunk.allocatedBytes();
total_allocated_bytes += chunk.bytes();
need_flush = false;
return chunk;
@ -122,7 +122,7 @@ public:
{
size_t merged_bytes = 0;
for (const auto & column : columns)
merged_bytes += column->allocatedBytes();
merged_bytes += column->byteSize();
if (merged_bytes >= max_block_size_bytes)
return true;
}

View File

@ -157,6 +157,34 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::Subqueries subque
query_plan.unitePlans(std::move(creating_sets), std::move(plans));
}
QueryPipelineBuilderPtr addCreatingSetsTransform(QueryPipelineBuilderPtr pipeline, PreparedSets::Subqueries subqueries, ContextPtr context)
{
DataStreams input_streams;
input_streams.emplace_back(DataStream{pipeline->getHeader()});
QueryPipelineBuilders pipelines;
pipelines.reserve(1 + subqueries.size());
pipelines.push_back(std::move(pipeline));
auto plan_settings = QueryPlanOptimizationSettings::fromContext(context);
auto pipeline_settings = BuildQueryPipelineSettings::fromContext(context);
for (auto & future_set : subqueries)
{
if (future_set->get())
continue;
auto plan = future_set->build(context);
if (!plan)
continue;
input_streams.emplace_back(plan->getCurrentDataStream());
pipelines.emplace_back(plan->buildQueryPipeline(plan_settings, pipeline_settings));
}
return CreatingSetsStep(input_streams).updatePipeline(std::move(pipelines), pipeline_settings);
}
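The new addCreatingSetsTransform above folds the main pipeline together with one extra pipeline per subquery whose set has not been built yet; already-built sets and subqueries that yield no plan are skipped. A standalone sketch of that skip-and-collect pattern, with std::optional standing in for a future set (illustrative only, not the PreparedSets API):

#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct FutureSet
{
    std::string name;
    std::optional<int> built;   // non-empty means the set is already available
};

// Return the names of the sets that still have to be built, preserving order.
std::vector<std::string> collectPendingSets(const std::vector<FutureSet> & subqueries)
{
    std::vector<std::string> pending;
    pending.reserve(subqueries.size());
    for (const auto & future_set : subqueries)
    {
        if (future_set.built)   // analogue of future_set->get() returning a ready set
            continue;
        pending.push_back(future_set.name);
    }
    return pending;
}

int main()
{
    std::vector<FutureSet> sets{{"s1", 42}, {"s2", std::nullopt}, {"s3", std::nullopt}};
    for (const auto & name : collectPendingSets(sets))
        std::cout << "build pipeline for " << name << '\n';   // s2, s3
}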
std::vector<std::unique_ptr<QueryPlan>> DelayedCreatingSetsStep::makePlansForSets(DelayedCreatingSetsStep && step)
{
std::vector<std::unique_ptr<QueryPlan>> plans;

View File

@ -72,4 +72,6 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::Subqueries subque
void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context);
QueryPipelineBuilderPtr addCreatingSetsTransform(QueryPipelineBuilderPtr pipeline, PreparedSets::Subqueries subqueries, ContextPtr context);
}

View File

@ -12,7 +12,7 @@
#include <Processors/Sources/DelayedSource.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Interpreters/ActionsDAG.h>
#include "Common/logger_useful.h"
#include <Common/logger_useful.h>
#include <Common/checkStackSize.h>
#include <Core/QueryProcessingStage.h>
#include <Client/ConnectionPool.h>
@ -375,10 +375,11 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep(
, storage_limits(std::move(storage_limits_))
, log(log_)
{
std::vector<String> description;
chassert(cluster->getShardCount() == 1);
for (const auto & address : cluster->getShardsAddresses())
description.push_back(fmt::format("Replica: {}", address[0].host_name));
std::vector<String> description;
for (const auto & pool : cluster->getShardsInfo().front().per_replica_pools)
description.push_back(fmt::format("Replica: {}", pool->getHost()));
setStepDescription(boost::algorithm::join(description, ", "));
}
@ -399,51 +400,44 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder
const Settings & current_settings = context->getSettingsRef();
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
const auto & shard = cluster->getShardsInfo().at(0);
size_t all_replicas_count = current_settings.max_parallel_replicas;
if (all_replicas_count > cluster->getShardsInfo().size())
if (all_replicas_count > shard.getAllNodeCount())
{
LOG_INFO(getLogger("ReadFromParallelRemoteReplicasStep"),
"The number of replicas requested ({}) is bigger than the real number available in the cluster ({}). "\
"Will use the latter number to execute the query.", current_settings.max_parallel_replicas, cluster->getShardsInfo().size());
all_replicas_count = cluster->getShardsInfo().size();
LOG_INFO(
getLogger("ReadFromParallelRemoteReplicasStep"),
"The number of replicas requested ({}) is bigger than the real number available in the cluster ({}). "
"Will use the latter number to execute the query.",
current_settings.max_parallel_replicas,
shard.getAllNodeCount());
all_replicas_count = shard.getAllNodeCount();
}
/// Find local shard. It might happen that there is no local shard, but that's fine
for (const auto & shard: cluster->getShardsInfo())
{
if (shard.isLocal())
{
IConnections::ReplicaInfo replica_info
{
.all_replicas_count = all_replicas_count,
/// `shard_num` will be equal to the number of the given replica in the cluster (set by `Cluster::getClusterWithReplicasAsShards`).
/// we should use this number specifically because efficiency of data distribution by consistent hash depends on it.
.number_of_current_replica = shard.shard_num - 1,
};
addPipeForSingeReplica(pipes, shard.pool, replica_info);
}
std::vector<ConnectionPoolWithFailover::Base::ShuffledPool> shuffled_pool;
if (all_replicas_count < shard.getAllNodeCount())
{
shuffled_pool = shard.pool->getShuffledPools(current_settings);
shuffled_pool.resize(all_replicas_count);
}
else
{
/// try to preserve replicas order if all replicas in cluster are used for query execution
/// it's important for data locality during query execution
auto priority_func = [](size_t i) { return Priority{static_cast<Int64>(i)}; };
shuffled_pool = shard.pool->getShuffledPools(current_settings, priority_func);
}
auto current_shard = cluster->getShardsInfo().begin();
while (pipes.size() != all_replicas_count)
for (size_t i=0; i < all_replicas_count; ++i)
{
if (current_shard->isLocal())
{
++current_shard;
continue;
}
IConnections::ReplicaInfo replica_info
{
.all_replicas_count = all_replicas_count,
/// `shard_num` will be equal to the number of the given replica in the cluster (set by `Cluster::getClusterWithReplicasAsShards`).
/// we should use this number specifically because efficiency of data distribution by consistent hash depends on it.
.number_of_current_replica = current_shard->shard_num - 1,
.number_of_current_replica = i,
};
addPipeForSingeReplica(pipes, current_shard->pool, replica_info);
++current_shard;
addPipeForSingeReplica(pipes, shuffled_pool[i].pool, replica_info);
}
auto pipe = Pipe::unitePipes(std::move(pipes));
@ -456,7 +450,8 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder
}
void ReadFromParallelRemoteReplicasStep::addPipeForSingeReplica(Pipes & pipes, std::shared_ptr<ConnectionPoolWithFailover> pool, IConnections::ReplicaInfo replica_info)
void ReadFromParallelRemoteReplicasStep::addPipeForSingeReplica(
Pipes & pipes, const ConnectionPoolPtr & pool, IConnections::ReplicaInfo replica_info)
{
bool add_agg_info = stage == QueryProcessingStage::WithMergeableState;
bool add_totals = false;
@ -476,7 +471,14 @@ void ReadFromParallelRemoteReplicasStep::addPipeForSingeReplica(Pipes & pipes, s
assert(output_stream);
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage,
pool,
query_string,
output_stream->header,
context,
throttler,
scalars,
external_tables,
stage,
RemoteQueryExecutor::Extension{.parallel_reading_coordinator = coordinator, .replica_info = std::move(replica_info)});
remote_query_executor->setLogger(log);
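The rewritten initializePipeline above selects connection pools for parallel replicas in two modes: when fewer replicas are requested than the shard offers, it takes a prefix of a shuffled pool list; when every replica participates, it keeps the original order to preserve data locality, and the loop index becomes number_of_current_replica. A standalone sketch of that selection logic, using host names instead of connection pools (illustrative names only):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <random>
#include <string>
#include <vector>

std::vector<std::string> pickReplicas(std::vector<std::string> replicas, size_t requested)
{
    size_t use_count = std::min(requested, replicas.size());
    if (use_count < replicas.size())
    {
        // Not all replicas participate: randomize which ones are chosen.
        std::mt19937 gen(std::random_device{}());
        std::shuffle(replicas.begin(), replicas.end(), gen);
        replicas.resize(use_count);
    }
    // Otherwise keep the original order, which matters for data locality.
    return replicas;
}

int main()
{
    std::vector<std::string> all{"host1", "host2", "host3", "host4"};
    auto chosen = pickReplicas(all, 2);
    for (size_t i = 0; i < chosen.size(); ++i)
        std::cout << "replica " << i << " -> " << chosen[i] << '\n';   // i plays the role of number_of_current_replica
}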

View File

@ -9,10 +9,6 @@
namespace DB
{
class ConnectionPoolWithFailover;
using ConnectionPoolWithFailoverPtr = std::shared_ptr<ConnectionPoolWithFailover>;
class Throttler;
using ThrottlerPtr = std::shared_ptr<Throttler>;
@ -91,8 +87,7 @@ public:
void enforceAggregationInOrder();
private:
void addPipeForSingeReplica(Pipes & pipes, std::shared_ptr<ConnectionPoolWithFailover> pool, IConnections::ReplicaInfo replica_info);
void addPipeForSingeReplica(Pipes & pipes, const ConnectionPoolPtr & pool, IConnections::ReplicaInfo replica_info);
ClusterPtr cluster;
ASTPtr query_ast;

View File

@ -11,8 +11,9 @@ namespace ErrorCodes
}
ITTLAlgorithm::ITTLAlgorithm(
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
: description(description_)
const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
: ttl_expressions(ttl_expressions_)
, description(description_)
, old_ttl_info(old_ttl_info_)
, current_time(current_time_)
, force(force_)

View File

@ -8,6 +8,12 @@
namespace DB
{
struct TTLExpressions
{
ExpressionActionsPtr expression;
ExpressionActionsPtr where_expression;
};
/**
* Represents the actions, which are required to do
* with data, when TTL is expired: delete, aggregate, etc.
@ -18,7 +24,7 @@ public:
using TTLInfo = IMergeTreeDataPart::TTLInfo;
using MutableDataPartPtr = MergeTreeMutableDataPartPtr;
ITTLAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
ITTLAlgorithm(const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
virtual ~ITTLAlgorithm() = default;
virtual void execute(Block & block) = 0;
@ -39,6 +45,7 @@ protected:
bool isTTLExpired(time_t ttl) const;
UInt32 getTimestampByIndex(const IColumn * column, size_t index) const;
const TTLExpressions ttl_expressions;
const TTLDescription description;
const TTLInfo old_ttl_info;
const time_t current_time;

View File

@ -5,13 +5,14 @@ namespace DB
{
TTLAggregationAlgorithm::TTLAggregationAlgorithm(
const TTLExpressions & ttl_expressions_,
const TTLDescription & description_,
const TTLInfo & old_ttl_info_,
time_t current_time_,
bool force_,
const Block & header_,
const MergeTreeData & storage_)
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
: ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_)
, header(header_)
{
current_key_value.resize(description.group_by_keys.size());
@ -75,8 +76,8 @@ void TTLAggregationAlgorithm::execute(Block & block)
const auto & column_names = header.getNames();
MutableColumns aggregate_columns = header.cloneEmptyColumns();
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column);
auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column);
auto where_column = executeExpressionAndGetColumn(ttl_expressions.where_expression, block, description.where_result_column);
size_t rows_aggregated = 0;
size_t current_key_start = 0;
@ -157,8 +158,8 @@ void TTLAggregationAlgorithm::execute(Block & block)
/// If some rows were aggregated we have to recalculate ttl info's
if (some_rows_were_aggregated)
{
auto ttl_column_after_aggregation = executeExpressionAndGetColumn(description.expression, block, description.result_column);
auto where_column_after_aggregation = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column);
auto ttl_column_after_aggregation = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column);
auto where_column_after_aggregation = executeExpressionAndGetColumn(ttl_expressions.where_expression, block, description.where_result_column);
for (size_t i = 0; i < block.rows(); ++i)
{
bool where_filter_passed = !where_column_after_aggregation || where_column_after_aggregation->getBool(i);

View File

@ -13,6 +13,7 @@ class TTLAggregationAlgorithm final : public ITTLAlgorithm
{
public:
TTLAggregationAlgorithm(
const TTLExpressions & ttl_expressions_,
const TTLDescription & description_,
const TTLInfo & old_ttl_info_,
time_t current_time_,

View File

@ -4,6 +4,7 @@ namespace DB
{
TTLColumnAlgorithm::TTLColumnAlgorithm(
const TTLExpressions & ttl_expressions_,
const TTLDescription & description_,
const TTLInfo & old_ttl_info_,
time_t current_time_,
@ -12,7 +13,7 @@ TTLColumnAlgorithm::TTLColumnAlgorithm(
const ExpressionActionsPtr & default_expression_,
const String & default_column_name_,
bool is_compact_part_)
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
: ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_)
, column_name(column_name_)
, default_expression(default_expression_)
, default_column_name(default_column_name_)
@ -49,7 +50,7 @@ void TTLColumnAlgorithm::execute(Block & block)
if (default_column)
default_column = default_column->convertToFullColumnIfConst();
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column);
auto & column_with_type = block.getByName(column_name);
const IColumn * values_column = column_with_type.column.get();

View File

@ -11,6 +11,7 @@ class TTLColumnAlgorithm final : public ITTLAlgorithm
{
public:
TTLColumnAlgorithm(
const TTLExpressions & ttl_expressions_,
const TTLDescription & description_,
const TTLInfo & old_ttl_info_,
time_t current_time_,

View File

@ -4,8 +4,8 @@ namespace DB
{
TTLDeleteAlgorithm::TTLDeleteAlgorithm(
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
: ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_)
{
if (!isMinTTLExpired())
new_ttl_info = old_ttl_info;
@ -19,8 +19,8 @@ void TTLDeleteAlgorithm::execute(Block & block)
if (!block || !isMinTTLExpired())
return;
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column);
auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column);
auto where_column = executeExpressionAndGetColumn(ttl_expressions.where_expression, block, description.where_result_column);
MutableColumns result_columns;
const auto & column_names = block.getNames();
@ -54,7 +54,7 @@ void TTLDeleteAlgorithm::execute(Block & block)
void TTLDeleteAlgorithm::finalize(const MutableDataPartPtr & data_part) const
{
if (description.where_expression)
if (ttl_expressions.where_expression)
data_part->ttl_infos.rows_where_ttl[description.result_column] = new_ttl_info;
else
data_part->ttl_infos.table_ttl = new_ttl_info;

View File

@ -10,7 +10,7 @@ namespace DB
class TTLDeleteAlgorithm final : public ITTLAlgorithm
{
public:
TTLDeleteAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
TTLDeleteAlgorithm(const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
void execute(Block & block) override;
void finalize(const MutableDataPartPtr & data_part) const override;

View File

@ -4,13 +4,14 @@ namespace DB
{
TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm(
const TTLExpressions & ttl_expressions_,
const TTLDescription & description_,
const TTLUpdateField ttl_update_field_,
const String ttl_update_key_,
const TTLInfo & old_ttl_info_,
time_t current_time_,
bool force_)
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
: ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_)
, ttl_update_field(ttl_update_field_)
, ttl_update_key(ttl_update_key_)
{
@ -21,7 +22,7 @@ void TTLUpdateInfoAlgorithm::execute(Block & block)
if (!block)
return;
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column);
for (size_t i = 0; i < block.rows(); ++i)
{
UInt32 cur_ttl = ITTLAlgorithm::getTimestampByIndex(ttl_column.get(), i);

View File

@ -20,6 +20,7 @@ class TTLUpdateInfoAlgorithm : public ITTLAlgorithm
{
public:
TTLUpdateInfoAlgorithm(
const TTLExpressions & ttl_expressions_,
const TTLDescription & description_,
const TTLUpdateField ttl_update_field_,
const String ttl_update_key_,

View File

@ -145,10 +145,14 @@ void ColumnGathererStream::gather(Column & column_res)
next_required_source = -1;
while (row_source_pos < row_sources_end
&& column_res.size() < block_preferred_size_rows
&& column_res.allocatedBytes() < block_preferred_size_bytes)
/// We use do ... while here to ensure there will be at least one iteration of this loop.
/// Because the column_res.byteSize() could be bigger than block_preferred_size_bytes already at this point.
do
{
if (row_source_pos >= row_sources_end)
break;
RowSourcePart row_source = *row_source_pos;
size_t source_num = row_source.getSourceNum();
Source & source = sources[source_num];
@ -191,7 +195,7 @@ void ColumnGathererStream::gather(Column & column_res)
}
source.pos += len;
}
} while (column_res.size() < block_preferred_size_rows && column_res.byteSize() < block_preferred_size_bytes);
}
}
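The loop above becomes do ... while so that at least one batch of rows is gathered even when the result column is already over the preferred byte limit on entry; a plain while loop would exit immediately and make no progress. A tiny standalone illustration of the difference:

#include <cstddef>
#include <iostream>

int main()
{
    const size_t preferred_bytes = 16;
    size_t accumulated = 32;        // already over the limit before the loop starts
    size_t iterations = 0;

    // while (accumulated < preferred_bytes) { ... }   would run zero times here
    do
    {
        ++iterations;               // guaranteed to happen at least once
        accumulated += 8;           // stand-in for appending one batch of rows
    } while (iterations < 4 && accumulated < preferred_bytes);

    std::cout << iterations << " iteration(s)\n";   // prints: 1 iteration(s)
}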

View File

@ -4,7 +4,24 @@
namespace DB
{
static TTLExpressions getExpressions(const TTLDescription & ttl_descr, PreparedSets::Subqueries & subqueries_for_sets, const ContextPtr & context)
{
auto expr = ttl_descr.buildExpression(context);
auto expr_queries = expr.sets->getSubqueries();
subqueries_for_sets.insert(subqueries_for_sets.end(), expr_queries.begin(), expr_queries.end());
auto where_expr = ttl_descr.buildWhereExpression(context);
if (where_expr.sets)
{
auto where_expr_queries = where_expr.sets->getSubqueries();
subqueries_for_sets.insert(subqueries_for_sets.end(), where_expr_queries.begin(), where_expr_queries.end());
}
return {expr.expression, where_expr.expression};
}
TTLCalcTransform::TTLCalcTransform(
const ContextPtr & context,
const Block & header_,
const MergeTreeData & storage_,
const StorageMetadataPtr & metadata_snapshot_,
@ -21,33 +38,39 @@ TTLCalcTransform::TTLCalcTransform(
{
const auto & rows_ttl = metadata_snapshot_->getRowsTTL();
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
rows_ttl, TTLUpdateField::TABLE_TTL, rows_ttl.result_column, old_ttl_infos.table_ttl, current_time_, force_));
getExpressions(rows_ttl, subqueries_for_sets, context), rows_ttl,
TTLUpdateField::TABLE_TTL, rows_ttl.result_column, old_ttl_infos.table_ttl, current_time_, force_));
}
for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTLs())
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
where_ttl, TTLUpdateField::ROWS_WHERE_TTL, where_ttl.result_column, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
getExpressions(where_ttl, subqueries_for_sets, context), where_ttl,
TTLUpdateField::ROWS_WHERE_TTL, where_ttl.result_column, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs())
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
group_by_ttl, TTLUpdateField::GROUP_BY_TTL, group_by_ttl.result_column, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_));
getExpressions(group_by_ttl, subqueries_for_sets, context), group_by_ttl,
TTLUpdateField::GROUP_BY_TTL, group_by_ttl.result_column, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_));
if (metadata_snapshot_->hasAnyColumnTTL())
{
for (const auto & [name, description] : metadata_snapshot_->getColumnTTLs())
{
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
description, TTLUpdateField::COLUMNS_TTL, name, old_ttl_infos.columns_ttl[name], current_time_, force_));
getExpressions(description, subqueries_for_sets, context), description,
TTLUpdateField::COLUMNS_TTL, name, old_ttl_infos.columns_ttl[name], current_time_, force_));
}
}
for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs())
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
move_ttl, TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
getExpressions(move_ttl, subqueries_for_sets, context), move_ttl,
TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs())
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
getExpressions(recompression_ttl, subqueries_for_sets, context), recompression_ttl,
TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
}
void TTLCalcTransform::consume(Chunk chunk)

View File

@ -15,6 +15,7 @@ class TTLCalcTransform : public IAccumulatingTransform
{
public:
TTLCalcTransform(
const ContextPtr & context,
const Block & header_,
const MergeTreeData & storage_,
const StorageMetadataPtr & metadata_snapshot_,
@ -23,6 +24,8 @@ public:
bool force_
);
PreparedSets::Subqueries getSubqueries() { return std::move(subqueries_for_sets); }
String getName() const override { return "TTL_CALC"; }
Status prepare() override;
@ -35,6 +38,7 @@ protected:
private:
std::vector<TTLAlgorithmPtr> algorithms;
PreparedSets::Subqueries subqueries_for_sets;
/// ttl_infos and empty_columns are updating while reading
const MergeTreeData::MutableDataPartPtr & data_part;

View File

@ -16,7 +16,24 @@
namespace DB
{
static TTLExpressions getExpressions(const TTLDescription & ttl_descr, PreparedSets::Subqueries & subqueries_for_sets, const ContextPtr & context)
{
auto expr = ttl_descr.buildExpression(context);
auto expr_queries = expr.sets->getSubqueries();
subqueries_for_sets.insert(subqueries_for_sets.end(), expr_queries.begin(), expr_queries.end());
auto where_expr = ttl_descr.buildWhereExpression(context);
if (where_expr.sets)
{
auto where_expr_queries = where_expr.sets->getSubqueries();
subqueries_for_sets.insert(subqueries_for_sets.end(), where_expr_queries.begin(), where_expr_queries.end());
}
return {expr.expression, where_expr.expression};
}
TTLTransform::TTLTransform(
const ContextPtr & context,
const Block & header_,
const MergeTreeData & storage_,
const StorageMetadataPtr & metadata_snapshot_,
@ -33,10 +50,11 @@ TTLTransform::TTLTransform(
{
const auto & rows_ttl = metadata_snapshot_->getRowsTTL();
auto algorithm = std::make_unique<TTLDeleteAlgorithm>(
rows_ttl, old_ttl_infos.table_ttl, current_time_, force_);
getExpressions(rows_ttl, subqueries_for_sets, context), rows_ttl,
old_ttl_infos.table_ttl, current_time_, force_);
/// Skip all data if table ttl is expired for part
if (algorithm->isMaxTTLExpired() && !rows_ttl.where_expression)
if (algorithm->isMaxTTLExpired() && !rows_ttl.where_expression_ast)
all_data_dropped = true;
delete_algorithm = algorithm.get();
@ -45,11 +63,13 @@ TTLTransform::TTLTransform(
for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTLs())
algorithms.emplace_back(std::make_unique<TTLDeleteAlgorithm>(
where_ttl, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
getExpressions(where_ttl, subqueries_for_sets, context), where_ttl,
old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs())
algorithms.emplace_back(std::make_unique<TTLAggregationAlgorithm>(
group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_,
getExpressions(group_by_ttl, subqueries_for_sets, context), group_by_ttl,
old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_,
getInputPort().getHeader(), storage_));
if (metadata_snapshot_->hasAnyColumnTTL())
@ -75,18 +95,21 @@ TTLTransform::TTLTransform(
}
algorithms.emplace_back(std::make_unique<TTLColumnAlgorithm>(
description, old_ttl_infos.columns_ttl[name], current_time_,
getExpressions(description, subqueries_for_sets, context), description,
old_ttl_infos.columns_ttl[name], current_time_,
force_, name, default_expression, default_column_name, isCompactPart(data_part)));
}
}
for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs())
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
move_ttl, TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
getExpressions(move_ttl, subqueries_for_sets, context), move_ttl,
TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs())
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
getExpressions(recompression_ttl, subqueries_for_sets, context), recompression_ttl,
TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
}
Block reorderColumns(Block block, const Block & header)

View File

@ -16,6 +16,7 @@ class TTLTransform : public IAccumulatingTransform
{
public:
TTLTransform(
const ContextPtr & context,
const Block & header_,
const MergeTreeData & storage_,
const StorageMetadataPtr & metadata_snapshot_,
@ -28,6 +29,8 @@ public:
Status prepare() override;
PreparedSets::Subqueries getSubqueries() { return std::move(subqueries_for_sets); }
protected:
void consume(Chunk chunk) override;
Chunk generate() override;
@ -40,6 +43,8 @@ private:
const TTLDeleteAlgorithm * delete_algorithm = nullptr;
bool all_data_dropped = false;
PreparedSets::Subqueries subqueries_for_sets;
/// ttl_infos and empty_columns are updating while reading
const MergeTreeData::MutableDataPartPtr & data_part;
LoggerPtr log;

View File

@ -4,7 +4,7 @@
#include <Columns/ColumnConst.h>
#include <Common/CurrentThread.h>
#include "Core/Protocol.h"
#include <Core/Protocol.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
@ -17,6 +17,7 @@
#include <Interpreters/Context.h>
#include <Interpreters/InternalTextLogsQueue.h>
#include <IO/ConnectionTimeouts.h>
#include <Client/ConnectionEstablisher.h>
#include <Client/MultiplexedConnections.h>
#include <Client/HedgedConnections.h>
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
@ -29,6 +30,7 @@ namespace ProfileEvents
extern const Event SuspendSendingQueryToShard;
extern const Event ReadTaskRequestsReceived;
extern const Event MergeTreeReadTaskRequestsReceived;
extern const Event ParallelReplicasAvailableCount;
}
namespace DB
@ -62,6 +64,55 @@ RemoteQueryExecutor::RemoteQueryExecutor(
{
}
RemoteQueryExecutor::RemoteQueryExecutor(
ConnectionPoolPtr pool,
const String & query_,
const Block & header_,
ContextPtr context_,
ThrottlerPtr throttler,
const Scalars & scalars_,
const Tables & external_tables_,
QueryProcessingStage::Enum stage_,
std::optional<Extension> extension_)
: RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_)
{
create_connections = [this, pool, throttler, extension_](AsyncCallback)
{
const Settings & current_settings = context->getSettingsRef();
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
ConnectionPoolWithFailover::TryResult result;
std::string fail_message;
if (main_table)
{
auto table_name = main_table.getQualifiedName();
ConnectionEstablisher connection_establisher(pool, &timeouts, current_settings, log, &table_name);
connection_establisher.run(result, fail_message);
}
else
{
ConnectionEstablisher connection_establisher(pool, &timeouts, current_settings, log, nullptr);
connection_establisher.run(result, fail_message);
}
std::vector<IConnectionPool::Entry> connection_entries;
if (!result.entry.isNull() && result.is_usable)
{
if (extension_ && extension_->parallel_reading_coordinator)
ProfileEvents::increment(ProfileEvents::ParallelReplicasAvailableCount);
connection_entries.emplace_back(std::move(result.entry));
}
auto res = std::make_unique<MultiplexedConnections>(std::move(connection_entries), current_settings, throttler);
if (extension_ && extension_->replica_info)
res->setReplicaInfo(*extension_->replica_info);
return res;
};
}
RemoteQueryExecutor::RemoteQueryExecutor(
Connection & connection,
const String & query_,

View File

@ -52,6 +52,18 @@ public:
std::optional<IConnections::ReplicaInfo> replica_info = {};
};
/// Takes a connection pool for a node (not cluster)
RemoteQueryExecutor(
ConnectionPoolPtr pool,
const String & query_,
const Block & header_,
ContextPtr context_,
ThrottlerPtr throttler = nullptr,
const Scalars & scalars_ = Scalars(),
const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete,
std::optional<Extension> extension_ = std::nullopt);
/// Takes already set connection.
RemoteQueryExecutor(
Connection & connection,

View File

@ -76,7 +76,7 @@ namespace
static std::once_flag once_flag;
std::call_once(once_flag, [&config]
{
static LoggerPtr logger = getLogger("grpc");
static LoggerRawPtr logger = getRawLogger("grpc");
gpr_set_log_function([](gpr_log_func_args* args)
{
if (args->severity == GPR_LOG_SEVERITY_DEBUG)
@ -622,7 +622,7 @@ namespace
class Call
{
public:
Call(CallType call_type_, std::unique_ptr<BaseResponder> responder_, IServer & iserver_, LoggerPtr log_);
Call(CallType call_type_, std::unique_ptr<BaseResponder> responder_, IServer & iserver_, LoggerRawPtr log_);
~Call();
void start(const std::function<void(void)> & on_finish_call_callback);
@ -664,7 +664,7 @@ namespace
const CallType call_type;
std::unique_ptr<BaseResponder> responder;
IServer & iserver;
LoggerPtr log = nullptr;
LoggerRawPtr log = nullptr;
std::optional<Session> session;
ContextMutablePtr query_context;
@ -726,7 +726,7 @@ namespace
};
// NOLINTEND(clang-analyzer-optin.performance.Padding)
Call::Call(CallType call_type_, std::unique_ptr<BaseResponder> responder_, IServer & iserver_, LoggerPtr log_)
Call::Call(CallType call_type_, std::unique_ptr<BaseResponder> responder_, IServer & iserver_, LoggerRawPtr log_)
: call_type(call_type_), responder(std::move(responder_)), iserver(iserver_), log(log_)
{
}
@ -1851,7 +1851,7 @@ private:
GRPCServer::GRPCServer(IServer & iserver_, const Poco::Net::SocketAddress & address_to_listen_)
: iserver(iserver_)
, address_to_listen(address_to_listen_)
, log(getLogger("GRPCServer"))
, log(getRawLogger("GRPCServer"))
, runner(std::make_unique<Runner>(*this))
{}
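Switching the gRPC server to LoggerRawPtr/getRawLogger keeps a plain pointer with static lifetime, which is what a capture-less C-style log callback (like the one handed to gpr_set_log_function) can reach. A standalone sketch of that pattern with a hypothetical Logger type, not ClickHouse's logging API:

#include <iostream>
#include <string>

struct Logger
{
    void log(const std::string & msg) const { std::cout << "[grpc] " << msg << '\n'; }
};

// The callback must be a plain function pointer: no captures allowed,
// so it reaches the logger through a function-local static.
Logger * grpcLogger()
{
    static Logger logger;   // lives for the whole program, like a raw logger would
    return &logger;
}

extern "C" void grpcLogCallback(const char * message)
{
    grpcLogger()->log(message);
}

int main()
{
    // Stand-in for gpr_set_log_function: accepts only a capture-less function pointer.
    void (*set_log_function)(void (*)(const char *)) =
        [](void (*cb)(const char *)) { cb("server started"); };
    set_log_function(grpcLogCallback);
}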

View File

@ -3,10 +3,11 @@
#include "config.h"
#if USE_GRPC
#include "clickhouse_grpc.grpc.pb.h"
#include <Poco/Net/SocketAddress.h>
#include <base/types.h>
#include <Common/Logger.h>
#include "clickhouse_grpc.grpc.pb.h"
namespace Poco { class Logger; }
@ -47,7 +48,7 @@ private:
IServer & iserver;
const Poco::Net::SocketAddress address_to_listen;
LoggerPtr log;
LoggerRawPtr log;
GRPCService grpc_service;
std::unique_ptr<grpc::Server> grpc_server;
std::unique_ptr<grpc::ServerCompletionQueue> queue;

View File

@ -933,7 +933,7 @@ void TCPHandler::processInsertQuery()
if (auto table = DatabaseCatalog::instance().tryGetTable(insert_query.table_id, query_context))
async_insert_enabled |= table->areAsynchronousInsertsEnabled();
if (insert_queue && async_insert_enabled && !insert_query.select)
if (insert_queue && async_insert_enabled && !insert_query.select && !settings.deduplicate_blocks_in_dependent_materialized_views)
{
auto result = processAsyncInsertQuery(*insert_queue);
if (result.status == AsynchronousInsertQueue::PushResult::OK)

View File

@ -33,6 +33,9 @@
#include <Processors/Transforms/TTLCalcTransform.h>
#include <Processors/Transforms/DistinctSortedTransform.h>
#include <Processors/Transforms/DistinctTransform.h>
#include <Processors/QueryPlan/CreatingSetsStep.h>
#include <Interpreters/PreparedSets.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
namespace DB
{
@ -1056,13 +1059,14 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
break;
}
auto res_pipe = Pipe::unitePipes(std::move(pipes));
res_pipe.addTransform(std::move(merged_transform));
auto builder = std::make_unique<QueryPipelineBuilder>();
builder->init(Pipe::unitePipes(std::move(pipes)));
builder->addTransform(std::move(merged_transform));
#ifndef NDEBUG
if (!sort_description.empty())
{
res_pipe.addSimpleTransform([&](const Block & header_)
builder->addSimpleTransform([&](const Block & header_)
{
auto transform = std::make_shared<CheckSortedTransform>(header_, sort_description);
return transform;
@ -1084,26 +1088,34 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
}
if (DistinctSortedTransform::isApplicable(header, sort_description, global_ctx->deduplicate_by_columns))
res_pipe.addTransform(std::make_shared<DistinctSortedTransform>(
res_pipe.getHeader(), sort_description, SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns));
builder->addTransform(std::make_shared<DistinctSortedTransform>(
builder->getHeader(), sort_description, SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns));
else
res_pipe.addTransform(std::make_shared<DistinctTransform>(
res_pipe.getHeader(), SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns));
builder->addTransform(std::make_shared<DistinctTransform>(
builder->getHeader(), SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns));
}
PreparedSets::Subqueries subqueries;
if (ctx->need_remove_expired_values)
res_pipe.addTransform(std::make_shared<TTLTransform>(
res_pipe.getHeader(), *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl));
{
auto transform = std::make_shared<TTLTransform>(global_ctx->context, builder->getHeader(), *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl);
subqueries = transform->getSubqueries();
builder->addTransform(std::move(transform));
}
if (global_ctx->metadata_snapshot->hasSecondaryIndices())
{
const auto & indices = global_ctx->metadata_snapshot->getSecondaryIndices();
res_pipe.addTransform(std::make_shared<ExpressionTransform>(
res_pipe.getHeader(), indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext())));
res_pipe.addTransform(std::make_shared<MaterializingTransform>(res_pipe.getHeader()));
builder->addTransform(std::make_shared<ExpressionTransform>(
builder->getHeader(), indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext())));
builder->addTransform(std::make_shared<MaterializingTransform>(builder->getHeader()));
}
global_ctx->merged_pipeline = QueryPipeline(std::move(res_pipe));
if (!subqueries.empty())
builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), global_ctx->context);
global_ctx->merged_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
/// Dereference unique_ptr and pass horizontal_stage_progress by reference
global_ctx->merged_pipeline.setProgressCallback(MergeProgressCallback(global_ctx->merge_list_element_ptr, global_ctx->watch_prev_elapsed, *global_ctx->horizontal_stage_progress));
/// Is calculated inside MergeProgressCallback.

View File

@ -132,13 +132,18 @@ void buildScatterSelector(
/// Computes ttls and updates ttl infos
void updateTTL(
const ContextPtr context,
const TTLDescription & ttl_entry,
IMergeTreeDataPart::TTLInfos & ttl_infos,
DB::MergeTreeDataPartTTLInfo & ttl_info,
const Block & block,
bool update_part_min_max_ttls)
{
auto ttl_column = ITTLAlgorithm::executeExpressionAndGetColumn(ttl_entry.expression, block, ttl_entry.result_column);
auto expr_and_set = ttl_entry.buildExpression(context);
for (auto & subquery : expr_and_set.sets->getSubqueries())
subquery->buildSetInplace(context);
auto ttl_column = ITTLAlgorithm::executeExpressionAndGetColumn(expr_and_set.expression, block, ttl_entry.result_column);
if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(ttl_column.get()))
{
@ -507,7 +512,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
DB::IMergeTreeDataPart::TTLInfos move_ttl_infos;
const auto & move_ttl_entries = metadata_snapshot->getMoveTTLs();
for (const auto & ttl_entry : move_ttl_entries)
updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false);
updateTTL(context, ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false);
ReservationPtr reservation = data.reserveSpacePreferringTTLRules(metadata_snapshot, expected_size, move_ttl_infos, time(nullptr), 0, true);
VolumePtr volume = data.getStoragePolicy()->getVolume(0);
@ -562,20 +567,20 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
}
if (metadata_snapshot->hasRowsTTL())
updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true);
updateTTL(context, metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true);
for (const auto & ttl_entry : metadata_snapshot->getGroupByTTLs())
updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true);
updateTTL(context, ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true);
for (const auto & ttl_entry : metadata_snapshot->getRowsWhereTTLs())
updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.rows_where_ttl[ttl_entry.result_column], block, true);
updateTTL(context, ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.rows_where_ttl[ttl_entry.result_column], block, true);
for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);
updateTTL(context, ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);
const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs();
for (const auto & ttl_entry : recompression_ttl_entries)
updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.recompression_ttl[ttl_entry.result_column], block, false);
updateTTL(context, ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.recompression_ttl[ttl_entry.result_column], block, false);
new_data_part->ttl_infos.update(move_ttl_infos);

View File

@ -15,6 +15,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
@ -217,7 +218,20 @@ MergeTreeIndexPtr minmaxIndexCreator(
return std::make_shared<MergeTreeIndexMinMax>(index);
}
void minmaxIndexValidator(const IndexDescription & /* index */, bool /* attach */)
void minmaxIndexValidator(const IndexDescription & index, bool attach)
{
if (attach)
return;
for (const auto & column : index.sample_block)
{
if (!column.type->isComparable())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Data type of argument for minmax index must be comparable, got {} type for column {} instead",
column.type->getName(), column.name);
}
}
}
}
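The validator above rejects CREATE-time definitions of a minmax index over non-comparable column types while still tolerating ATTACH of existing tables. A standalone sketch of the same check-and-throw shape, with a simplified notion of "comparable" (names and types below are illustrative):

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct IndexColumn
{
    std::string name;
    std::string type;
    bool comparable;
};

void validateMinMaxIndex(const std::vector<IndexColumn> & columns, bool attach)
{
    if (attach)
        return;   // tolerate existing definitions when re-attaching a table
    for (const auto & column : columns)
        if (!column.comparable)
            throw std::invalid_argument(
                "Data type of argument for minmax index must be comparable, got "
                + column.type + " type for column " + column.name + " instead");
}

int main()
{
    try
    {
        validateMinMaxIndex({{"id", "UInt64", true}, {"state", "AggregateFunction(...)", false}}, /*attach=*/ false);
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
}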

View File

@ -8,6 +8,7 @@
#include <Parsers/queryToString.h>
#include <Interpreters/SquashingTransform.h>
#include <Interpreters/MergeTreeTransaction.h>
#include <Interpreters/PreparedSets.h>
#include <Processors/Transforms/TTLTransform.h>
#include <Processors/Transforms/TTLCalcTransform.h>
#include <Processors/Transforms/DistinctSortedTransform.h>
@ -16,6 +17,7 @@
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/MaterializingTransform.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/QueryPlan/CreatingSetsStep.h>
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
#include <Storages/MergeTree/MergeTreeDataWriter.h>
#include <Storages/MutationCommands.h>
@ -1552,21 +1554,34 @@ private:
if (!ctx->mutating_pipeline_builder.initialized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot mutate part columns with uninitialized mutations stream. It's a bug");
QueryPipelineBuilder builder(std::move(ctx->mutating_pipeline_builder));
auto builder = std::make_unique<QueryPipelineBuilder>(std::move(ctx->mutating_pipeline_builder));
if (ctx->metadata_snapshot->hasPrimaryKey() || ctx->metadata_snapshot->hasSecondaryIndices())
{
builder.addTransform(std::make_shared<ExpressionTransform>(
builder.getHeader(), ctx->data->getPrimaryKeyAndSkipIndicesExpression(ctx->metadata_snapshot, skip_indices)));
builder->addTransform(std::make_shared<ExpressionTransform>(
builder->getHeader(), ctx->data->getPrimaryKeyAndSkipIndicesExpression(ctx->metadata_snapshot, skip_indices)));
builder.addTransform(std::make_shared<MaterializingTransform>(builder.getHeader()));
builder->addTransform(std::make_shared<MaterializingTransform>(builder->getHeader()));
}
PreparedSets::Subqueries subqueries;
if (ctx->execute_ttl_type == ExecuteTTLType::NORMAL)
builder.addTransform(std::make_shared<TTLTransform>(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
{
auto transform = std::make_shared<TTLTransform>(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
subqueries = transform->getSubqueries();
builder->addTransform(std::move(transform));
}
if (ctx->execute_ttl_type == ExecuteTTLType::RECALCULATE)
builder.addTransform(std::make_shared<TTLCalcTransform>(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
{
auto transform = std::make_shared<TTLCalcTransform>(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
subqueries = transform->getSubqueries();
builder->addTransform(std::move(transform));
}
if (!subqueries.empty())
builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), ctx->context);
ctx->minmax_idx = std::make_shared<IMergeTreeDataPart::MinMaxIndex>();
@ -1600,7 +1615,7 @@ private:
ctx->context->getWriteSettings(),
computed_granularity);
ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
ctx->mutating_pipeline.setProgressCallback(ctx->progress_callback);
/// Is calculated inside MergeProgressCallback.
ctx->mutating_pipeline.disableProfileEventUpdate();
@ -1796,13 +1811,25 @@ private:
if (ctx->mutating_pipeline_builder.initialized())
{
QueryPipelineBuilder builder(std::move(ctx->mutating_pipeline_builder));
auto builder = std::make_unique<QueryPipelineBuilder>(std::move(ctx->mutating_pipeline_builder));
PreparedSets::Subqueries subqueries;
if (ctx->execute_ttl_type == ExecuteTTLType::NORMAL)
builder.addTransform(std::make_shared<TTLTransform>(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
{
auto transform = std::make_shared<TTLTransform>(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
subqueries = transform->getSubqueries();
builder->addTransform(std::move(transform));
}
if (ctx->execute_ttl_type == ExecuteTTLType::RECALCULATE)
builder.addTransform(std::make_shared<TTLCalcTransform>(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
{
auto transform = std::make_shared<TTLCalcTransform>(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
subqueries = transform->getSubqueries();
builder->addTransform(std::move(transform));
}
if (!subqueries.empty())
builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), ctx->context);
ctx->out = std::make_shared<MergedColumnOnlyOutputStream>(
ctx->new_data_part,
@ -1816,7 +1843,7 @@ private:
&ctx->source_part->index_granularity_info
);
ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
ctx->mutating_pipeline.setProgressCallback(ctx->progress_callback);
/// Is calculated inside MergeProgressCallback.
ctx->mutating_pipeline.disableProfileEventUpdate();

View File

@ -97,11 +97,9 @@ extern const Event ParallelReplicasCollectingOwnedSegmentsMicroseconds;
extern const Event ParallelReplicasReadAssignedMarks;
extern const Event ParallelReplicasReadUnassignedMarks;
extern const Event ParallelReplicasReadAssignedForStealingMarks;
}
namespace ProfileEvents
{
extern const Event ParallelReplicasUsedCount;
extern const Event ParallelReplicasUsedCount;
extern const Event ParallelReplicasUnavailableCount;
}
namespace DB
@ -1025,6 +1023,8 @@ ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelR
void ParallelReplicasReadingCoordinator::markReplicaAsUnavailable(size_t replica_number)
{
ProfileEvents::increment(ProfileEvents::ParallelReplicasUnavailableCount);
std::lock_guard lock(mutex);
if (!pimpl)

View File

@ -297,7 +297,7 @@ void registerStorageAzureBlob(StorageFactory & factory)
return std::make_shared<StorageAzureBlob>(
std::move(configuration),
std::make_unique<AzureObjectStorage>("AzureBlobStorage", std::move(client), std::move(settings)),
std::make_unique<AzureObjectStorage>("AzureBlobStorage", std::move(client), std::move(settings),configuration.container),
args.getContext(),
args.table_id,
args.columns,

View File

@ -200,7 +200,7 @@ TTLDescription StorageInMemoryMetadata::getRowsTTL() const
bool StorageInMemoryMetadata::hasRowsTTL() const
{
return table_ttl.rows_ttl.expression != nullptr;
return table_ttl.rows_ttl.expression_ast != nullptr;
}
TTLDescriptions StorageInMemoryMetadata::getRowsWhereTTLs() const
@ -258,9 +258,8 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(
NameSet required_ttl_columns;
NameSet updated_ttl_columns;
auto add_dependent_columns = [&updated_columns](const auto & expression, auto & to_set)
auto add_dependent_columns = [&updated_columns](const Names & required_columns, auto & to_set)
{
auto required_columns = expression->getRequiredColumns();
for (const auto & dependency : required_columns)
{
if (updated_columns.contains(dependency))
@ -276,18 +275,18 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(
for (const auto & index : getSecondaryIndices())
{
if (has_dependency(index.name, ColumnDependency::SKIP_INDEX))
add_dependent_columns(index.expression, indices_columns);
add_dependent_columns(index.expression->getRequiredColumns(), indices_columns);
}
for (const auto & projection : getProjections())
{
if (has_dependency(projection.name, ColumnDependency::PROJECTION))
add_dependent_columns(&projection, projections_columns);
add_dependent_columns(projection.getRequiredColumns(), projections_columns);
}
auto add_for_rows_ttl = [&](const auto & expression, auto & to_set)
{
if (add_dependent_columns(expression, to_set) && include_ttl_target)
if (add_dependent_columns(expression.getNames(), to_set) && include_ttl_target)
{
/// Filter all columns, if rows TTL expression have to be recalculated.
for (const auto & column : getColumns().getAllPhysical())
@ -296,25 +295,25 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(
};
if (hasRowsTTL())
add_for_rows_ttl(getRowsTTL().expression, required_ttl_columns);
add_for_rows_ttl(getRowsTTL().expression_columns, required_ttl_columns);
for (const auto & entry : getRowsWhereTTLs())
add_for_rows_ttl(entry.expression, required_ttl_columns);
add_for_rows_ttl(entry.expression_columns, required_ttl_columns);
for (const auto & entry : getGroupByTTLs())
add_for_rows_ttl(entry.expression, required_ttl_columns);
add_for_rows_ttl(entry.expression_columns, required_ttl_columns);
for (const auto & entry : getRecompressionTTLs())
add_dependent_columns(entry.expression, required_ttl_columns);
add_dependent_columns(entry.expression_columns.getNames(), required_ttl_columns);
for (const auto & [name, entry] : getColumnTTLs())
{
if (add_dependent_columns(entry.expression, required_ttl_columns) && include_ttl_target)
if (add_dependent_columns(entry.expression_columns.getNames(), required_ttl_columns) && include_ttl_target)
updated_ttl_columns.insert(name);
}
for (const auto & entry : getMoveTTLs())
add_dependent_columns(entry.expression, required_ttl_columns);
add_dependent_columns(entry.expression_columns.getNames(), required_ttl_columns);
//TODO what about rows_where_ttl and group_by_ttl ??

View File

@ -756,16 +756,23 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin(
auto join_tree_type = query_node->getJoinTree()->getNodeType();
auto modified_query = query_node->cloneAndReplace(original_table_expression, replacement_table_expression);
// For the case when join tree is just a table or a table function we don't need to do anything more.
if (join_tree_type == QueryTreeNodeType::TABLE || join_tree_type == QueryTreeNodeType::TABLE_FUNCTION)
return modified_query;
// JOIN needs to be removed because StorageMerge should produce not joined data.
// GROUP BY should be removed as well.
auto * modified_query_node = modified_query->as<QueryNode>();
// Remove the JOIN statement. As a result query will have a form like: SELECT * FROM <table> ...
modified_query = modified_query->cloneAndReplace(modified_query_node->getJoinTree(), replacement_table_expression);
modified_query_node = modified_query->as<QueryNode>();
query_node = modified_query->as<QueryNode>();
// For backward compatibility we need to leave all filters related to this table.
// It may lead to some incorrect result.
if (query_node->hasPrewhere())
replaceFilterExpression(query_node->getPrewhere(), replacement_table_expression, context);
if (query_node->hasWhere())
@ -779,6 +786,9 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin(
projection.clear();
NamesAndTypes projection_columns;
// Select only required columns from the table, because projection list may contain:
// 1. aggregate functions
// 2. expressions referencing other tables of JOIN
for (auto const & column_name : required_column_names)
{
QueryTreeNodePtr fake_node = std::make_shared<IdentifierNode>(Identifier{column_name});
@ -791,6 +801,8 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin(
throw Exception(ErrorCodes::LOGICAL_ERROR, "Required column '{}' is not resolved", column_name);
auto fake_column = resolved_column->getColumn();
// Identifier is resolved to ColumnNode, but we need to get rid of ALIAS columns
// and also fix references to source expression (now column is referencing original table expression).
ApplyAliasColumnExpressionsVisitor visitor(replacement_table_expression);
visitor.visit(fake_node);
@ -860,7 +872,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_
QueryTreeNodePtr column_node;
// Replace all references to ALIAS columns in the query by expressions.
if (is_alias)
{
QueryTreeNodePtr fake_node = std::make_shared<IdentifierNode>(Identifier{column});

View File

@ -18,6 +18,7 @@
#include <Interpreters/FunctionNameNormalizer.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
namespace DB
@ -110,7 +111,10 @@ using FindAggregateFunctionVisitor = InDepthNodeVisitor<FindAggregateFunctionFin
TTLDescription::TTLDescription(const TTLDescription & other)
: mode(other.mode)
, expression_ast(other.expression_ast ? other.expression_ast->clone() : nullptr)
, expression_columns(other.expression_columns)
, result_column(other.result_column)
, where_expression_ast(other.where_expression_ast ? other.where_expression_ast->clone() : nullptr)
, where_expression_columns(other.where_expression_columns)
, where_result_column(other.where_result_column)
, group_by_keys(other.group_by_keys)
, set_parts(other.set_parts)
@ -120,11 +124,6 @@ TTLDescription::TTLDescription(const TTLDescription & other)
, if_exists(other.if_exists)
, recompression_codec(other.recompression_codec)
{
if (other.expression)
expression = other.expression->clone();
if (other.where_expression)
where_expression = other.where_expression->clone();
}
TTLDescription & TTLDescription::operator=(const TTLDescription & other)
@ -138,17 +137,15 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other)
else
expression_ast.reset();
if (other.expression)
expression = other.expression->clone();
else
expression.reset();
expression_columns = other.expression_columns;
result_column = other.result_column;
if (other.where_expression)
where_expression = other.where_expression->clone();
else
where_expression.reset();
if (other.where_expression_ast)
where_expression_ast = other.where_expression_ast->clone();
else
where_expression_ast.reset();
where_expression_columns = other.where_expression_columns;
where_result_column = other.where_result_column;
group_by_keys = other.group_by_keys;
set_parts = other.set_parts;
@ -165,6 +162,44 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other)
return * this;
}
static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndTypesList & columns, const ContextPtr & context)
{
ExpressionAndSets result;
auto ttl_string = queryToString(ast);
auto syntax_analyzer_result = TreeRewriter(context).analyze(ast, columns);
ExpressionAnalyzer analyzer(ast, syntax_analyzer_result, context);
auto dag = analyzer.getActionsDAG(false);
const auto * col = &dag->findInOutputs(ast->getColumnName());
if (col->result_name != ttl_string)
col = &dag->addAlias(*col, ttl_string);
dag->getOutputs() = {col};
dag->removeUnusedActions();
result.expression = std::make_shared<ExpressionActions>(dag, ExpressionActionsSettings::fromContext(context));
result.sets = analyzer.getPreparedSets();
return result;
}
ExpressionAndSets TTLDescription::buildExpression(const ContextPtr & context) const
{
auto ast = expression_ast->clone();
return buildExpressionAndSets(ast, expression_columns, context);
}
ExpressionAndSets TTLDescription::buildWhereExpression(const ContextPtr & context) const
{
if (where_expression_ast)
{
auto ast = where_expression_ast->clone();
return buildExpressionAndSets(ast, where_expression_columns, context);
}
return {};
}
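buildExpression/buildWhereExpression above replace the previously stored ExpressionActionsPtr members: the description now keeps only the AST plus the required columns, and callers compile an executable expression, together with its prepared sets, at the point of use. A standalone sketch of this "store the recipe, build on demand" shape, with std::function standing in for ExpressionActions (everything here is illustrative):

#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct CompiledExpression
{
    std::function<bool(int)> evaluate;   // stand-in for ExpressionActionsPtr
    std::vector<int> prepared_sets;      // stand-in for the sets collected while building
};

struct TTLRecipe
{
    std::string source_text;             // analogue of the stored AST
    int threshold = 0;

    // Built fresh for every caller, so per-query state (like sets) is never shared.
    CompiledExpression build() const
    {
        CompiledExpression compiled;
        int limit = threshold;
        compiled.evaluate = [limit](int value) { return value > limit; };
        compiled.prepared_sets = {limit};
        return compiled;
    }
};

int main()
{
    TTLRecipe recipe{"value > threshold", 10};
    auto compiled = recipe.build();
    std::cout << std::boolalpha << compiled.evaluate(42) << '\n';   // true
}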
TTLDescription TTLDescription::getTTLFromAST(
const ASTPtr & definition_ast,
const ColumnsDescription & columns,
@ -182,9 +217,12 @@ TTLDescription TTLDescription::getTTLFromAST(
result.expression_ast = definition_ast->clone();
auto ttl_ast = result.expression_ast->clone();
auto syntax_analyzer_result = TreeRewriter(context).analyze(ttl_ast, columns.getAllPhysical());
result.expression = ExpressionAnalyzer(ttl_ast, syntax_analyzer_result, context).getActions(false);
result.result_column = ttl_ast->getColumnName();
auto expression = buildExpressionAndSets(ttl_ast, columns.getAllPhysical(), context).expression;
result.expression_columns = expression->getRequiredColumnsWithTypes();
result.result_column = expression->getSampleBlock().safeGetByPosition(0).name;
ExpressionActionsPtr where_expression;
if (ttl_element == nullptr) /// columns TTL
{
@ -202,9 +240,10 @@ TTLDescription TTLDescription::getTTLFromAST(
{
if (ASTPtr where_expr_ast = ttl_element->where())
{
auto where_syntax_result = TreeRewriter(context).analyze(where_expr_ast, columns.getAllPhysical());
result.where_expression = ExpressionAnalyzer(where_expr_ast, where_syntax_result, context).getActions(false);
result.where_result_column = where_expr_ast->getColumnName();
result.where_expression_ast = where_expr_ast->clone();
where_expression = buildExpressionAndSets(where_expr_ast, columns.getAllPhysical(), context).expression;
result.where_expression_columns = where_expression->getRequiredColumnsWithTypes();
result.where_result_column = where_expression->getSampleBlock().safeGetByPosition(0).name;
}
}
else if (ttl_element->mode == TTLMode::GROUP_BY)
@ -229,17 +268,17 @@ TTLDescription TTLDescription::getTTLFromAST(
for (const auto & ast : ttl_element->group_by_assignments)
{
const auto assignment = ast->as<const ASTAssignment &>();
auto expression = assignment.expression();
auto ass_expression = assignment.expression();
FindAggregateFunctionVisitor::Data data{false};
FindAggregateFunctionVisitor(data).visit(expression);
FindAggregateFunctionVisitor(data).visit(ass_expression);
if (!data.has_aggregate_function)
throw Exception(ErrorCodes::BAD_TTL_EXPRESSION,
"Invalid expression for assignment of column {}. Should contain an aggregate function", assignment.column_name);
expression = addTypeConversionToAST(std::move(expression), columns.getPhysical(assignment.column_name).type->getName());
aggregations.emplace_back(assignment.column_name, std::move(expression));
ass_expression = addTypeConversionToAST(std::move(ass_expression), columns.getPhysical(assignment.column_name).type->getName());
aggregations.emplace_back(assignment.column_name, std::move(ass_expression));
aggregation_columns_set.insert(assignment.column_name);
}
@ -297,7 +336,7 @@ TTLDescription TTLDescription::getTTLFromAST(
}
}
checkTTLExpression(result.expression, result.result_column, is_attach || context->getSettingsRef().allow_suspicious_ttl_expressions);
checkTTLExpression(expression, result.result_column, is_attach || context->getSettingsRef().allow_suspicious_ttl_expressions);
return result;
}
@ -350,7 +389,7 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST(
auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key, is_attach);
if (ttl.mode == TTLMode::DELETE)
{
if (!ttl.where_expression)
if (!ttl.where_expression_ast)
{
if (have_unconditional_delete_ttl)
throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "More than one DELETE TTL expression without WHERE expression is not allowed");

View File

@ -35,6 +35,15 @@ struct TTLAggregateDescription
using TTLAggregateDescriptions = std::vector<TTLAggregateDescription>;
class PreparedSets;
using PreparedSetsPtr = std::shared_ptr<PreparedSets>;
struct ExpressionAndSets
{
ExpressionActionsPtr expression;
PreparedSetsPtr sets;
};
/// Common struct for TTL record in storage
struct TTLDescription
{
@ -44,9 +53,10 @@ struct TTLDescription
/// TTL d + INTERVAL 1 DAY
/// ^~~~~~~~~~~~~~~~~~~^
ASTPtr expression_ast;
NamesAndTypesList expression_columns;
/// Expression actions evaluated from AST
ExpressionActionsPtr expression;
ExpressionAndSets buildExpression(const ContextPtr & context) const;
/// Result column of this TTL expression
String result_column;
@ -54,7 +64,9 @@ struct TTLDescription
/// WHERE part in TTL expression
/// TTL ... WHERE x % 10 == 0 and y > 5
/// ^~~~~~~~~~~~~~~~~~~~~~^
ExpressionActionsPtr where_expression;
ASTPtr where_expression_ast;
NamesAndTypesList where_expression_columns;
ExpressionAndSets buildWhereExpression(const ContextPtr & context) const;
/// Name of result column from WHERE expression
String where_result_column;

View File

@ -262,7 +262,7 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex
auto client = StorageAzureBlob::createClient(configuration, !is_insert_query);
auto settings = StorageAzureBlob::createSettings(context);
auto object_storage = std::make_unique<AzureObjectStorage>("AzureBlobStorageTableFunction", std::move(client), std::move(settings));
auto object_storage = std::make_unique<AzureObjectStorage>("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container);
return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false);
}
@ -293,7 +293,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct
StoragePtr storage = std::make_shared<StorageAzureBlob>(
configuration,
std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings)),
std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container),
context,
StorageID(getDatabaseName(), table_name),
columns,

View File

@ -40,7 +40,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl(
/// On a worker node this filename won't contain globs
storage = std::make_shared<StorageAzureBlob>(
configuration,
std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings)),
std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container),
context,
StorageID(getDatabaseName(), table_name),
columns,
@ -55,7 +55,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl(
storage = std::make_shared<StorageAzureBlobCluster>(
cluster_name,
configuration,
std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings)),
std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container),
StorageID(getDatabaseName(), table_name),
columns,
ConstraintsDescription{},

View File

@ -1,5 +1,8 @@
import argparse
import concurrent.futures
from copy import deepcopy
from dataclasses import asdict, dataclass
from enum import Enum
import json
import logging
import os
@ -7,16 +10,14 @@ import re
import subprocess
import sys
import time
from dataclasses import asdict, dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Union
import docker_images_helper
import upload_result_helper
from build_check import get_release_or_pr
from ci_config import CI_CONFIG, Build, JobNames, Labels
from ci_utils import GHActions, is_hex
from ci_config import CI_CONFIG, Build, Labels, JobNames
from ci_utils import GHActions, is_hex, normalize_string
from clickhouse_helper import (
CiLogsCredentials,
ClickHouseHelper,
@ -48,7 +49,7 @@ from git_helper import GIT_PREFIX, Git
from git_helper import Runner as GitRunner
from github import Github
from pr_info import PRInfo
from report import SUCCESS, BuildResult, JobReport
from report import ERROR, SUCCESS, BuildResult, JobReport
from s3_helper import S3Helper
from version_helper import get_version_from_repo
@ -88,6 +89,7 @@ class CiCache:
class RecordType(Enum):
SUCCESSFUL = "successful"
PENDING = "pending"
FAILED = "failed"
@dataclass
class Record:
@ -249,6 +251,13 @@ class CiCache:
)
return record
def print_status(self):
for record_type in self.RecordType:
GHActions.print_in_group(
f"Cache records: [{record_type}]", list(self.records[record_type])
)
return self
def update(self):
"""
Pulls cache records from s3. Only record names are fetched, not their content.
@ -260,9 +269,6 @@ class CiCache:
path = self.cache_s3_paths[job_type]
records = self.s3.list_prefix(f"{path}{prefix}", S3_BUILDS_BUCKET)
records = [record.split("/")[-1] for record in records]
GHActions.print_in_group(
f"Cache records: [{record_type}] in [{job_type.value}]", records
)
for file in records:
record = self._parse_record_file_name(
record_type=record_type, file_name=file
@ -384,6 +390,9 @@ class CiCache:
if record_type == self.RecordType.SUCCESSFUL:
assert isinstance(status, CommitStatusData)
status.dump_to_file(record_file)
elif record_type == self.RecordType.FAILED:
assert isinstance(status, CommitStatusData)
status.dump_to_file(record_file)
elif record_type == self.RecordType.PENDING:
assert isinstance(status, PendingState)
with open(record_file, "w") as json_file:
@ -488,6 +497,16 @@ class CiCache:
self.RecordType.SUCCESSFUL, job, batch, num_batches, release_branch
)
def is_failed(
self, job: str, batch: int, num_batches: int, release_branch: bool
) -> bool:
"""
checks whether a given job has already been completed with a failure
"""
return self.exist(
self.RecordType.FAILED, job, batch, num_batches, release_branch
)
def is_pending(
self, job: str, batch: int, num_batches: int, release_branch: bool
) -> bool:
@ -495,8 +514,9 @@ class CiCache:
check pending record in the cache for a given job
@release_branch - checks that "release" attribute is set for a record
"""
if self.is_successful(job, batch, num_batches, release_branch):
# successful record is present - not pending
if self.is_successful(
job, batch, num_batches, release_branch
) or self.is_failed(job, batch, num_batches, release_branch):
return False
return self.exist(
@ -524,6 +544,27 @@ class CiCache:
release_branch,
)
def push_failed(
self,
job: str,
batch: int,
num_batches: int,
job_status: CommitStatusData,
release_branch: bool = False,
) -> None:
"""
Pushes a cache record of type Failed (CommitStatusData)
@release_branch adds "release" attribute to a record
"""
self.push(
self.RecordType.FAILED,
job,
[batch],
num_batches,
job_status,
release_branch,
)
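For illustration, a minimal hedged sketch of how the new FAILED cache records are intended to be used. The job name, the status values and the `ci_cache` instance are assumptions (a CiCache built from the job digests inside ci.py, where CommitStatusData and ERROR are already imported, as in _pre_action), not part of this diff:

    # Hypothetical sketch: record a failed batch so that later runs neither
    # await it as pending nor reuse it as successful.
    failed = CommitStatusData(description="tests failed", status=ERROR, report_url="<report url>")
    ci_cache.push_failed("Docs check", batch=0, num_batches=1, job_status=failed, release_branch=False)

    # A FAILED record short-circuits is_pending(), while is_successful() stays False,
    # so a subsequent --configure run re-schedules the job instead of skipping it.
    assert not ci_cache.is_pending("Docs check", 0, 1, release_branch=False)
    assert not ci_cache.is_successful("Docs check", 0, 1, release_branch=False)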
def push_pending(
self, job: str, batches: List[int], num_batches: int, release_branch: bool
) -> None:
@ -591,46 +632,85 @@ class CiCache:
bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path
)
# def await_jobs(self, jobs_with_params: Dict[str, Dict[str, Any]]) -> List[str]:
# if not jobs_with_params:
# return []
# print(f"Start awaiting jobs [{list(jobs_with_params)}]")
# poll_interval_sec = 180
# start_at = int(time.time())
# TIMEOUT = 3000
# expired_sec = 0
# done_jobs = [] # type: List[str]
# while expired_sec < TIMEOUT and jobs_with_params:
# time.sleep(poll_interval_sec)
# self.update()
# pending_finished: List[str] = []
# for job_name in jobs_with_params:
# num_batches = jobs_with_params[job_name]["num_batches"]
# for batch in jobs_with_params[job_name]["batches"]:
# if self.is_pending(job_name, batch, num_batches):
# continue
# print(
# f"Job [{job_name}_[{batch}/{num_batches}]] is not pending anymore"
# )
# pending_finished.append(job_name)
# if pending_finished:
# # restart timer
# start_at = int(time.time())
# expired_sec = 0
# # remove finished jobs from awaiting list
# for job in pending_finished:
# del jobs_with_params[job]
# done_jobs.append(job)
# else:
# expired_sec = int(time.time()) - start_at
# print(f" ...awaiting continues... time left [{TIMEOUT - expired_sec}]")
# if done_jobs:
# print(
# f"Awaiting OK. Left jobs: [{list(jobs_with_params)}], finished jobs: [{done_jobs}]"
# )
# else:
# print("Awaiting FAILED. No job has finished.")
# return done_jobs
def await_jobs(
self, jobs_with_params: Dict[str, Dict[str, Any]], is_release_branch: bool
) -> Dict[str, List[int]]:
"""
awaits pending jobs until they are finished
@jobs_with_params - jobs to await. {JOB_NAME: {"batches": [BATCHES...], "num_batches": NUM_BATCHES}}
returns successfully finished jobs: {JOB_NAME: [BATCHES...]}
"""
if not jobs_with_params:
return {}
poll_interval_sec = 300
TIMEOUT = 3600
await_finished: Dict[str, List[int]] = {}
round_cnt = 0
while len(jobs_with_params) > 4 and round_cnt < 5:
round_cnt += 1
GHActions.print_in_group(
f"Wait pending jobs, round [{round_cnt}]:", list(jobs_with_params)
)
# this is the initial approach to waiting for pending jobs:
# keep waiting for up to TIMEOUT seconds per round while there are more than X(=4) jobs to wait for
# Y(=5) is the max number of rounds
expired_sec = 0
start_at = int(time.time())
while expired_sec < TIMEOUT and jobs_with_params:
time.sleep(poll_interval_sec)
self.update()
jobs_with_params_copy = deepcopy(jobs_with_params)
for job_name in jobs_with_params:
num_batches = jobs_with_params[job_name]["num_batches"]
job_config = CI_CONFIG.get_job_config(job_name)
for batch in jobs_with_params[job_name]["batches"]:
if self.is_pending(
job_name,
batch,
num_batches,
release_branch=is_release_branch
and job_config.required_on_release_branch,
):
continue
print(
f"Job [{job_name}_[{batch}/{num_batches}]] is not pending anymore"
)
# some_job_ready = True
jobs_with_params_copy[job_name]["batches"].remove(batch)
if not jobs_with_params_copy[job_name]["batches"]:
del jobs_with_params_copy[job_name]
if not self.is_successful(
job_name,
batch,
num_batches,
release_branch=is_release_branch
and job_config.required_on_release_branch,
):
print(
f"NOTE: Job [{job_name}:{batch}] finished but no success - remove from awaiting list, do not add to ready"
)
continue
if job_name in await_finished:
await_finished[job_name].append(batch)
else:
await_finished[job_name] = [batch]
jobs_with_params = jobs_with_params_copy
expired_sec = int(time.time()) - start_at
print(
f"...awaiting continues... seconds left [{TIMEOUT - expired_sec}]"
)
if await_finished:
GHActions.print_in_group(
f"Finished jobs, round [{round_cnt}]:",
[f"{job}:{batches}" for job, batches in await_finished.items()],
)
GHActions.print_in_group(
"Remaining jobs:",
[f"{job}:{params['batches']}" for job, params in jobs_with_params.items()],
)
return await_finished
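A hedged sketch of the await_jobs contract as implemented above; the job name is a placeholder and `ci_cache` is assumed to be an updated CiCache, as built in _pre_action:

    # With four or fewer jobs to wait for, the polling loop is skipped entirely
    # and an empty dict is returned immediately.
    assert ci_cache.await_jobs({"Docs check": {"batches": [0], "num_batches": 1}}, False) == {}

    # With more than four pending jobs, the cache is re-read every 300 seconds in up to
    # five rounds of 3600 seconds each; the result maps job names to the batches that
    # finished successfully while waiting, e.g. {"Docs check": [0]}.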
def get_check_name(check_name: str, batch: int, num_batches: int) -> str:
@ -734,6 +814,12 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace:
default=False,
help="skip fetching data about job runs, used in --configure action (for debugging and nigthly ci)",
)
parser.add_argument(
"--force",
action="store_true",
default=False,
help="Used with --run, force the job to run, omitting the ci cache",
)
# FIXME: remove, not used
parser.add_argument(
"--rebuild-all-binaries",
@ -832,7 +918,10 @@ def _pre_action(s3, indata, pr_info):
ci_cache = CiCache(s3, indata["jobs_data"]["digests"])
# for release/master branches reports must be from the same branches
report_prefix = pr_info.head_ref if pr_info.number == 0 else ""
report_prefix = normalize_string(pr_info.head_ref) if pr_info.number == 0 else ""
print(
f"Use report prefix [{report_prefix}], pr_num [{pr_info.number}], head_ref [{pr_info.head_ref}]"
)
reports_files = ci_cache.download_build_reports(file_prefix=report_prefix)
print(f"Pre action done. Report files [{reports_files}] have been downloaded")
@ -883,8 +972,19 @@ def _mark_success_action(
job, batch, num_batches, job_status, pr_info.is_release_branch()
)
print(f"Job [{job}] is ok")
elif job_status:
print(f"Job [{job}] is not ok, status [{job_status.status}]")
elif job_status and not job_status.is_ok():
ci_cache.push_failed(
job, batch, num_batches, job_status, pr_info.is_release_branch()
)
print(f"Job [{job}] is failed with status [{job_status.status}]")
else:
job_status = CommitStatusData(
description="dummy description", status=ERROR, report_url="dummy url"
)
ci_cache.push_failed(
job, batch, num_batches, job_status, pr_info.is_release_branch()
)
print(f"No CommitStatusData for [{job}], push dummy failure to ci_cache")
def _print_results(result: Any, outfile: Optional[str], pretty: bool = False) -> None:
@ -992,8 +1092,8 @@ def _configure_jobs(
jobs_to_do: List[str] = []
jobs_to_skip: List[str] = []
digests: Dict[str, str] = {}
print("::group::Job Digests")
print("::group::Job Digests")
for job in CI_CONFIG.job_generator():
digest = job_digester.get_job_digest(CI_CONFIG.get_digest_config(job))
digests[job] = digest
@ -1003,7 +1103,8 @@ def _configure_jobs(
## b. check what we need to run
ci_cache = None
if not ci_cache_disabled:
ci_cache = CiCache(s3, digests)
ci_cache = CiCache(s3, digests).update()
ci_cache.print_status()
jobs_to_wait: Dict[str, Dict[str, Any]] = {}
@ -1012,10 +1113,13 @@ def _configure_jobs(
job_config = CI_CONFIG.get_job_config(job)
num_batches: int = job_config.num_batches
batches_to_do: List[int] = []
add_to_skip = False
for batch in range(num_batches): # type: ignore
if job_config.pr_only and pr_info.is_release_branch():
continue
if job_config.release_only and not pr_info.is_release_branch():
continue
if job_config.run_by_label:
# this job controlled by label, add to todo if its label is set in pr
if job_config.run_by_label in pr_info.labels:
@ -1036,7 +1140,13 @@ def _configure_jobs(
batches_to_do.append(batch)
# check if it's pending in the cache
if ci_cache.is_pending(job, batch, num_batches, release_branch=False):
if ci_cache.is_pending(
job,
batch,
num_batches,
release_branch=pr_info.is_release_branch()
and job_config.required_on_release_branch,
):
if job in jobs_to_wait:
jobs_to_wait[job]["batches"].append(batch)
else:
@ -1044,10 +1154,12 @@ def _configure_jobs(
"batches": [batch],
"num_batches": num_batches,
}
else:
add_to_skip = True
if batches_to_do:
jobs_to_do.append(job)
elif not job_config.run_by_label:
elif add_to_skip:
# treat job as being skipped only if it's controlled by digest
jobs_to_skip.append(job)
jobs_params[job] = {
@ -1119,49 +1231,64 @@ def _configure_jobs(
"digests": digests,
"jobs_to_do": jobs_to_do,
"jobs_to_skip": jobs_to_skip,
"jobs_to_wait": jobs_to_wait,
"jobs_to_wait": {
job: params for job, params in jobs_to_wait.items() if job in jobs_to_do
},
"jobs_params": {
job: params for job, params in jobs_params.items() if job in jobs_to_do
},
}
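For orientation, an illustrative example of the dictionary returned by _configure_jobs; all values are made up, and both "jobs_to_wait" and "jobs_params" are filtered down to jobs that also appear in "jobs_to_do":

    # Hypothetical values for illustration only.
    jobs_data = {
        "digests": {"Docs check": "0123abcd"},       # job name -> digest of its inputs
        "jobs_to_do": ["Docs check"],                # jobs that have to run in this workflow
        "jobs_to_skip": [],                          # jobs already covered by cache records
        "jobs_to_wait": {},                          # pending jobs to await before running
        "jobs_params": {"Docs check": {"batches": [0], "num_batches": 1}},
    }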
def _create_gh_status(
commit: Any, job: str, batch: int, num_batches: int, job_status: CommitStatusData
) -> None:
print(f"Going to re-create GH status for job [{job}]")
assert job_status.status == SUCCESS, "BUG!"
commit.create_status(
state=job_status.status,
target_url=job_status.report_url,
description=format_description(
f"Reused from [{job_status.pr_num}-{job_status.sha[0:8]}]: "
f"{job_status.description}"
),
context=get_check_name(job, batch=batch, num_batches=num_batches),
)
def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None:
if indata["ci_flags"][Labels.NO_CI_CACHE]:
print("CI cache is disabled - skip restoring commit statuses from CI cache")
return
job_digests = indata["jobs_data"]["digests"]
ci_cache = CiCache(s3, job_digests).update().fetch_records_data()
jobs_to_skip = indata["jobs_data"]["jobs_to_skip"]
jobs_to_do = indata["jobs_data"]["jobs_to_do"]
ci_cache = CiCache(s3, job_digests).update().fetch_records_data().print_status()
# create GH status
pr_info = PRInfo()
commit = get_commit(Github(get_best_robot_token(), per_page=100), pr_info.sha)
def _run_create_status(job: str, batch: int, num_batches: int) -> None:
def _concurrent_create_status(job: str, batch: int, num_batches: int) -> None:
job_status = ci_cache.get_successful(job, batch, num_batches)
if not job_status:
return
print(f"Going to re-create GH status for job [{job}] sha [{pr_info.sha}]")
assert job_status.status == SUCCESS, "BUG!"
commit.create_status(
state=job_status.status,
target_url=job_status.report_url,
description=format_description(
f"Reused from [{job_status.pr_num}-{job_status.sha[0:8]}]: "
f"{job_status.description}"
),
context=get_check_name(job, batch=batch, num_batches=num_batches),
)
_create_gh_status(commit, job, batch, num_batches, job_status)
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
for job in job_digests:
if job not in jobs_to_skip and job not in jobs_to_do:
# no need to create a status for jobs that are not supposed to be executed
continue
if CI_CONFIG.is_build_job(job):
# no GH status for build jobs
continue
num_batches = CI_CONFIG.get_job_config(job).num_batches
for batch in range(num_batches):
future = executor.submit(_run_create_status, job, batch, num_batches)
future = executor.submit(
_concurrent_create_status, job, batch, num_batches
)
futures.append(future)
done, _ = concurrent.futures.wait(futures)
for future in done:
@ -1194,7 +1321,7 @@ def _upload_build_artifacts(
(
get_release_or_pr(pr_info, get_version_from_repo())[1],
pr_info.sha,
CI_CONFIG.normalize_string(build_name),
normalize_string(build_name),
"performance.tar.zst",
)
)
@ -1509,30 +1636,51 @@ def main() -> int:
if not args.skip_jobs and pr_info.has_changes_in_documentation_only():
_update_config_for_docs_only(jobs_data)
# TODO: await pending jobs
# wait for pending jobs to be finished, await_jobs is a long blocking call if any job has to be awaited
# awaited_jobs = ci_cache.await_jobs(jobs_data.get("jobs_to_wait", {}))
# for job in awaited_jobs:
# jobs_to_do = jobs_data["jobs_to_do"]
# if job in jobs_to_do:
# jobs_to_do.remove(job)
# else:
# assert False, "BUG"
# set planned jobs as pending in the CI cache if on the master
if pr_info.is_master() and not args.skip_jobs:
if not args.skip_jobs:
ci_cache = CiCache(s3, jobs_data["digests"])
for job in jobs_data["jobs_to_do"]:
config = CI_CONFIG.get_job_config(job)
if config.run_always or config.run_by_label:
continue
job_params = jobs_data["jobs_params"][job]
ci_cache.push_pending(
job,
job_params["batches"],
config.num_batches,
release_branch=pr_info.is_release_branch(),
if (
pr_info.is_release_branch()
or pr_info.event.get("pull_request", {})
.get("user", {})
.get("login", "not_maxknv")
== "maxknv"
):
# wait for pending jobs to finish; await_jobs is a long blocking call
# (for now, pending jobs are awaited only on release/master branches or for the "maxknv" test login)
ready_jobs_batches_dict = ci_cache.await_jobs(
jobs_data.get("jobs_to_wait", {}), pr_info.is_release_branch()
)
jobs_to_do = jobs_data["jobs_to_do"]
jobs_to_skip = jobs_data["jobs_to_skip"]
jobs_params = jobs_data["jobs_params"]
for job, batches in ready_jobs_batches_dict.items():
if job not in jobs_params:
print(f"WARNING: Job [{job}] is not in the params list")
continue
for batch in batches:
jobs_params[job]["batches"].remove(batch)
if not jobs_params[job]["batches"]:
jobs_to_do.remove(job)
jobs_to_skip.append(job)
del jobs_params[job]
# set planned jobs as pending in the CI cache if on the master
if pr_info.is_master():
for job in jobs_data["jobs_to_do"]:
config = CI_CONFIG.get_job_config(job)
if config.run_always or config.run_by_label:
continue
job_params = jobs_data["jobs_params"][job]
ci_cache.push_pending(
job,
job_params["batches"],
config.num_batches,
release_branch=pr_info.is_release_branch(),
)
if "jobs_to_wait" in jobs_data:
del jobs_data["jobs_to_wait"]
# conclude results
result["git_ref"] = git_ref
@ -1608,25 +1756,17 @@ def main() -> int:
check_name, args.batch, job_config.num_batches
)
assert job_status, "BUG"
commit.create_status(
state=job_status.status,
target_url=job_status.report_url,
description=format_description(
f"Reused from [{job_status.pr_num}-{job_status.sha[0:8]}]: "
f"{job_status.description}"
),
context=get_check_name(
check_name,
batch=args.batch,
num_batches=job_config.num_batches,
),
_create_gh_status(
commit,
check_name,
args.batch,
job_config.num_batches,
job_status,
)
previous_status = job_status.status
print("::group::Commit Status Data")
print(job_status)
print("::endgroup::")
GHActions.print_in_group("Commit Status Data", job_status)
if previous_status:
if previous_status and not args.force:
print(
f"Commit status or Build Report is already present - job will be skipped with status: [{previous_status}]"
)
@ -1648,7 +1788,7 @@ def main() -> int:
if CI_CONFIG.is_build_job(args.job_name):
assert (
indata
), "--infile with config must be provided for POST action of a build type job [{args.job_name}]"
), f"--infile with config must be provided for POST action of a build type job [{args.job_name}]"
build_name = args.job_name
s3_path_prefix = "/".join(
(
@ -1676,7 +1816,7 @@ def main() -> int:
(
get_release_or_pr(pr_info, get_version_from_repo())[0],
pr_info.sha,
CI_CONFIG.normalize_string(
normalize_string(
job_report.check_name or _get_ext_check_name(args.job_name)
),
)

View File

@ -22,6 +22,8 @@ class Labels(metaclass=WithIter):
CI_SET_ARM = "ci_set_arm"
CI_SET_INTEGRATION = "ci_set_integration"
libFuzzer = "libFuzzer"
class Build(metaclass=WithIter):
PACKAGE_RELEASE = "package_release"
@ -138,8 +140,6 @@ class JobNames(metaclass=WithIter):
DOCS_CHECK = "Docs check"
BUGFIX_VALIDATE = "tests bugfix validate check"
MARK_RELEASE_READY = "Mark Commit Release Ready"
# dynamically update JobName with Build jobs
for attr_name in dir(Build):
@ -193,6 +193,8 @@ class JobConfig:
required_on_release_branch: bool = False
# job is for pr workflow only
pr_only: bool = False
# job is for release/master branches only
release_only: bool = False
@dataclass
@ -790,6 +792,7 @@ CI_CONFIG = CiConfig(
name=Build.FUZZERS,
compiler="clang-17",
package_type="fuzzers",
job_config=JobConfig(run_by_label=Labels.libFuzzer),
),
},
builds_report_config={
@ -823,9 +826,6 @@ CI_CONFIG = CiConfig(
),
},
other_jobs_configs={
JobNames.MARK_RELEASE_READY: TestConfig(
"", job_config=JobConfig(required_on_release_branch=True)
),
JobNames.DOCKER_SERVER: TestConfig(
"",
job_config=JobConfig(
@ -909,13 +909,6 @@ CI_CONFIG = CiConfig(
JobNames.STATEFUL_TEST_AARCH64: TestConfig(
Build.PACKAGE_AARCH64, job_config=JobConfig(**stateful_test_common_params) # type: ignore
),
# FIXME: delete?
# "Stateful tests (release, DatabaseOrdinary)": TestConfig(
# Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore
# ),
# "Stateful tests (release, DatabaseReplicated)": TestConfig(
# Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore
# ),
# Stateful tests for parallel replicas
JobNames.STATEFUL_TEST_PARALLEL_REPL_RELEASE: TestConfig(
Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore
@ -997,16 +990,16 @@ CI_CONFIG = CiConfig(
Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore
),
JobNames.UPGRADE_TEST_ASAN: TestConfig(
Build.PACKAGE_ASAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore
Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore
),
JobNames.UPGRADE_TEST_TSAN: TestConfig(
Build.PACKAGE_TSAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore
Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore
),
JobNames.UPGRADE_TEST_MSAN: TestConfig(
Build.PACKAGE_MSAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore
Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore
),
JobNames.UPGRADE_TEST_DEBUG: TestConfig(
Build.PACKAGE_DEBUG, job_config=JobConfig(**upgrade_test_common_params) # type: ignore
Build.PACKAGE_DEBUG, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore
),
JobNames.INTEGRATION_TEST_ASAN: TestConfig(
Build.PACKAGE_ASAN,
@ -1033,7 +1026,7 @@ CI_CONFIG = CiConfig(
job_config=JobConfig(num_batches=4, **integration_test_common_params), # type: ignore
),
JobNames.INTEGRATION_TEST_FLAKY: TestConfig(
Build.PACKAGE_ASAN, job_config=JobConfig(**integration_test_common_params) # type: ignore
Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, **integration_test_common_params) # type: ignore
),
JobNames.COMPATIBILITY_TEST: TestConfig(
Build.PACKAGE_RELEASE,
@ -1080,7 +1073,7 @@ CI_CONFIG = CiConfig(
JobNames.STATELESS_TEST_FLAKY_ASAN: TestConfig(
# replace to non-default
Build.PACKAGE_ASAN,
job_config=JobConfig(**{**statless_test_common_params, "timeout": 3600}), # type: ignore
job_config=JobConfig(pr_only=True, **{**statless_test_common_params, "timeout": 3600}), # type: ignore
),
JobNames.JEPSEN_KEEPER: TestConfig(
Build.BINARY_RELEASE,
@ -1116,7 +1109,7 @@ CI_CONFIG = CiConfig(
),
JobNames.CLCIKBENCH_TEST: TestConfig(Build.PACKAGE_RELEASE),
JobNames.CLCIKBENCH_TEST_ARM: TestConfig(Build.PACKAGE_AARCH64),
JobNames.LIBFUZZER_TEST: TestConfig(Build.FUZZERS), # type: ignore
JobNames.LIBFUZZER_TEST: TestConfig(Build.FUZZERS, job_config=JobConfig(run_by_label=Labels.libFuzzer)), # type: ignore
},
)
CI_CONFIG.validate()

View File

@ -1,6 +1,6 @@
from contextlib import contextmanager
import os
from typing import List, Union, Iterator
from typing import Any, List, Union, Iterator
from pathlib import Path
@ -27,9 +27,22 @@ def is_hex(s):
return False
def normalize_string(string: str) -> str:
lowercase_string = string.lower()
normalized_string = (
lowercase_string.replace(" ", "_")
.replace("-", "_")
.replace("/", "_")
.replace("(", "")
.replace(")", "")
.replace(",", "")
)
return normalized_string
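As a quick, hedged illustration of the helper above (the check-name-like input string is made up; the expected output follows from the replacement rules, it is not taken from a test in this diff):

    # Assumes tests/ci is on sys.path so ci_utils is importable.
    from ci_utils import normalize_string

    # Spaces, dashes and slashes become underscores; parentheses and commas are dropped.
    print(normalize_string("Unit tests (release-build, x86/64)"))
    # -> unit_tests_release_build_x86_64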
class GHActions:
@staticmethod
def print_in_group(group_name: str, lines: Union[str, List[str]]) -> None:
def print_in_group(group_name: str, lines: Union[Any, List[Any]]) -> None:
lines = list(lines)
print(f"::group::{group_name}")
for line in lines:

View File

@ -370,6 +370,9 @@ class CommitStatusData:
def is_ok(self):
return self.status == SUCCESS
def is_failure(self):
return self.status == FAILURE
@staticmethod
def cleanup():
STATUS_FILE_PATH.unlink(missing_ok=True)

View File

@ -44,6 +44,7 @@ TRUSTED_CONTRIBUTORS = {
"kitaisreal",
"k-morozov", # Konstantin Morozov, Yandex Cloud
"justindeguzman", # ClickHouse, Inc
"jrdi", # ClickHouse contributor, TinyBird
]
}

Some files were not shown because too many files have changed in this diff.