Merge remote-tracking branch 'rschu1ze/master' into countMatches-fuzzed

Robert Schulze 2024-02-10 13:22:30 +00:00
commit 4e2d0e8bf6
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
238 changed files with 4181 additions and 3140 deletions

View File

@ -318,15 +318,19 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
MarkReleaseReady:
needs: [RunConfig, BuilderBinDarwin, BuilderBinDarwinAarch64, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Mark Commit Release Ready
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 mark_release_ready.py
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
- BuilderDebRelease
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Mark Commit Release Ready
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
@ -385,6 +389,22 @@ jobs:
test_name: Stateless tests (release, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestS3Debug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (debug, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestS3Tsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (tsan, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
@ -493,6 +513,55 @@ jobs:
test_name: Stateful tests (debug)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
# Parallel replicas
FunctionalStatefulTestDebugParallelReplicas:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (debug, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestUBsanParallelReplicas:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (ubsan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestMsanParallelReplicas:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (msan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestTsanParallelReplicas:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (tsan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestAsanParallelReplicas:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (asan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestReleaseParallelReplicas:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (release, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
########################### ClickBench #######################################################
##############################################################################################
@ -700,6 +769,28 @@ jobs:
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
############################ SQLLOGIC TEST ###################################################
##############################################################################################
SQLLogicTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Sqllogic test (release)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
##################################### SQL TEST ###############################################
##############################################################################################
SQLTest:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: SQLTest
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
###################################### SQLANCER FUZZERS ######################################
##############################################################################################
SQLancerTestRelease:
@ -732,6 +823,8 @@ jobs:
- FunctionalStatelessTestTsan
- FunctionalStatelessTestMsan
- FunctionalStatelessTestUBsan
- FunctionalStatelessTestS3Debug
- FunctionalStatelessTestS3Tsan
- FunctionalStatefulTestDebug
- FunctionalStatefulTestRelease
- FunctionalStatefulTestAarch64
@ -739,6 +832,12 @@ jobs:
- FunctionalStatefulTestTsan
- FunctionalStatefulTestMsan
- FunctionalStatefulTestUBsan
- FunctionalStatefulTestDebugParallelReplicas
- FunctionalStatefulTestUBsanParallelReplicas
- FunctionalStatefulTestMsanParallelReplicas
- FunctionalStatefulTestTsanParallelReplicas
- FunctionalStatefulTestAsanParallelReplicas
- FunctionalStatefulTestReleaseParallelReplicas
- StressTestDebug
- StressTestAsan
- StressTestTsan
@ -764,6 +863,8 @@ jobs:
- UnitTestsReleaseClang
- SQLancerTestRelease
- SQLancerTestDebug
- SQLLogicTestRelease
- SQLTest
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code

View File

@ -1002,7 +1002,7 @@ jobs:
####################################### libFuzzer ###########################################
#############################################################################################
libFuzzer:
if: ${{ !failure() && !cancelled() && contains(github.event.pull_request.labels.*.name, 'libFuzzer') }}
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, StyleCheck]
uses: ./.github/workflows/libfuzzer.yml
with:

View File

@ -91,6 +91,8 @@ jobs:
build_name: package_release
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
BuilderDebAarch64:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
@ -99,6 +101,8 @@ jobs:
build_name: package_aarch64
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
BuilderDebAsan:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
@ -142,6 +146,8 @@ jobs:
build_name: binary_darwin
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
BuilderBinDarwinAarch64:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
@ -150,6 +156,8 @@ jobs:
build_name: binary_darwin_aarch64
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
############################################################################################
##################################### Docker images #######################################
############################################################################################
@ -206,13 +214,8 @@ jobs:
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderDebRelease
- BuilderDebAarch64
- BuilderDebAsan
- BuilderDebTsan
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
- BuilderBinDarwin
- BuilderBinDarwinAarch64
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
@ -225,7 +228,6 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
MarkReleaseReady:
if: ${{ !failure() && !cancelled() }}
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
@ -235,8 +237,6 @@ jobs:
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Mark Commit Release Ready
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -26,6 +26,10 @@ name: Build ClickHouse
description: json ci data
type: string
required: true
force:
description: disallow job skipping
type: boolean
default: false
additional_envs:
description: additional ENV variables to setup the job
type: string
@ -33,7 +37,7 @@ name: Build ClickHouse
jobs:
Build:
name: Build-${{inputs.build_name}}
if: contains(fromJson(inputs.data).jobs_data.jobs_to_do, inputs.build_name)
if: ${{ contains(fromJson(inputs.data).jobs_data.jobs_to_do, inputs.build_name) || inputs.force }}
env:
GITHUB_JOB_OVERRIDDEN: Build-${{inputs.build_name}}
runs-on: [self-hosted, '${{inputs.runner_type}}']
@ -78,13 +82,15 @@ jobs:
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" \
--infile ${{ toJson(inputs.data) }} \
--job-name "$BUILD_NAME" \
--run
--run \
${{ inputs.force && '--force' || '' }}
- name: Post
# there may still be a build report to upload for a failed build job
if: ${{ !cancelled() }}
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.build_name}}'
- name: Mark as done
if: ${{ !cancelled() }}
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --mark-success --job-name '${{inputs.build_name}}'
- name: Clean

View File

@ -107,6 +107,7 @@ jobs:
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.test_name}}'
- name: Mark as done
if: ${{ !cancelled() }}
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --mark-success --job-name '${{inputs.test_name}}' --batch ${{matrix.batch}}
- name: Clean

View File

@ -254,10 +254,17 @@ endif()
include(cmake/cpu_features.cmake)
# Asynchronous unwind tables are needed for Query Profiler.
# They are already by default on some platforms but possibly not on all platforms.
# Enable it explicitly.
set (COMPILER_FLAGS "${COMPILER_FLAGS} -fasynchronous-unwind-tables")
# Query Profiler doesn't work on MacOS for several reasons
# - PHDR cache is not available
# - We use native functionality to get stacktraces which is not async signal safe
# and thus we don't need to generate asynchronous unwind tables
if (NOT OS_DARWIN)
# Asynchronous unwind tables are needed for Query Profiler.
# They are already by default on some platforms but possibly not on all platforms.
# Enable it explicitly.
set (COMPILER_FLAGS "${COMPILER_FLAGS} -fasynchronous-unwind-tables")
endif()
# Reproducible builds.
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")

View File

@ -22,6 +22,7 @@
#include <cstddef>
#include <map>
#include <vector>
#include "Poco/Channel.h"
#include "Poco/Format.h"
#include "Poco/Foundation.h"
@ -871,21 +872,11 @@ public:
/// If the Logger does not yet exist, it is created, based
/// on its parent logger.
static LoggerPtr getShared(const std::string & name);
static LoggerPtr getShared(const std::string & name, bool should_be_owned_by_shared_ptr_if_created = true);
/// Returns a shared pointer to the Logger with the given name.
/// If the Logger does not yet exist, it is created, based
/// on its parent logger.
static Logger & unsafeGet(const std::string & name);
/// Returns a reference to the Logger with the given name.
/// If the Logger does not yet exist, it is created, based
/// on its parent logger.
///
/// WARNING: This method is not thread safe. You should
/// probably use get() instead.
/// The only time this method should be used is during
/// program initialization, when only one thread is running.
static Logger & create(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION);
/// Creates and returns a reference to a Logger with the
given name. The Logger's Channel and log level are set as
@ -932,6 +923,16 @@ public:
static const std::string ROOT; /// The name of the root logger ("").
public:
struct LoggerEntry
{
Poco::Logger * logger;
bool owned_by_shared_ptr = false;
};
using LoggerMap = std::unordered_map<std::string, LoggerEntry>;
using LoggerMapIterator = LoggerMap::iterator;
protected:
Logger(const std::string & name, Channel * pChannel, int level);
~Logger();
@ -940,12 +941,16 @@ protected:
void log(const std::string & text, Message::Priority prio, const char * file, int line);
static std::string format(const std::string & fmt, int argc, std::string argv[]);
static Logger & unsafeCreate(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION);
static Logger & parent(const std::string & name);
static void add(Logger * pLogger);
static Logger * find(const std::string & name);
private:
static std::pair<Logger::LoggerMapIterator, bool> unsafeGet(const std::string & name, bool get_shared);
static Logger * unsafeGetRawPtr(const std::string & name);
static std::pair<LoggerMapIterator, bool> unsafeCreate(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION);
static Logger & parent(const std::string & name);
static std::pair<LoggerMapIterator, bool> add(Logger * pLogger);
static std::optional<LoggerMapIterator> find(const std::string & name);
static Logger * findRawPtr(const std::string & name);
Logger();
Logger(const Logger &);
Logger & operator=(const Logger &);

View File

@ -38,14 +38,7 @@ std::mutex & getLoggerMutex()
return *logger_mutex;
}
struct LoggerEntry
{
Poco::Logger * logger;
bool owned_by_shared_ptr = false;
};
using LoggerMap = std::unordered_map<std::string, LoggerEntry>;
LoggerMap * _pLoggerMap = nullptr;
Poco::Logger::LoggerMap * _pLoggerMap = nullptr;
}
@ -337,10 +330,12 @@ struct LoggerDeleter
}
};
inline LoggerPtr makeLoggerPtr(Logger & logger)
inline LoggerPtr makeLoggerPtr(Logger & logger, bool owned_by_shared_ptr)
{
return std::shared_ptr<Logger>(&logger, LoggerDeleter());
if (owned_by_shared_ptr)
return LoggerPtr(&logger, LoggerDeleter());
return LoggerPtr(std::shared_ptr<void>{}, &logger);
}
}
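A note on the trick in `makeLoggerPtr` above: the non-owning branch uses `shared_ptr`'s aliasing constructor, pairing an empty control block with a raw pointer, so no reference count is allocated and no deleter ever runs. A minimal standalone sketch of that idiom (the `Widget` type is illustrative, not part of Poco):

```cpp
#include <cassert>
#include <memory>

struct Widget { int value = 42; };

int main()
{
    Widget widget; /// Lifetime managed elsewhere (here: the stack).

    /// Aliasing constructor: adopt the (empty) control block of the first
    /// argument but point at `widget`. The resulting shared_ptr is
    /// dereferenceable yet owns nothing, so it never deletes `widget`.
    std::shared_ptr<Widget> non_owning(std::shared_ptr<void>{}, &widget);

    assert(non_owning.use_count() == 0); /// Empty control block.
    assert(non_owning->value == 42);     /// Still usable as a pointer.
}
```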
@ -350,64 +345,67 @@ Logger& Logger::get(const std::string& name)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
Logger & logger = unsafeGet(name);
/** If there are already shared pointer created for this logger
* we need to increment Logger reference count and now logger
* is owned by logger infrastructure.
*/
auto it = _pLoggerMap->find(name);
if (it->second.owned_by_shared_ptr)
{
it->second.logger->duplicate();
it->second.owned_by_shared_ptr = false;
}
return logger;
auto [it, inserted] = unsafeGet(name, false /*get_shared*/);
return *it->second.logger;
}
LoggerPtr Logger::getShared(const std::string & name)
LoggerPtr Logger::getShared(const std::string & name, bool should_be_owned_by_shared_ptr_if_created)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
bool logger_exists = _pLoggerMap && _pLoggerMap->contains(name);
auto [it, inserted] = unsafeGet(name, true /*get_shared*/);
Logger & logger = unsafeGet(name);
/** If logger already exists, then this shared pointer does not own it.
* If logger does not exists, logger infrastructure could be already destroyed
* or logger was created.
/** If the logger was created during `unsafeGet`, then this shared pointer owns it.
* If the logger already existed, then this shared pointer does not own it.
*/
if (logger_exists)
{
logger.duplicate();
}
else if (_pLoggerMap)
{
_pLoggerMap->find(name)->second.owned_by_shared_ptr = true;
}
if (inserted && should_be_owned_by_shared_ptr_if_created)
it->second.owned_by_shared_ptr = true;
return makeLoggerPtr(logger);
return makeLoggerPtr(*it->second.logger, it->second.owned_by_shared_ptr);
}
Logger& Logger::unsafeGet(const std::string& name)
std::pair<Logger::LoggerMapIterator, bool> Logger::unsafeGet(const std::string& name, bool get_shared)
{
Logger* pLogger = find(name);
if (!pLogger)
std::optional<Logger::LoggerMapIterator> optional_logger_it = find(name);
if (optional_logger_it)
{
auto & logger_it = *optional_logger_it;
if (logger_it->second.owned_by_shared_ptr)
{
logger_it->second.logger->duplicate();
if (!get_shared)
logger_it->second.owned_by_shared_ptr = false;
}
}
if (!optional_logger_it)
{
Logger * logger = nullptr;
if (name == ROOT)
{
pLogger = new Logger(name, 0, Message::PRIO_INFORMATION);
logger = new Logger(name, nullptr, Message::PRIO_INFORMATION);
}
else
{
Logger& par = parent(name);
pLogger = new Logger(name, par.getChannel(), par.getLevel());
logger = new Logger(name, par.getChannel(), par.getLevel());
}
add(pLogger);
return add(logger);
}
return *pLogger;
return std::make_pair(*optional_logger_it, false);
}
Logger * Logger::unsafeGetRawPtr(const std::string & name)
{
return unsafeGet(name, false /*get_shared*/).first->second.logger;
}
@ -415,24 +413,24 @@ Logger& Logger::create(const std::string& name, Channel* pChannel, int level)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
return unsafeCreate(name, pChannel, level);
return *unsafeCreate(name, pChannel, level).first->second.logger;
}
LoggerPtr Logger::createShared(const std::string & name, Channel * pChannel, int level)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
Logger & logger = unsafeCreate(name, pChannel, level);
_pLoggerMap->find(name)->second.owned_by_shared_ptr = true;
auto [it, inserted] = unsafeCreate(name, pChannel, level);
it->second.owned_by_shared_ptr = true;
return makeLoggerPtr(logger);
return makeLoggerPtr(*it->second.logger, it->second.owned_by_shared_ptr);
}
Logger& Logger::root()
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
return unsafeGet(ROOT);
return *unsafeGetRawPtr(ROOT);
}
@ -440,7 +438,11 @@ Logger* Logger::has(const std::string& name)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
return find(name);
auto optional_it = find(name);
if (!optional_it)
return nullptr;
return (*optional_it)->second.logger;
}
@ -459,20 +461,32 @@ void Logger::shutdown()
}
delete _pLoggerMap;
_pLoggerMap = 0;
_pLoggerMap = nullptr;
}
}
Logger* Logger::find(const std::string& name)
std::optional<Logger::LoggerMapIterator> Logger::find(const std::string& name)
{
if (_pLoggerMap)
{
LoggerMap::iterator it = _pLoggerMap->find(name);
if (it != _pLoggerMap->end())
return it->second.logger;
return it;
return {};
}
return 0;
return {};
}
Logger * Logger::findRawPtr(const std::string & name)
{
auto optional_it = find(name);
if (!optional_it)
return nullptr;
return (*optional_it)->second.logger;
}
@ -490,28 +504,28 @@ void Logger::names(std::vector<std::string>& names)
}
}
Logger& Logger::unsafeCreate(const std::string & name, Channel * pChannel, int level)
std::pair<Logger::LoggerMapIterator, bool> Logger::unsafeCreate(const std::string & name, Channel * pChannel, int level)
{
if (find(name)) throw ExistsException();
Logger* pLogger = new Logger(name, pChannel, level);
add(pLogger);
return *pLogger;
return add(pLogger);
}
Logger& Logger::parent(const std::string& name)
{
std::string::size_type pos = name.rfind('.');
if (pos != std::string::npos)
{
std::string pname = name.substr(0, pos);
Logger* pParent = find(pname);
Logger* pParent = findRawPtr(pname);
if (pParent)
return *pParent;
else
return parent(pname);
}
else return unsafeGet(ROOT);
else return *unsafeGetRawPtr(ROOT);
}
@ -579,12 +593,14 @@ namespace
}
void Logger::add(Logger* pLogger)
std::pair<Logger::LoggerMapIterator, bool> Logger::add(Logger* pLogger)
{
if (!_pLoggerMap)
_pLoggerMap = new LoggerMap;
_pLoggerMap = new Logger::LoggerMap;
_pLoggerMap->emplace(pLogger->name(), LoggerEntry{pLogger, false /*owned_by_shared_ptr*/});
auto result = _pLoggerMap->emplace(pLogger->name(), LoggerEntry{pLogger, false /*owned_by_shared_ptr*/});
assert(result.second);
return result;
}

View File

@ -1,6 +1,10 @@
#include <libunwind.h>
/// On MacOS this function will be replaced with a dynamic symbol
/// from the system library.
#if !defined(OS_DARWIN)
int backtrace(void ** buffer, int size)
{
return unw_backtrace(buffer, size);
}
#endif
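For context, the shim keeps the standard `backtrace` contract: fill a buffer with return addresses and report how many were captured. A minimal usage sketch (on glibc systems the declaration comes from `<execinfo.h>`; on non-Darwin platforms in this repository the libunwind-backed definition above is what gets linked):

```cpp
#include <cstdio>
#include <execinfo.h> /// Declares backtrace(); the shim above provides the definition.

void printCurrentStack()
{
    void * frames[64];
    /// Capture up to 64 return addresses of the current call stack.
    int count = backtrace(frames, 64);
    for (int i = 0; i < count; ++i)
        std::printf("#%d %p\n", i, frames[i]);
}
```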

contrib/libuv vendored

@ -1 +1 @@
Subproject commit 3a85b2eb3d83f369b8a8cafd329d7e9dc28f60cf
Subproject commit 4482964660c77eec1166cd7d14fb915e3dbd774a

@ -1 +1 @@
Subproject commit 2568a7cd1297c7c3044b0f3cc0c23a6f6444d856
Subproject commit d2142eed98046a47ff7112e3cc1e197c8a5cd80f

View File

@ -451,3 +451,24 @@ To disallow concurrent backup/restore, you can use these settings respectively.
The default value for both is true, so by default concurrent backup/restores are allowed.
When these settings are false on a cluster, only 1 backup/restore is allowed to run on a cluster at a time.
## Configuring BACKUP/RESTORE to use an AzureBlobStorage Endpoint
To write backups to an AzureBlobStorage container you need the following pieces of information:
- AzureBlobStorage endpoint connection string / URL
- Container
- Path
- Account name (if URL is specified)
- Account key (if URL is specified)
The destination for a backup will be specified like this:
```
AzureBlobStorage('<connection string>/<url>', '<container>', '<path>', '<account name>', '<account key>')
```
```sql
BACKUP TABLE data TO AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;',
'test_container', 'data_backup');
RESTORE TABLE data AS data_restored FROM AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;',
'test_container', 'data_backup');
```

View File

@ -28,6 +28,8 @@ The maximum amount of RAM to use for running a query on a single server.
The default setting is unlimited (set to `0`).
Cloud default value: depends on the amount of RAM on the replica.
The setting does not consider the volume of available memory or the total volume of memory on the machine.
The restriction applies to a single query within a single server.
You can use `SHOW PROCESSLIST` to see the current memory consumption for each query.
@ -104,7 +106,9 @@ Possible values:
- Maximum volume of RAM (in bytes) that can be used by the single [GROUP BY](../../sql-reference/statements/select/group-by.md#select-group-by-clause) operation.
- 0 — `GROUP BY` in external memory disabled.
Default value: 0.
Default value: `0`.
Cloud default value: half the memory amount per replica.
## max_bytes_before_external_sort {#settings-max_bytes_before_external_sort}
@ -115,6 +119,8 @@ Enables or disables execution of `ORDER BY` clauses in external memory. See [ORD
Default value: 0.
Cloud default value: half the memory amount per replica.
## max_rows_to_sort {#max-rows-to-sort}
A maximum number of rows before sorting. This allows you to limit memory consumption when sorting.
@ -129,7 +135,11 @@ What to do if the number of rows received before sorting exceeds one of the limi
## max_result_rows {#setting-max_result_rows}
Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query.
Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query. No limit is applied when the value is `0`.
Default value: `0`.
Cloud default value: `0`.
## max_result_bytes {#max-result-bytes}
@ -137,10 +147,14 @@ Limit on the number of bytes in the result. The same as the previous setting.
## result_overflow_mode {#result-overflow-mode}
What to do if the volume of the result exceeds one of the limits: throw or break. By default, throw.
What to do if the volume of the result exceeds one of the limits: throw or break.
Using `break` is similar to using `LIMIT`. `break` interrupts execution only at the block level. This means that the number of returned rows is greater than [max_result_rows](#setting-max_result_rows), is a multiple of [max_block_size](../../operations/settings/settings.md#setting-max_block_size), and depends on [max_threads](../../operations/settings/settings.md#max_threads).
Default value: `throw`.
Cloud default value: `throw`.
Example:
``` sql

View File

@ -212,6 +212,8 @@ Possible values:
Default value: `'basic'`.
Cloud default value: `'best_effort'`.
See also:
- [DateTime data type.](../../sql-reference/data-types/datetime.md)

View File

@ -508,7 +508,9 @@ Possible values:
- Any positive integer number of hops.
- 0 — No hops allowed.
Default value: 0.
Default value: `0`.
Cloud default value: `10`.
## insert_null_as_default {#insert_null_as_default}
@ -1126,7 +1128,9 @@ Possible values:
- 0 (or 1) — `INSERT SELECT` no parallel execution.
- Positive integer. Bigger than 1.
Default value: 0.
Default value: `0`.
Cloud default value: from `2` to `4`, depending on the service size.
Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#max_threads) setting.
Higher values will lead to higher memory usage.
@ -1207,7 +1211,9 @@ Default value: 10000.
Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response.
Default value: 0
Default value: `0`.
Cloud default value: `1`.
## poll_interval {#poll-interval}
@ -1946,6 +1952,8 @@ Possible values:
Default value: `200`.
Cloud default value: `1000`.
### async_insert_poll_timeout_ms {#async-insert-poll-timeout-ms}
Timeout in milliseconds for polling data from asynchronous insert queue.
@ -2130,7 +2138,9 @@ Possible values:
- Positive integer.
- 0 — Retries are disabled
Default value: 0
Default value: `20`.
Cloud default value: `20`.
Keeper request retries are done after some timeout. The timeout is controlled by the following settings: `insert_keeper_retry_initial_backoff_ms`, `insert_keeper_retry_max_backoff_ms`.
The first retry is done after `insert_keeper_retry_initial_backoff_ms` timeout. The consequent timeouts will be calculated as follows:
@ -2660,6 +2670,8 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md).
Default value: 1000000000 nanoseconds (once a second).
**Temporarily disabled in ClickHouse Cloud.**
See also:
- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
@ -2683,6 +2695,8 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md).
Default value: 1000000000 nanoseconds.
**Temporarily disabled in ClickHouse Cloud.**
See also:
- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
@ -2804,6 +2818,8 @@ Possible values:
Default value: `0`.
Cloud default value: `1`.
**See Also**
- [Distributed Table Engine](../../engines/table-engines/special/distributed.md/#distributed)
@ -3319,7 +3335,9 @@ Possible values:
- a string representing any valid table engine name
Default value: `None`
Default value: `MergeTree`.
Cloud default value: `SharedMergeTree`.
**Example**
@ -3895,6 +3913,8 @@ Possible values:
Default value: `0`.
Cloud default value: `1`.
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds.
@ -3933,6 +3953,8 @@ Possible values:
Default value: `throw`.
Cloud default value: `none`.
## flatten_nested {#flatten-nested}
Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/index.md) columns.
@ -4068,6 +4090,8 @@ Possible values:
Default value: `1`.
Cloud default value: `0`.
:::note
`alter_sync` is applicable to `Replicated` tables only, it does nothing to alters of not `Replicated` tables.
:::
@ -4723,6 +4747,8 @@ other connections are cancelled. Queries with `max_parallel_replicas > 1` are su
Enabled by default.
Disabled by default on Cloud.
## hedged_connection_timeout {#hedged_connection_timeout}
If we can't establish connection with replica after this timeout in hedged requests, we start working with the next replica without cancelling connection to the previous.
@ -5348,10 +5374,11 @@ Default value: `false`.
## max_partition_size_to_drop
Restriction on dropping partitions in query time.
Restriction on dropping partitions at query time. The value 0 means that you can drop partitions without any restrictions.
Default value: 50 GB.
The value 0 means that you can drop partitions without any restrictions.
Cloud default value: 1 TB.
:::note
This query setting overwrites its server setting equivalent, see [max_partition_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-partition-size-to-drop)
@ -5359,10 +5386,11 @@ This query setting overwrites its server setting equivalent, see [max_partition_
## max_table_size_to_drop
Restriction on deleting tables in query time.
Restriction on deleting tables at query time. The value 0 means that you can delete all tables without any restrictions.
Default value: 50 GB.
The value 0 means that you can delete all tables without any restrictions.
Cloud default value: 1 TB.
:::note
This query setting overwrites its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop)

View File

@ -206,7 +206,7 @@ Some of these settings will disable cache features per query/profile that are en
- `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` - allows a query to use the cache only if the data already exists in it; otherwise, the queried data will not be written to the local cache storage. Default: `false`.
- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on. Default: `false`.
- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on. Default: `false`. Cloud default value: `true`.
- `enable_filesystem_cache_log` - turn on logging to the `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. It can be turned on for specific queries or enabled in a profile. Default: `false`.

View File

@ -112,7 +112,7 @@ Note that:
For the query to run successfully, the following conditions must be met:
- Both tables must have the same structure.
- Both tables must have the same order by key and the same primary key.
- Both tables must have the same partition key, the same order by key and the same primary key.
- Both tables must have the same indices and projections.
- Both tables must have the same storage policy.

View File

@ -20,6 +20,6 @@ sidebar_position: 11
If you haven't seen our T-shirts, check out our videos about ClickHouse. For example, this one:
![iframe](https://www.youtube.com/embed/bSyQahMVZ7w)
<iframe width="675" height="380" src="https://www.youtube.com/embed/bSyQahMVZ7w" frameborder="0" allow="accelerometer; autoplay; gyroscope; picture-in-picture" allowfullscreen></iframe>
P.S. These T-shirts are not for sale; they are given away for free at most [ClickHouse](https://clickhouse.com/#meet) meetups, usually as a reward for the most interesting questions or other forms of active participation.

View File

@ -19,7 +19,7 @@ slug: /ru/operations/system-tables/grants
- `column` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the column to which access is granted.
- `is_partial_revoke` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. Shows whether some privileges have been revoked. Possible values:
- `0` — The row describes a partial revoke.
- `1` — The row describes a grant.
- `0` — The row describes a grant.
- `1` — The row describes a partial revoke.
- `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The permission was granted with the `WITH GRANT OPTION` option; for details, see [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax).

View File

@ -826,10 +826,12 @@ try
0, // We don't need any threads once all the parts will be deleted
server_settings.max_parts_cleaning_thread_pool_size);
auto max_database_replicated_create_table_thread_pool_size = server_settings.max_database_replicated_create_table_thread_pool_size ?
server_settings.max_database_replicated_create_table_thread_pool_size : getNumberOfPhysicalCPUCores();
getDatabaseReplicatedCreateTablesThreadPool().initialize(
server_settings.max_database_replicated_create_table_thread_pool_size,
max_database_replicated_create_table_thread_pool_size,
0, // We don't need any threads once all the tables will be created
server_settings.max_database_replicated_create_table_thread_pool_size);
max_database_replicated_create_table_thread_pool_size);
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))
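The fix above treats a configured pool size of 0 as "auto" and falls back to the number of physical CPU cores instead of initializing the pool with zero threads. A minimal sketch of that pattern, using `std::thread::hardware_concurrency()` as an assumed stand-in for the internal `getNumberOfPhysicalCPUCores()` helper:

```cpp
#include <cstddef>
#include <thread>

/// A configured size of 0 means "auto": fall back to the machine's core count.
size_t effectivePoolSize(size_t configured_size)
{
    if (configured_size != 0)
        return configured_size;
    return std::thread::hardware_concurrency();
}
```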

View File

@ -527,10 +527,11 @@ let queries = [];
/// Query parameters with predefined default values.
/// All other parameters will be automatically found in the queries.
let params = {
let default_params = {
'rounding': '60',
'seconds': '86400'
};
let params = default_params;
/// Palette generation for charts
function generatePalette(baseColor, numColors) {
@ -594,13 +595,19 @@ let plots = [];
let charts = document.getElementById('charts');
/// This is not quite correct (we cannot really parse SQL with regexp) but tolerable.
const query_param_regexp = /\{(\w+):[^}]+\}/g;
const query_param_regexp = /\{(\w+):([^}]+)\}/g;
/// Automatically parse more parameters from the queries.
function findParamsInQuery(query, new_params) {
const typeDefault = (type) => type.includes('Int') ? '0'
: (type.includes('Float') ? '0.0'
: (type.includes('Bool') ? 'false'
: (type.includes('Date') ? new Date().toISOString().slice(0, 10)
: (type.includes('UUID') ? '00000000-0000-0000-0000-000000000000'
: ''))));
for (let match of query.matchAll(query_param_regexp)) {
const name = match[1];
new_params[name] = params[name] || '';
new_params[name] = params[name] || default_params[name] || typeDefault(match[2]);
}
}

View File

@ -108,6 +108,11 @@ public:
*/
QueryTreeNodePtr getColumnSourceOrNull() const;
void setColumnSource(const QueryTreeNodePtr & source)
{
getSourceWeakPointer() = source;
}
QueryTreeNodeType getNodeType() const override
{
return QueryTreeNodeType::COLUMN;

View File

@ -31,7 +31,7 @@ public:
virtual String getDescription() = 0;
/// Run pass over query tree
virtual void run(QueryTreeNodePtr query_tree_node, ContextPtr context) = 0;
virtual void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) = 0;
};
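The signature change from `QueryTreeNodePtr` to `QueryTreeNodePtr &` matters for passes that need to replace the root node itself rather than only mutate its children: with pass-by-value, reseating the pointer would be invisible to the caller. A self-contained sketch of the idea (simplified stand-ins, not the real Analyzer interfaces):

```cpp
#include <cassert>
#include <memory>

struct Node { int kind = 0; };
using NodePtr = std::shared_ptr<Node>;

/// Because the pointer is taken by reference, the pass can swap out
/// the root node and the caller observes the replacement.
void replaceRootPass(NodePtr & node)
{
    if (node->kind == 0)
        node = std::make_shared<Node>(Node{1});
}

int main()
{
    NodePtr root = std::make_shared<Node>();
    replaceRootPass(root);
    assert(root->kind == 1);
}
```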

View File

@ -194,7 +194,7 @@ private:
}
void AggregateFunctionsArithmericOperationsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void AggregateFunctionsArithmericOperationsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
AggregateFunctionsArithmericOperationsVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -17,7 +17,7 @@ public:
String getDescription() override { return "Extract arithmeric operations from aggregate functions."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -92,7 +92,7 @@ public:
}
void RewriteArrayExistsToHasPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void RewriteArrayExistsToHasPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
RewriteArrayExistsToHasVisitor visitor(context);
visitor.visit(query_tree_node);

View File

@ -20,7 +20,7 @@ public:
String getDescription() override { return "Rewrite arrayExists(func, arr) functions to has(arr, elem) when logically equivalent"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -67,7 +67,7 @@ private:
}
void AutoFinalOnQueryPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void AutoFinalOnQueryPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
auto visitor = AutoFinalOnQueryPassVisitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -25,7 +25,7 @@ public:
return "Automatically applies final modifier to table expressions in queries if it is supported and if user level final setting is set";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -213,7 +213,7 @@ private:
}
void ComparisonTupleEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void ComparisonTupleEliminationPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
ComparisonTupleEliminationPassVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -17,7 +17,7 @@ public:
String getDescription() override { return "Rewrite tuples comparison into equivalent comparison of tuples arguments"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -132,7 +132,7 @@ private:
}
void ConvertOrLikeChainPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void ConvertOrLikeChainPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
auto or_function_resolver = FunctionFactory::instance().get("or", context);
auto match_function_resolver = FunctionFactory::instance().get("multiMatchAny", context);

View File

@ -14,7 +14,7 @@ public:
String getDescription() override { return "Replaces all the 'or's with {i}like to multiMatchAny"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -718,7 +718,7 @@ public:
}
void ConvertLogicalExpressionToCNFPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void ConvertLogicalExpressionToCNFPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
const auto & settings = context->getSettingsRef();
if (!settings.convert_query_to_cnf)

View File

@ -12,7 +12,7 @@ public:
String getDescription() override { return "Convert logical expression to CNF and apply optimizations using constraints"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -87,7 +87,7 @@ public:
}
void CountDistinctPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void CountDistinctPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
CountDistinctVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -20,7 +20,7 @@ public:
return "Optimize single countDistinct into count over subquery";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -264,7 +264,7 @@ private:
}
void CrossToInnerJoinPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void CrossToInnerJoinPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
CrossToInnerJoinVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -22,7 +22,7 @@ public:
return "Replace CROSS JOIN with INNER JOIN";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -224,7 +224,7 @@ private:
}
void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
FunctionToSubcolumnsVisitor visitor(context);
visitor.visit(query_tree_node);

View File

@ -24,7 +24,7 @@ public:
String getDescription() override { return "Rewrite function to subcolumns, for example tupleElement(column, subcolumn) into column.subcolumn"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -256,7 +256,7 @@ void tryFuseQuantiles(QueryTreeNodePtr query_tree_node, ContextPtr context)
}
void FuseFunctionsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void FuseFunctionsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
tryFuseSumCountAvg(query_tree_node, context);
tryFuseQuantiles(query_tree_node, context);

View File

@ -20,7 +20,7 @@ public:
String getDescription() override { return "Replaces several calls of aggregate functions of the same family into one call"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -249,7 +249,7 @@ private:
}
void GroupingFunctionsResolvePass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void GroupingFunctionsResolvePass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
GroupingFunctionsResolveVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -24,7 +24,7 @@ public:
String getDescription() override { return "Resolve GROUPING functions based on GROUP BY modifiers"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -73,7 +73,7 @@ private:
}
void IfChainToMultiIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void IfChainToMultiIfPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
auto multi_if_function_ptr = FunctionFactory::instance().get("multiIf", context);
IfChainToMultiIfPassVisitor visitor(std::move(multi_if_function_ptr), std::move(context));

View File

@ -18,7 +18,7 @@ public:
String getDescription() override { return "Optimize if chain to multiIf"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -57,7 +57,7 @@ public:
}
void IfConstantConditionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void IfConstantConditionPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
IfConstantConditionVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -21,7 +21,7 @@ public:
String getDescription() override { return "Optimize if, multiIf for constant condition."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -190,7 +190,7 @@ public:
}
void IfTransformStringsToEnumPass::run(QueryTreeNodePtr query, ContextPtr context)
void IfTransformStringsToEnumPass::run(QueryTreeNodePtr & query, ContextPtr context)
{
ConvertStringsToEnumVisitor visitor(std::move(context));
visitor.visit(query);

View File

@ -33,7 +33,7 @@ public:
String getDescription() override { return "Replaces string-type arguments in If and Transform to enum"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -404,12 +404,12 @@ private:
auto operand_type = and_operands[0]->getResultType();
auto function_type = function_node.getResultType();
assert(!function_type->isNullable());
chassert(!function_type->isNullable());
if (!function_type->equals(*operand_type))
{
/// Result of equality operator can be low cardinality, while AND always returns UInt8.
/// In that case we replace `(lc = 1) AND (lc = 1)` with `(lc = 1) AS UInt8`
assert(function_type->equals(*removeLowCardinality(operand_type)));
chassert(function_type->equals(*removeLowCardinality(operand_type)));
node = createCastFunction(std::move(and_operands[0]), function_type, getContext());
}
else
@ -427,7 +427,7 @@ private:
void tryReplaceOrEqualsChainWithIn(QueryTreeNodePtr & node)
{
auto & function_node = node->as<FunctionNode &>();
assert(function_node.getFunctionName() == "or");
chassert(function_node.getFunctionName() == "or");
QueryTreeNodes or_operands;
@ -486,7 +486,7 @@ private:
/// first we create tuple from RHS of equals functions
for (const auto & equals : equals_functions)
{
is_any_nullable |= equals->getResultType()->isNullable();
is_any_nullable |= removeLowCardinality(equals->getResultType())->isNullable();
const auto * equals_function = equals->as<FunctionNode>();
assert(equals_function && equals_function->getFunctionName() == "equals");
@ -554,7 +554,7 @@ private:
}
};
void LogicalExpressionOptimizerPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void LogicalExpressionOptimizerPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
LogicalExpressionOptimizerVisitor visitor(std::move(context));
visitor.visit(query_tree_node);
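On the `assert` → `chassert` swaps above: `chassert` is ClickHouse's checked assertion, gated by a project-specific build flag rather than by `NDEBUG` alone, so it can stay active in sanitizer and CI builds where a plain `assert` would be compiled out. A rough, illustrative sketch of such a macro (not the actual definition, which lives in the repository's common headers):

```cpp
#include <cstdio>
#include <cstdlib>

/// Illustrative only: an assertion controlled by a project flag instead of NDEBUG.
#if defined(ENABLE_CHECKED_ASSERTS)
#    define my_chassert(x) \
        (static_cast<bool>(x) ? void(0) \
                              : (std::fprintf(stderr, "Assertion failed: %s\n", #x), std::abort()))
#else
#    define my_chassert(x) ((void)0)
#endif
```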

View File

@ -109,7 +109,7 @@ public:
"replace chains of equality functions inside an OR with a single IN operator";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -52,7 +52,7 @@ private:
}
void MultiIfToIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void MultiIfToIfPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
auto if_function_ptr = FunctionFactory::instance().get("if", context);
MultiIfToIfVisitor visitor(std::move(if_function_ptr), std::move(context));

View File

@ -17,7 +17,7 @@ public:
String getDescription() override { return "Optimize multiIf with single condition to if."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -64,7 +64,7 @@ private:
}
void NormalizeCountVariantsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void NormalizeCountVariantsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
NormalizeCountVariantsVisitor visitor(context);
visitor.visit(query_tree_node);

View File

@ -20,7 +20,7 @@ public:
String getDescription() override { return "Optimize count(literal), sum(1) into count()."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -215,7 +215,7 @@ private:
}
void OptimizeDateOrDateTimeConverterWithPreimagePass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void OptimizeDateOrDateTimeConverterWithPreimagePass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
OptimizeDateOrDateTimeConverterWithPreimageVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -17,7 +17,7 @@ public:
String getDescription() override { return "Replace predicate having Date/DateTime converters with their preimages"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -130,7 +130,7 @@ private:
}
};
void OptimizeGroupByFunctionKeysPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void OptimizeGroupByFunctionKeysPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
OptimizeGroupByFunctionKeysVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -16,7 +16,7 @@ public:
String getDescription() override { return "Eliminates functions of other keys in GROUP BY section."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -115,7 +115,7 @@ private:
}
void OptimizeGroupByInjectiveFunctionsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void OptimizeGroupByInjectiveFunctionsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
OptimizeGroupByInjectiveFunctionsVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -14,7 +14,7 @@ public:
String getDescription() override { return "Replaces injective functions with their arguments in the GROUP BY section."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -124,7 +124,7 @@ private:
}
void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
OptimizeRedundantFunctionsInOrderByVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -17,7 +17,7 @@ public:
String getDescription() override { return "If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -70,7 +70,7 @@ private:
}
void OrderByLimitByDuplicateEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr)
void OrderByLimitByDuplicateEliminationPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr)
{
OrderByLimitByDuplicateEliminationVisitor visitor;
visitor.visit(query_tree_node);

View File

@ -20,7 +20,7 @@ public:
String getDescription() override { return "Remove duplicate columns from ORDER BY, LIMIT BY."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -50,7 +50,7 @@ public:
}
void OrderByTupleEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr)
void OrderByTupleEliminationPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr)
{
OrderByTupleEliminationVisitor visitor;
visitor.visit(query_tree_node);

View File

@ -17,7 +17,7 @@ public:
String getDescription() override { return "Remove tuple from ORDER BY."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -79,6 +79,8 @@
#include <Analyzer/QueryTreeBuilder.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/Identifier.h>
#include <Poco/Logger.h>
#include <Common/logger_useful.h>
namespace ProfileEvents
{
@ -1066,7 +1068,7 @@ private:
class QueryAnalyzer
{
public:
void resolve(QueryTreeNodePtr node, const QueryTreeNodePtr & table_expression, ContextPtr context)
void resolve(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, ContextPtr context)
{
IdentifierResolveScope scope(node, nullptr /*parent_scope*/);
@ -7649,7 +7651,7 @@ QueryAnalysisPass::QueryAnalysisPass(QueryTreeNodePtr table_expression_)
: table_expression(std::move(table_expression_))
{}
void QueryAnalysisPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void QueryAnalysisPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
QueryAnalyzer analyzer;
analyzer.resolve(query_tree_node, table_expression, context);

View File

@ -89,7 +89,7 @@ public:
return "Resolve type for each query expression. Replace identifiers, matchers with query expressions. Perform constant folding. Evaluate scalar subqueries.";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
private:
QueryTreeNodePtr table_expression;

View File

@ -132,7 +132,7 @@ void updateUsedProjectionIndexes(const QueryTreeNodePtr & query_or_union_node, s
}
void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
std::vector<QueryTreeNodePtr> nodes_to_visit;
nodes_to_visit.push_back(query_tree_node);

View File

@ -17,7 +17,7 @@ public:
String getDescription() override { return "Remove unused projection columns in subqueries."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -109,7 +109,7 @@ private:
}
void RewriteAggregateFunctionWithIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void RewriteAggregateFunctionWithIfPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
RewriteAggregateFunctionWithIfVisitor visitor(context);
visitor.visit(query_tree_node);

View File

@ -20,7 +20,7 @@ public:
return "Rewrite aggregate functions with if expression as argument when logically equivalent";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -120,7 +120,7 @@ private:
}
void RewriteSumFunctionWithSumAndCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void RewriteSumFunctionWithSumAndCountPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
RewriteSumFunctionWithSumAndCountVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -20,7 +20,7 @@ public:
String getDescription() override { return "Rewrite sum(column +/- literal) into sum(column) and literal * count(column)"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -58,7 +58,7 @@ public:
}
void ShardNumColumnToFunctionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void ShardNumColumnToFunctionPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
ShardNumColumnToFunctionVisitor visitor(context);
visitor.visit(query_tree_node);

View File

@ -17,7 +17,7 @@ public:
String getDescription() override { return "Rewrite _shard_num column into shardNum() function"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -178,7 +178,7 @@ private:
}
void SumIfToCountIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void SumIfToCountIfPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
SumIfToCountIfVisitor visitor(context);
visitor.visit(query_tree_node);

View File

@ -23,7 +23,7 @@ public:
String getDescription() override { return "Rewrite sum(if) and sumIf into countIf"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -89,7 +89,7 @@ public:
}
void UniqInjectiveFunctionsEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void UniqInjectiveFunctionsEliminationPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
UniqInjectiveFunctionsEliminationVisitor visitor(std::move(context));
visitor.visit(query_tree_node);

View File

@ -17,7 +17,7 @@ public:
String getDescription() override { return "Remove injective functions from the arguments of uniq functions."; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};

View File

@ -185,7 +185,7 @@ public:
};
void UniqToCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
void UniqToCountPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
UniqToCountVisitor visitor(context);
visitor.visit(query_tree_node);

View File

@ -24,7 +24,7 @@ public:
return "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override;
};
}

View File

@ -33,11 +33,13 @@ void BackupFactory::registerBackupEngine(const String & engine_name, const Creat
void registerBackupEnginesFileAndDisk(BackupFactory &);
void registerBackupEngineS3(BackupFactory &);
void registerBackupEngineAzureBlobStorage(BackupFactory &);
void registerBackupEngines(BackupFactory & factory)
{
registerBackupEnginesFileAndDisk(factory);
registerBackupEngineS3(factory);
registerBackupEngineAzureBlobStorage(factory);
}
BackupFactory::BackupFactory()

View File

@ -0,0 +1,320 @@
#include <Backups/BackupIO_AzureBlobStorage.h>
#if USE_AZURE_BLOB_STORAGE
#include <Common/quoteString.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <Interpreters/Context.h>
#include <IO/SharedThreadPools.h>
#include <IO/HTTPHeaderEntries.h>
#include <Storages/StorageAzureBlobCluster.h>
#include <Disks/IO/ReadBufferFromAzureBlobStorage.h>
#include <Disks/IO/WriteBufferFromAzureBlobStorage.h>
#include <IO/AzureBlobStorage/copyAzureBlobStorageFile.h>
#include <Disks/IDisk.h>
#include <Disks/DiskType.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int AZURE_BLOB_STORAGE_ERROR;
extern const int LOGICAL_ERROR;
}
BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
: BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage"))
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
object_storage = std::make_unique<AzureObjectStorage>("BackupReaderAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),
configuration_.container);
client = object_storage->getAzureBlobStorageClient();
settings = object_storage->getSettings();
}
BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default;
bool BackupReaderAzureBlobStorage::fileExists(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key = configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return object_storage->exists(StoredObject(key));
}
UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key = configuration.blob_path + file_name;
}
else
{
key = file_name;
}
ObjectMetadata object_metadata = object_storage->getObjectMetadata(key);
return object_metadata.size_bytes;
}
std::unique_ptr<SeekableReadBuffer> BackupReaderAzureBlobStorage::readFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key = configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return std::make_unique<ReadBufferFromAzureBlobStorage>(
client, key, read_settings, settings->max_single_read_retries,
settings->max_single_download_retries);
}
void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{
auto destination_data_source_description = destination_disk->getDataSourceDescription();
if ((destination_data_source_description.type == DataSourceType::ObjectStorage)
&& (destination_data_source_description.object_storage_type == ObjectStorageType::Azure)
&& (destination_data_source_description.is_encrypted == encrypted_in_backup))
{
LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName());
auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> &) -> size_t
{
/// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files.
if (blob_path.size() != 2 || mode != WriteMode::Rewrite)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Blob writing function called with unexpected blob_path.size={} or mode={}",
blob_path.size(), mode);
copyAzureBlobStorageFile(
client,
destination_disk->getObjectStorage()->getAzureBlobStorageClient(),
configuration.container,
fs::path(configuration.blob_path) / path_in_backup,
0,
file_size,
/* dest_container */ blob_path[1],
/* dest_path */ blob_path[0],
settings,
read_settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupRDAzure"),
/* for_disk_azure_blob_storage= */ true);
return file_size;
};
destination_disk->writeFileUsingBlobWritingFunction(destination_path, write_mode, write_blob_function);
return; /// copied!
}
/// Fallback to copy through buffers.
BackupReaderDefault::copyFileToDisk(path_in_backup, file_size, encrypted_in_backup, destination_disk, destination_path, write_mode);
}
BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage"))
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),
configuration_.container);
client = object_storage->getAzureBlobStorageClient();
settings = object_storage->getSettings();
}
void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
{
/// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible.
auto source_data_source_description = src_disk->getDataSourceDescription();
if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted))
{
/// getBlobPath() can return more than 2 elements if the file is stored as multiple objects in the AzureBlobStorage container.
/// In this case we can't use the native copy.
if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2)
{
LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName());
copyAzureBlobStorageFile(
src_disk->getObjectStorage()->getAzureBlobStorageClient(),
client,
/* src_container */ blob_path[1],
/* src_path */ blob_path[0],
start_pos,
length,
configuration.container,
fs::path(configuration.blob_path) / path_in_backup,
settings,
read_settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
return; /// copied!
}
}
/// Fallback to copy through buffers.
BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
}
void BackupWriterAzureBlobStorage::copyFile(const String & destination, const String & source, size_t size)
{
LOG_TRACE(log, "Copying file inside backup from {} to {} ", source, destination);
copyAzureBlobStorageFile(
client,
client,
configuration.container,
fs::path(source),
0,
size,
/* dest_container */ configuration.container,
/* dest_path */ destination,
settings,
read_settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"),
/* for_disk_azure_blob_storage= */ true);
}
void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
{
copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
}
BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default;
bool BackupWriterAzureBlobStorage::fileExists(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key = configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return object_storage->exists(StoredObject(key));
}
UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key = configuration.blob_path + file_name;
}
else
{
key = file_name;
}
RelativePathsWithMetadata children;
object_storage->listObjects(key, children, /* max_keys */ 0);
if (children.empty())
throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object with key {} does not exist", key);
return children[0].metadata.size_bytes;
}
std::unique_ptr<ReadBuffer> BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/)
{
String key;
if (startsWith(file_name, "."))
{
key = configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return std::make_unique<ReadBufferFromAzureBlobStorage>(
client, key, read_settings, settings->max_single_read_retries,
settings->max_single_download_retries);
}
std::unique_ptr<WriteBuffer> BackupWriterAzureBlobStorage::writeFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key = configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return std::make_unique<WriteBufferFromAzureBlobStorage>(
client,
key,
settings->max_single_part_upload_size,
settings->max_unexpected_write_error_retries,
DBMS_DEFAULT_BUFFER_SIZE,
write_settings);
}
void BackupWriterAzureBlobStorage::removeFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key = configuration.blob_path + file_name;
}
else
{
key = file_name;
}
StoredObject object(key);
object_storage->removeObjectIfExists(object);
}
void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names)
{
StoredObjects objects;
for (const auto & file_name : file_names)
objects.emplace_back(file_name);
object_storage->removeObjectsIfExist(objects);
}
void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names)
{
StoredObjects objects;
for (const auto & file_name : file_names)
objects.emplace_back(file_name);
object_storage->removeObjectsIfExist(objects);
}
}
#endif
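fileExists, getFileSize, readFile, writeFile and removeFile above all repeat the same key resolution: a file name starting with "." is appended to configuration.blob_path, anything else is used as a full key in the container. A hypothetical helper (not in this diff) stating the rule once:
#include <cassert>
#include <string>
// Hypothetical helper mirroring the repeated snippet in
// BackupReader/WriterAzureBlobStorage: a leading "." marks a file that lives
// next to the backup's blob_path; other names are container-relative keys.
std::string resolveBlobKey(const std::string & blob_path, const std::string & file_name)
{
    if (!file_name.empty() && file_name.front() == '.')
        return blob_path + file_name;
    return file_name;
}
int main()
{
    assert(resolveBlobKey("backups/2024/", ".backup") == "backups/2024/.backup");
    assert(resolveBlobKey("backups/2024/", "data/part.bin") == "data/part.bin");
}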

View File

@ -0,0 +1,68 @@
#pragma once
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
#include <Backups/BackupIO_Default.h>
#include <Disks/DiskType.h>
#include <Storages/StorageAzureBlobCluster.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
/// Represents a backup stored in Azure Blob Storage.
class BackupReaderAzureBlobStorage : public BackupReaderDefault
{
public:
BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
~BackupReaderAzureBlobStorage() override;
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;
private:
const DataSourceDescription data_source_description;
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
StorageAzureBlob::Configuration configuration;
std::unique_ptr<AzureObjectStorage> object_storage;
std::shared_ptr<const AzureObjectStorageSettings> settings;
};
class BackupWriterAzureBlobStorage : public BackupWriterDefault
{
public:
BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
~BackupWriterAzureBlobStorage() override;
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;
void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override;
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
void copyFile(const String & destination, const String & source, size_t size) override;
void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;
private:
std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
void removeFilesBatch(const Strings & file_names);
const DataSourceDescription data_source_description;
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
StorageAzureBlob::Configuration configuration;
std::unique_ptr<AzureObjectStorage> object_storage;
std::shared_ptr<const AzureObjectStorageSettings> settings;
};
}
#endif

View File

@ -939,12 +939,12 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
}
else if (src_disk && from_immutable_file)
{
LOG_TRACE(log, "Writing backup for file {} from {} (disk {}): data file #{}", info.data_file_name, src_file_desc, src_disk->getName(), info.data_file_index);
LOG_INFO(log, "Writing backup for file {} from {} (disk {}): data file #{}", info.data_file_name, src_file_desc, src_disk->getName(), info.data_file_index);
writer->copyFileFromDisk(info.data_file_name, src_disk, src_file_path, info.encrypted_by_disk, info.base_size, info.size - info.base_size);
}
else
{
LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, src_file_desc, info.data_file_index);
LOG_INFO(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, src_file_desc, info.data_file_index);
auto create_read_buffer = [entry, read_settings = writer->getReadSettings()] { return entry->getReadBuffer(read_settings); };
writer->copyDataToFile(info.data_file_name, create_read_buffer, info.base_size, info.size - info.base_size);
}

View File

@ -0,0 +1,172 @@
#include "config.h"
#include <Backups/BackupFactory.h>
#include <Common/Exception.h>
#if USE_AZURE_BLOB_STORAGE
#include <Backups/BackupIO_AzureBlobStorage.h>
#include <Storages/StorageAzureBlob.h>
#include <Backups/BackupImpl.h>
#include <IO/Archives/hasRegisteredArchiveFileExtension.h>
#include <Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <filesystem>
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int SUPPORT_IS_DISABLED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
#if USE_AZURE_BLOB_STORAGE
namespace
{
String removeFileNameFromURL(String & url)
{
Poco::URI url2{url};
String path = url2.getPath();
size_t slash_pos = path.find_last_of('/');
String file_name = path.substr(slash_pos + 1);
path.resize(slash_pos + 1);
url2.setPath(path);
url = url2.toString();
return file_name;
}
}
#endif
void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
{
auto creator_fn = []([[maybe_unused]] const BackupFactory::CreateParams & params) -> std::unique_ptr<IBackup>
{
#if USE_AZURE_BLOB_STORAGE
const String & id_arg = params.backup_info.id_arg;
const auto & args = params.backup_info.args;
StorageAzureBlob::Configuration configuration;
if (!id_arg.empty())
{
const auto & config = params.context->getConfigRef();
auto config_prefix = "named_collections." + id_arg;
if (!config.has(config_prefix))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg);
if (config.has(config_prefix + ".connection_string"))
{
configuration.connection_url = config.getString(config_prefix + ".connection_string");
configuration.is_connection_string = true;
configuration.container = config.getString(config_prefix + ".container");
}
else
{
configuration.connection_url = config.getString(config_prefix + ".storage_account_url");
configuration.is_connection_string = false;
configuration.container = config.getString(config_prefix + ".container");
configuration.account_name = config.getString(config_prefix + ".account_name");
configuration.account_key = config.getString(config_prefix + ".account_key");
}
if (args.size() > 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]");
if (args.size() == 1)
configuration.blob_path = args[0].safeGet<String>();
}
else
{
if (args.size() == 3)
{
configuration.connection_url = args[0].safeGet<String>();
configuration.is_connection_string = true;
configuration.container = args[1].safeGet<String>();
configuration.blob_path = args[2].safeGet<String>();
}
else if (args.size() == 5)
{
configuration.connection_url = args[0].safeGet<String>();
configuration.is_connection_string = false;
configuration.container = args[1].safeGet<String>();
configuration.blob_path = args[2].safeGet<String>();
configuration.account_name = args[3].safeGet<String>();
configuration.account_key = args[4].safeGet<String>();
}
else
{
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Backup AzureBlobStorage requires 3 or 5 arguments: connection string>/<url, container, path, [account name], [account key]");
}
}
BackupImpl::ArchiveParams archive_params;
if (hasRegisteredArchiveFileExtension(configuration.blob_path))
{
if (params.is_internal_backup)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled");
archive_params.archive_name = removeFileNameFromURL(configuration.blob_path);
archive_params.compression_method = params.compression_method;
archive_params.compression_level = params.compression_level;
archive_params.password = params.password;
}
else
{
if (!params.password.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Password is not applicable, backup cannot be encrypted");
}
if (params.open_mode == IBackup::OpenMode::READ)
{
auto reader = std::make_shared<BackupReaderAzureBlobStorage>(configuration,
params.read_settings,
params.write_settings,
params.context);
return std::make_unique<BackupImpl>(
params.backup_info,
archive_params,
params.base_backup_info,
reader,
params.context,
/* use_same_s3_credentials_for_base_backup */ false);
}
else
{
auto writer = std::make_shared<BackupWriterAzureBlobStorage>(configuration,
params.read_settings,
params.write_settings,
params.context);
return std::make_unique<BackupImpl>(
params.backup_info,
archive_params,
params.base_backup_info,
writer,
params.context,
params.is_internal_backup,
params.backup_coordination,
params.backup_uuid,
params.deduplicate_files,
/* use_same_s3_credentials_for_base_backup */ false);
}
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "AzureBlobStorage support is disabled");
#endif
};
factory.registerBackupEngine("AzureBlobStorage", creator_fn);
}
}
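When the blob path ends with a registered archive extension, the engine splits the archive file name off the path and treats the rest as the backup location. A self-contained model of that split (the real removeFileNameFromURL goes through Poco::URI; this sketch assumes a plain path containing at least one '/'):
#include <cassert>
#include <string>
// Simplified stand-in for removeFileNameFromURL: return everything after the
// last '/' and truncate the input to the directory part.
std::string removeFileNameFromPath(std::string & path)
{
    size_t slash_pos = path.find_last_of('/');
    std::string file_name = path.substr(slash_pos + 1);
    path.resize(slash_pos + 1);
    return file_name;
}
int main()
{
    std::string path = "backups/2024/backup.zip";
    assert(removeFileNameFromPath(path) == "backup.zip");
    assert(path == "backups/2024/");
}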

View File

@ -89,6 +89,7 @@ add_headers_and_sources(clickhouse_common_io Common/SSH)
add_headers_and_sources(clickhouse_common_io IO)
add_headers_and_sources(clickhouse_common_io IO/Archives)
add_headers_and_sources(clickhouse_common_io IO/S3)
add_headers_and_sources(clickhouse_common_io IO/AzureBlobStorage)
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)
@ -141,6 +142,7 @@ endif()
if (TARGET ch_contrib::azure_sdk)
add_headers_and_sources(dbms Disks/ObjectStorages/AzureBlobStorage)
add_headers_and_sources(dbms IO/AzureBlobStorage)
endif()
if (TARGET ch_contrib::hdfs)
@ -496,6 +498,7 @@ if (TARGET ch_contrib::aws_s3)
endif()
if (TARGET ch_contrib::azure_sdk)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::azure_sdk)
dbms_target_link_libraries (PRIVATE ch_contrib::azure_sdk)
endif()

View File

@ -19,7 +19,6 @@
#include <Storages/MergeTree/RequestResponse.h>
#include <atomic>
#include <optional>
#include "config.h"

View File

@ -27,14 +27,12 @@ ConnectionEstablisher::ConnectionEstablisher(
const Settings & settings_,
LoggerPtr log_,
const QualifiedTableName * table_to_check_)
: pool(std::move(pool_)), timeouts(timeouts_), settings(settings_), log(log_), table_to_check(table_to_check_), is_finished(false)
: pool(std::move(pool_)), timeouts(timeouts_), settings(settings_), log(log_), table_to_check(table_to_check_)
{
}
void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::string & fail_message)
{
is_finished = false;
SCOPE_EXIT(is_finished = true);
try
{
ProfileEvents::increment(ProfileEvents::DistributedConnectionTries);

View File

@ -30,8 +30,6 @@ public:
/// Set async callback that will be called when reading from socket blocks.
void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); }
bool isFinished() const { return is_finished; }
private:
ConnectionPoolPtr pool;
const ConnectionTimeouts * timeouts;
@ -39,7 +37,6 @@ private:
LoggerPtr log;
const QualifiedTableName * table_to_check;
bool is_finished;
AsyncCallback async_callback = {};
};

View File

@ -27,6 +27,9 @@ class IConnectionPool : private boost::noncopyable
public:
using Entry = PoolBase<Connection>::Entry;
IConnectionPool() = default;
IConnectionPool(String host_, UInt16 port_) : host(host_), port(port_), address(host + ":" + toString(port_)) {}
virtual ~IConnectionPool() = default;
/// Selects the connection to work.
@ -36,7 +39,15 @@ public:
const Settings & settings,
bool force_connected = true) = 0;
const std::string & getHost() const { return host; }
UInt16 getPort() const { return port; }
const String & getAddress() const { return address; }
virtual Priority getPriority() const { return Priority{1}; }
protected:
const String host;
const UInt16 port = 0;
const String address;
};
using ConnectionPoolPtr = std::shared_ptr<IConnectionPool>;
@ -63,10 +74,9 @@ public:
Protocol::Compression compression_,
Protocol::Secure secure_,
Priority priority_ = Priority{1})
: Base(max_connections_,
: IConnectionPool(host_, port_),
Base(max_connections_,
getLogger("ConnectionPool (" + host_ + ":" + toString(port_) + ")")),
host(host_),
port(port_),
default_database(default_database_),
user(user_),
password(password_),
@ -99,10 +109,6 @@ public:
return entry;
}
const std::string & getHost() const
{
return host;
}
std::string getDescription() const
{
return host + ":" + toString(port);
@ -125,8 +131,6 @@ protected:
}
private:
String host;
UInt16 port;
String default_database;
String user;
String password;
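The hunks above hoist host and port from ConnectionPool into IConnectionPool and add a precomputed address, so code that holds only the interface can report where a pool points without downcasting. A condensed model of the new shape (hypothetical, stripped of the real pool machinery):
#include <cstdint>
#include <iostream>
#include <memory>
#include <string>
struct IConnectionPool
{
    IConnectionPool() = default;
    IConnectionPool(std::string host_, uint16_t port_)
        : host(std::move(host_)), port(port_), address(host + ":" + std::to_string(port)) {}
    virtual ~IConnectionPool() = default;
    const std::string & getHost() const { return host; }
    uint16_t getPort() const { return port; }
    const std::string & getAddress() const { return address; }
protected:
    const std::string host;
    const uint16_t port = 0;
    const std::string address;
};
struct ConnectionPool : IConnectionPool
{
    ConnectionPool(std::string host_, uint16_t port_) : IConnectionPool(std::move(host_), port_) {}
};
int main()
{
    std::shared_ptr<IConnectionPool> pool = std::make_shared<ConnectionPool>("replica-1", 9000);
    std::cout << pool->getAddress() << '\n'; // "replica-1:9000", no downcast needed
}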

View File

@ -1,7 +1,5 @@
#pragma once
#include <compare>
#include <Client/Connection.h>
#include <Storages/MergeTree/RequestResponse.h>

View File

@ -554,6 +554,21 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng
}
MutableColumnPtr ColumnArray::getDataInRange(size_t start, size_t length) const
{
if (start + length > getOffsets().size())
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnArray::getDataInRange method. "
"[start({}) + length({}) > offsets.size({})]", start, length, getOffsets().size());
size_t start_offset = offsetAt(start);
size_t end_offset = offsetAt(start + length);
auto res = getData().cloneEmpty();
res->insertRangeFrom(getData(), start_offset, end_offset - start_offset);
return res;
}
ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const
{
if (typeid_cast<const ColumnUInt8 *>(data.get()))

View File

@ -143,6 +143,10 @@ public:
const ColumnPtr & getOffsetsPtr() const { return offsets; }
ColumnPtr & getOffsetsPtr() { return offsets; }
/// Returns a copy of the data column's part corresponding to a specified range of rows.
/// For example, `getDataInRange(0, size())` is the same as `getDataPtr()->clone()`.
MutableColumnPtr getDataInRange(size_t start, size_t length) const;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
return scatterImpl<ColumnArray>(num_columns, selector);
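getDataInRange translates row indices into positions in the flat nested column via the offsets array: row i occupies the half-open range [offsets[i-1], offsets[i]) of the data column, with offsets[-1] taken as 0. A self-contained model of the arithmetic (plain vectors standing in for IColumn):
#include <cassert>
#include <vector>
// offsets[i] is the end position of row i in the flat data array.
std::vector<int> getDataInRange(const std::vector<int> & data,
                                const std::vector<size_t> & offsets,
                                size_t start, size_t length)
{
    assert(start + length <= offsets.size()); // PARAMETER_OUT_OF_BOUND otherwise
    size_t start_offset = start == 0 ? 0 : offsets[start - 1];
    size_t end_offset = start + length == 0 ? 0 : offsets[start + length - 1];
    return {data.begin() + start_offset, data.begin() + end_offset};
}
int main()
{
    // Rows: [1,2], [3,4,5], [6]  ->  data = {1,2,3,4,5,6}, offsets = {2,5,6}
    std::vector<int> data{1, 2, 3, 4, 5, 6};
    std::vector<size_t> offsets{2, 5, 6};
    assert((getDataInRange(data, offsets, 1, 2) == std::vector<int>{3, 4, 5, 6}));
}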

View File

@ -593,6 +593,7 @@
M(711, FILECACHE_ACCESS_DENIED) \
M(712, TOO_MANY_MATERIALIZED_VIEWS) \
M(713, BROKEN_PROJECTION) \
M(714, UNEXPECTED_CLUSTER) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -2,6 +2,8 @@
#include <memory>
#include <base/defines.h>
#include <Poco/Channel.h>
#include <Poco/Logger.h>
#include <Poco/Message.h>
@ -24,6 +26,16 @@ using LoggerRawPtr = Poco::Logger *;
*/
LoggerPtr getLogger(const std::string & name);
/** Get Logger with the specified name. If the Logger does not exist, it is created.
* This overload was added for a specific purpose: when the logger name is a constexpr string (a string literal).
* Such a Logger is destroyed only during program shutdown.
*/
template <size_t n>
ALWAYS_INLINE LoggerPtr getLogger(const char (&name)[n])
{
return Poco::Logger::getShared(name, false /*should_be_owned_by_shared_ptr_if_created*/);
}
/** Create Logger with the specified name, channel and logging level.
* If the Logger already exists, an exception is thrown.
* The Logger is destroyed when the last shared_ptr that refers to the Logger with the specified name is destroyed.
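The new template overload exists so that call sites passing a string literal bind exactly to the array reference and receive the shared, process-lifetime logger, instead of first constructing a temporary std::string. A sketch of the overload resolution (hypothetical print backend in place of Poco):
#include <cstddef>
#include <iostream>
#include <string>
// Stand-in for the shared_ptr-owned path taken for dynamic names.
void getLogger(const std::string & name) { std::cout << "std::string overload: " << name << '\n'; }
// Stand-in for the literal path: exact match for string literals, so no
// temporary std::string is created; the real overload asks Poco for a
// logger that lives until program shutdown.
template <size_t n>
void getLogger(const char (&name)[n]) { std::cout << "literal overload, length " << (n - 1) << ": " << name << '\n'; }
int main()
{
    getLogger("ReadBuffer");           // binds to the template overload
    getLogger(std::string("dynamic")); // binds to the std::string overload
}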

Some files were not shown because too many files have changed in this diff.