Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-21 23:21:59 +00:00)

Commit 3a4b6a2379: Merge remote-tracking branch 'rschu1ze/master' into cgroup-observing-memory-tracker
.github/workflows/master.yml (vendored): 117 changes

@@ -318,14 +318,18 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
MarkReleaseReady:
needs: [RunConfig, BuilderBinDarwin, BuilderBinDarwinAarch64, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Mark Commit Release Ready
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
- BuilderDebRelease
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Mark Commit Release Ready
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py
############################################################################################
#################################### INSTALL PACKAGES ######################################
@@ -385,6 +389,22 @@ jobs:
test_name: Stateless tests (release, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestS3Debug:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (debug, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestS3Tsan:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (tsan, s3 storage)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestAarch64:
needs: [RunConfig, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
@@ -493,6 +513,55 @@ jobs:
test_name: Stateful tests (debug)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
# Parallel replicas
FunctionalStatefulTestDebugParallelReplicas:
needs: [RunConfig, BuilderDebDebug]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (debug, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestUBsanParallelReplicas:
needs: [RunConfig, BuilderDebUBsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (ubsan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestMsanParallelReplicas:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (msan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestTsanParallelReplicas:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (tsan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestAsanParallelReplicas:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (asan, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatefulTestReleaseParallelReplicas:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateful tests (release, ParallelReplicas)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
########################### ClickBench #######################################################
##############################################################################################
@@ -700,6 +769,28 @@ jobs:
runner_type: func-tester-aarch64
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
############################ SQLLOGIC TEST ###################################################
##############################################################################################
SQLLogicTestRelease:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Sqllogic test (release)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
##################################### SQL TEST ###############################################
##############################################################################################
SQLTest:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: SQLTest
runner_type: fuzzer-unit-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
###################################### SQLANCER FUZZERS ######################################
##############################################################################################
SQLancerTestRelease:
@@ -732,6 +823,8 @@ jobs:
- FunctionalStatelessTestTsan
- FunctionalStatelessTestMsan
- FunctionalStatelessTestUBsan
- FunctionalStatelessTestS3Debug
- FunctionalStatelessTestS3Tsan
- FunctionalStatefulTestDebug
- FunctionalStatefulTestRelease
- FunctionalStatefulTestAarch64
@@ -739,6 +832,12 @@ jobs:
- FunctionalStatefulTestTsan
- FunctionalStatefulTestMsan
- FunctionalStatefulTestUBsan
- FunctionalStatefulTestDebugParallelReplicas
- FunctionalStatefulTestUBsanParallelReplicas
- FunctionalStatefulTestMsanParallelReplicas
- FunctionalStatefulTestTsanParallelReplicas
- FunctionalStatefulTestAsanParallelReplicas
- FunctionalStatefulTestReleaseParallelReplicas
- StressTestDebug
- StressTestAsan
- StressTestTsan
@@ -764,6 +863,8 @@ jobs:
- UnitTestsReleaseClang
- SQLancerTestRelease
- SQLancerTestDebug
- SQLLogicTestRelease
- SQLTest
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code

.github/workflows/pull_request.yml (vendored): 2 changes

@@ -1002,7 +1002,7 @@ jobs:
####################################### libFuzzer ###########################################
#############################################################################################
libFuzzer:
if: ${{ !failure() && !cancelled() && contains(github.event.pull_request.labels.*.name, 'libFuzzer') }}
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, StyleCheck]
uses: ./.github/workflows/libfuzzer.yml
with:

.github/workflows/release_branches.yml (vendored): 20 changes

@@ -91,6 +91,8 @@ jobs:
build_name: package_release
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
BuilderDebAarch64:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
@@ -99,6 +101,8 @@ jobs:
build_name: package_aarch64
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
BuilderDebAsan:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
@@ -142,6 +146,8 @@ jobs:
build_name: binary_darwin
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
BuilderBinDarwinAarch64:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
@@ -150,6 +156,8 @@ jobs:
build_name: binary_darwin_aarch64
checkout_depth: 0
data: ${{ needs.RunConfig.outputs.data }}
# always rebuild on release branches to be able to publish from any commit
force: true
############################################################################################
##################################### Docker images #######################################
############################################################################################
@@ -206,13 +214,8 @@ jobs:
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderDebRelease
- BuilderDebAarch64
- BuilderDebAsan
- BuilderDebTsan
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
- BuilderBinDarwin
- BuilderBinDarwinAarch64
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
@@ -225,7 +228,6 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
MarkReleaseReady:
if: ${{ !failure() && !cancelled() }}
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
@@ -235,8 +237,6 @@ jobs:
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Mark Commit Release Ready
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

.github/workflows/reusable_build.yml (vendored): 10 changes

@@ -26,6 +26,10 @@ name: Build ClickHouse
description: json ci data
type: string
required: true
force:
description: disallow job skipping
type: boolean
default: false
additional_envs:
description: additional ENV variables to setup the job
type: string
@@ -33,7 +37,7 @@ name: Build ClickHouse
jobs:
Build:
name: Build-${{inputs.build_name}}
if: contains(fromJson(inputs.data).jobs_data.jobs_to_do, inputs.build_name)
if: ${{ contains(fromJson(inputs.data).jobs_data.jobs_to_do, inputs.build_name) || inputs.force }}
env:
GITHUB_JOB_OVERRIDDEN: Build-${{inputs.build_name}}
runs-on: [self-hosted, '${{inputs.runner_type}}']
@@ -78,13 +82,15 @@ jobs:
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" \
--infile ${{ toJson(inputs.data) }} \
--job-name "$BUILD_NAME" \
--run
--run \
${{ inputs.force && '--force' || '' }}
- name: Post
# it still be build report to upload for failed build job
if: ${{ !cancelled() }}
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.build_name}}'
- name: Mark as done
if: ${{ !cancelled() }}
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --mark-success --job-name '${{inputs.build_name}}'
- name: Clean

.github/workflows/reusable_test.yml (vendored): 1 change

@@ -107,6 +107,7 @@ jobs:
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.test_name}}'
- name: Mark as done
if: ${{ !cancelled() }}
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --mark-success --job-name '${{inputs.test_name}}' --batch ${{matrix.batch}}
- name: Clean

@@ -254,10 +254,17 @@ endif()

include(cmake/cpu_features.cmake)

# Query Profiler doesn't work on MacOS for several reasons
# - PHDR cache is not available
# - We use native functionality to get stacktraces which is not async signal safe
# and thus we don't need to generate asynchronous unwind tables
if (NOT OS_DARWIN)
# Asynchronous unwind tables are needed for Query Profiler.
# They are already by default on some platforms but possibly not on all platforms.
# Enable it explicitly.
set (COMPILER_FLAGS "${COMPILER_FLAGS} -fasynchronous-unwind-tables")
endif()

# Reproducible builds.
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")

@@ -22,6 +22,7 @@
#include <cstddef>
#include <map>
#include <vector>

#include "Poco/Channel.h"
#include "Poco/Format.h"
#include "Poco/Foundation.h"
@@ -871,21 +872,11 @@ public:
/// If the Logger does not yet exist, it is created, based
/// on its parent logger.

static LoggerPtr getShared(const std::string & name);
static LoggerPtr getShared(const std::string & name, bool should_be_owned_by_shared_ptr_if_created = true);
/// Returns a shared pointer to the Logger with the given name.
/// If the Logger does not yet exist, it is created, based
/// on its parent logger.

static Logger & unsafeGet(const std::string & name);
/// Returns a reference to the Logger with the given name.
/// If the Logger does not yet exist, it is created, based
/// on its parent logger.
///
/// WARNING: This method is not thread safe. You should
/// probably use get() instead.
/// The only time this method should be used is during
/// program initialization, when only one thread is running.

static Logger & create(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION);
/// Creates and returns a reference to a Logger with the
/// given name. The Logger's Channel and log level as set as
@@ -932,6 +923,16 @@ public:

static const std::string ROOT; /// The name of the root logger ("").

public:
struct LoggerEntry
{
Poco::Logger * logger;
bool owned_by_shared_ptr = false;
};

using LoggerMap = std::unordered_map<std::string, LoggerEntry>;
using LoggerMapIterator = LoggerMap::iterator;

protected:
Logger(const std::string & name, Channel * pChannel, int level);
~Logger();
@@ -940,12 +941,16 @@ protected:
void log(const std::string & text, Message::Priority prio, const char * file, int line);

static std::string format(const std::string & fmt, int argc, std::string argv[]);
static Logger & unsafeCreate(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION);
static Logger & parent(const std::string & name);
static void add(Logger * pLogger);
static Logger * find(const std::string & name);

private:
static std::pair<Logger::LoggerMapIterator, bool> unsafeGet(const std::string & name, bool get_shared);
static Logger * unsafeGetRawPtr(const std::string & name);
static std::pair<LoggerMapIterator, bool> unsafeCreate(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION);
static Logger & parent(const std::string & name);
static std::pair<LoggerMapIterator, bool> add(Logger * pLogger);
static std::optional<LoggerMapIterator> find(const std::string & name);
static Logger * findRawPtr(const std::string & name);

Logger();
Logger(const Logger &);
Logger & operator=(const Logger &);

@@ -38,14 +38,7 @@ std::mutex & getLoggerMutex()
return *logger_mutex;
}

struct LoggerEntry
{
Poco::Logger * logger;
bool owned_by_shared_ptr = false;
};

using LoggerMap = std::unordered_map<std::string, LoggerEntry>;
LoggerMap * _pLoggerMap = nullptr;
Poco::Logger::LoggerMap * _pLoggerMap = nullptr;

}

@@ -337,10 +330,12 @@ struct LoggerDeleter
}
};

inline LoggerPtr makeLoggerPtr(Logger & logger)
inline LoggerPtr makeLoggerPtr(Logger & logger, bool owned_by_shared_ptr)
{
return std::shared_ptr<Logger>(&logger, LoggerDeleter());
if (owned_by_shared_ptr)
return LoggerPtr(&logger, LoggerDeleter());

return LoggerPtr(std::shared_ptr<void>{}, &logger);
}

}
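The non-owning branch of the new makeLoggerPtr relies on std::shared_ptr's aliasing constructor with an empty owner: the returned pointer dereferences to the logger but never deletes it. A minimal standalone sketch of that pattern (Widget is a hypothetical stand-in, not part of the diff):

```cpp
#include <cassert>
#include <memory>

struct Widget { int value = 42; };

int main()
{
    static Widget registry_owned;  /// lifetime managed elsewhere, like a Logger kept in the logger map

    /// Owning pointer: the control block destroys the object when the last copy goes away.
    std::shared_ptr<Widget> owning = std::make_shared<Widget>();

    /// Non-owning pointer via the aliasing constructor with an empty owner:
    /// get() returns &registry_owned, use_count() is 0, and no deleter ever runs.
    std::shared_ptr<Widget> non_owning(std::shared_ptr<void>{}, &registry_owned);

    assert(owning.use_count() == 1);
    assert(non_owning.use_count() == 0);
    assert(non_owning->value == 42);
}
```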

@@ -350,64 +345,67 @@ Logger& Logger::get(const std::string& name)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());

Logger & logger = unsafeGet(name);

/** If there are already shared pointer created for this logger
* we need to increment Logger reference count and now logger
* is owned by logger infrastructure.
*/
auto it = _pLoggerMap->find(name);
if (it->second.owned_by_shared_ptr)
{
it->second.logger->duplicate();
it->second.owned_by_shared_ptr = false;
}

return logger;
auto [it, inserted] = unsafeGet(name, false /*get_shared*/);
return *it->second.logger;
}

LoggerPtr Logger::getShared(const std::string & name)
LoggerPtr Logger::getShared(const std::string & name, bool should_be_owned_by_shared_ptr_if_created)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
bool logger_exists = _pLoggerMap && _pLoggerMap->contains(name);
auto [it, inserted] = unsafeGet(name, true /*get_shared*/);

Logger & logger = unsafeGet(name);

/** If logger already exists, then this shared pointer does not own it.
* If logger does not exists, logger infrastructure could be already destroyed
* or logger was created.
/** If during `unsafeGet` logger was created, then this shared pointer owns it.
* If logger was already created, then this shared pointer does not own it.
*/
if (logger_exists)
{
logger.duplicate();
}
else if (_pLoggerMap)
{
_pLoggerMap->find(name)->second.owned_by_shared_ptr = true;
}
if (inserted && should_be_owned_by_shared_ptr_if_created)
it->second.owned_by_shared_ptr = true;

return makeLoggerPtr(logger);
return makeLoggerPtr(*it->second.logger, it->second.owned_by_shared_ptr);
}

Logger& Logger::unsafeGet(const std::string& name)
std::pair<Logger::LoggerMapIterator, bool> Logger::unsafeGet(const std::string& name, bool get_shared)
{
Logger* pLogger = find(name);
if (!pLogger)
std::optional<Logger::LoggerMapIterator> optional_logger_it = find(name);

if (optional_logger_it)
{
auto & logger_it = *optional_logger_it;

if (logger_it->second.owned_by_shared_ptr)
{
logger_it->second.logger->duplicate();

if (!get_shared)
logger_it->second.owned_by_shared_ptr = false;
}
}

if (!optional_logger_it)
{
Logger * logger = nullptr;

if (name == ROOT)
{
pLogger = new Logger(name, 0, Message::PRIO_INFORMATION);
logger = new Logger(name, nullptr, Message::PRIO_INFORMATION);
}
else
{
Logger& par = parent(name);
pLogger = new Logger(name, par.getChannel(), par.getLevel());
logger = new Logger(name, par.getChannel(), par.getLevel());
}
add(pLogger);

return add(logger);
}
return *pLogger;

return std::make_pair(*optional_logger_it, false);
}

Logger * Logger::unsafeGetRawPtr(const std::string & name)
{
return unsafeGet(name, false /*get_shared*/).first->second.logger;
}

@@ -415,24 +413,24 @@ Logger& Logger::create(const std::string& name, Channel* pChannel, int level)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());

return unsafeCreate(name, pChannel, level);
return *unsafeCreate(name, pChannel, level).first->second.logger;
}

LoggerPtr Logger::createShared(const std::string & name, Channel * pChannel, int level)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());

Logger & logger = unsafeCreate(name, pChannel, level);
_pLoggerMap->find(name)->second.owned_by_shared_ptr = true;
auto [it, inserted] = unsafeCreate(name, pChannel, level);
it->second.owned_by_shared_ptr = true;

return makeLoggerPtr(logger);
return makeLoggerPtr(*it->second.logger, it->second.owned_by_shared_ptr);
}

Logger& Logger::root()
{
std::lock_guard<std::mutex> lock(getLoggerMutex());

return unsafeGet(ROOT);
return *unsafeGetRawPtr(ROOT);
}

@@ -440,7 +438,11 @@ Logger* Logger::has(const std::string& name)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());

return find(name);
auto optional_it = find(name);
if (!optional_it)
return nullptr;

return (*optional_it)->second.logger;
}

@@ -459,20 +461,32 @@ void Logger::shutdown()
}

delete _pLoggerMap;
_pLoggerMap = 0;
_pLoggerMap = nullptr;
}
}

Logger* Logger::find(const std::string& name)
std::optional<Logger::LoggerMapIterator> Logger::find(const std::string& name)
{
if (_pLoggerMap)
{
LoggerMap::iterator it = _pLoggerMap->find(name);
if (it != _pLoggerMap->end())
return it->second.logger;
return it;

return {};
}
return 0;

return {};
}

Logger * Logger::findRawPtr(const std::string & name)
{
auto optional_it = find(name);
if (!optional_it)
return nullptr;

return (*optional_it)->second.logger;
}
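The lookup change above swaps a raw Logger* return for std::optional<LoggerMap::iterator>, so callers can both detect a miss and mutate the found entry in place. A small sketch of the same pattern on a plain map (the names here are illustrative, not from the diff):

```cpp
#include <optional>
#include <string>
#include <unordered_map>

using Map = std::unordered_map<std::string, int>;

/// Returns the iterator on a hit (so the caller can update the entry in place),
/// or std::nullopt on a miss, mirroring the shape of the new Logger::find().
std::optional<Map::iterator> find_entry(Map & map, const std::string & key)
{
    auto it = map.find(key);
    if (it == map.end())
        return std::nullopt;
    return it;
}

int main()
{
    Map map{{"root", 1}};
    if (auto entry = find_entry(map, "root"))
        (*entry)->second += 1;  /// mutate through the iterator, as unsafeGet() does with owned_by_shared_ptr
    return 0;
}
```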

@@ -490,28 +504,28 @@ void Logger::names(std::vector<std::string>& names)
}
}

Logger& Logger::unsafeCreate(const std::string & name, Channel * pChannel, int level)

std::pair<Logger::LoggerMapIterator, bool> Logger::unsafeCreate(const std::string & name, Channel * pChannel, int level)
{
if (find(name)) throw ExistsException();
Logger* pLogger = new Logger(name, pChannel, level);
add(pLogger);

return *pLogger;
return add(pLogger);
}

Logger& Logger::parent(const std::string& name)
{
std::string::size_type pos = name.rfind('.');
if (pos != std::string::npos)
{
std::string pname = name.substr(0, pos);
Logger* pParent = find(pname);
Logger* pParent = findRawPtr(pname);
if (pParent)
return *pParent;
else
return parent(pname);
}
else return unsafeGet(ROOT);
else return *unsafeGetRawPtr(ROOT);
}

@@ -579,12 +593,14 @@ namespace
}

void Logger::add(Logger* pLogger)
std::pair<Logger::LoggerMapIterator, bool> Logger::add(Logger* pLogger)
{
if (!_pLoggerMap)
_pLoggerMap = new LoggerMap;
_pLoggerMap = new Logger::LoggerMap;

_pLoggerMap->emplace(pLogger->name(), LoggerEntry{pLogger, false /*owned_by_shared_ptr*/});
auto result = _pLoggerMap->emplace(pLogger->name(), LoggerEntry{pLogger, false /*owned_by_shared_ptr*/});
assert(result.second);
return result;
}
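Logger::add now forwards the {iterator, inserted} pair produced by unordered_map::emplace, which is what lets unsafeGet and getShared distinguish a freshly created logger from an existing one. A tiny reminder of that contract (plain int values instead of LoggerEntry):

```cpp
#include <cassert>
#include <string>
#include <unordered_map>

int main()
{
    std::unordered_map<std::string, int> map;

    /// emplace() returns {iterator, inserted}.
    auto [it, inserted] = map.emplace("root", 0);
    assert(inserted && it->second == 0);

    /// A second emplace with the same key changes nothing and reports inserted == false.
    auto [it2, inserted2] = map.emplace("root", 7);
    assert(!inserted2 && it2->second == 0);
    return 0;
}
```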

@@ -1,6 +1,10 @@
#include <libunwind.h>

/// On MacOS this function will be replaced with a dynamic symbol
/// from the system library.
#if !defined(OS_DARWIN)
int backtrace(void ** buffer, int size)
{
return unw_backtrace(buffer, size);
}
#endif
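This drop-in backtrace keeps the usual glibc signature, so existing callers work unchanged; on non-Darwin builds the frames simply come from libunwind's unw_backtrace. A small usage sketch (not part of the diff, assuming a platform that provides <execinfo.h>):

```cpp
#include <execinfo.h>  /// declares backtrace() and backtrace_symbols()
#include <cstdio>
#include <cstdlib>

int main()
{
    void * frames[64];
    int count = backtrace(frames, 64);  /// fill `frames` with up to 64 return addresses

    char ** symbols = backtrace_symbols(frames, count);  /// best-effort symbolization, heap-allocated
    if (symbols)
    {
        for (int i = 0; i < count; ++i)
            std::printf("%s\n", symbols[i]);
        std::free(symbols);
    }
    return 0;
}
```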

contrib/libuv (vendored): 2 changes

@@ -1 +1 @@
Subproject commit 3a85b2eb3d83f369b8a8cafd329d7e9dc28f60cf
Subproject commit 4482964660c77eec1166cd7d14fb915e3dbd774a

contrib/llvm-project (vendored): 2 changes

@@ -1 +1 @@
Subproject commit 2568a7cd1297c7c3044b0f3cc0c23a6f6444d856
Subproject commit d2142eed98046a47ff7112e3cc1e197c8a5cd80f

@@ -451,3 +451,24 @@ To disallow concurrent backup/restore, you can use these settings respectively.

The default value for both is true, so by default concurrent backup/restores are allowed.
When these settings are false on a cluster, only 1 backup/restore is allowed to run on a cluster at a time.

## Configuring BACKUP/RESTORE to use an AzureBlobStorage Endpoint

To write backups to an AzureBlobStorage container you need the following pieces of information:
- AzureBlobStorage endpoint connection string / url,
- Container,
- Path,
- Account name (if url is specified)
- Account Key (if url is specified)

The destination for a backup will be specified like this:
```
AzureBlobStorage('<connection string>/<url>', '<container>', '<path>', '<account name>', '<account key>')
```

```sql
BACKUP TABLE data TO AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;',
'test_container', 'data_backup');
RESTORE TABLE data AS data_restored FROM AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;',
'test_container', 'data_backup');
```

@@ -28,6 +28,8 @@ The maximum amount of RAM to use for running a query on a single server.

The default setting is unlimited (set to `0`).

Cloud default value: depends on the amount of RAM on the replica.

The setting does not consider the volume of available memory or the total volume of memory on the machine.
The restriction applies to a single query within a single server.
You can use `SHOW PROCESSLIST` to see the current memory consumption for each query.
@@ -104,7 +106,9 @@ Possible values:
- Maximum volume of RAM (in bytes) that can be used by the single [GROUP BY](../../sql-reference/statements/select/group-by.md#select-group-by-clause) operation.
- 0 — `GROUP BY` in external memory disabled.

Default value: 0.
Default value: `0`.

Cloud default value: half the memory amount per replica.

## max_bytes_before_external_sort {#settings-max_bytes_before_external_sort}

@@ -115,6 +119,8 @@ Enables or disables execution of `ORDER BY` clauses in external memory. See [ORD

Default value: 0.

Cloud default value: half the memory amount per replica.

## max_rows_to_sort {#max-rows-to-sort}

A maximum number of rows before sorting. This allows you to limit memory consumption when sorting.
@@ -129,7 +135,11 @@ What to do if the number of rows received before sorting exceeds one of the limi

## max_result_rows {#setting-max_result_rows}

Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query.
Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query. No limit is applied when value is `0`.

Default value: `0`.

Cloud default value: `0`.

## max_result_bytes {#max-result-bytes}

@@ -137,10 +147,14 @@ Limit on the number of bytes in the result. The same as the previous setting.

## result_overflow_mode {#result-overflow-mode}

What to do if the volume of the result exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
What to do if the volume of the result exceeds one of the limits: ‘throw’ or ‘break’.

Using ‘break’ is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that amount of returned rows is greater than [max_result_rows](#setting-max_result_rows), multiple of [max_block_size](../../operations/settings/settings.md#setting-max_block_size) and depends on [max_threads](../../operations/settings/settings.md#max_threads).

Default value: `throw`.

Cloud default value: `throw`.

Example:

``` sql

@@ -212,6 +212,8 @@ Possible values:

Default value: `'basic'`.

Cloud default value: `'best_effort'`.

See also:

- [DateTime data type.](../../sql-reference/data-types/datetime.md)

@@ -508,7 +508,9 @@ Possible values:
- Any positive integer number of hops.
- 0 — No hops allowed.

Default value: 0.
Default value: `0`.

Cloud default value: `10`.

## insert_null_as_default {#insert_null_as_default}

@@ -1126,7 +1128,9 @@ Possible values:
- 0 (or 1) — `INSERT SELECT` no parallel execution.
- Positive integer. Bigger than 1.

Default value: 0.
Default value: `0`.

Cloud default value: from `2` to `4`, depending on the service size.

Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#max_threads) setting.
Higher values will lead to higher memory usage.
@@ -1207,7 +1211,9 @@ Default value: 10000.

Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response.

Default value: 0
Default value: `0`.

Cloud default value: `1`.

## poll_interval {#poll-interval}

@@ -1946,6 +1952,8 @@ Possible values:

Default value: `200`.

Cloud default value: `1000`.

### async_insert_poll_timeout_ms {#async-insert-poll-timeout-ms}

Timeout in milliseconds for polling data from asynchronous insert queue.
@@ -2130,7 +2138,9 @@ Possible values:
- Positive integer.
- 0 — Retries are disabled

Default value: 0
Default value: 20

Cloud default value: `20`.

Keeper request retries are done after some timeout. The timeout is controlled by the following settings: `insert_keeper_retry_initial_backoff_ms`, `insert_keeper_retry_max_backoff_ms`.
The first retry is done after `insert_keeper_retry_initial_backoff_ms` timeout. The consequent timeouts will be calculated as follows:
@@ -2660,6 +2670,8 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md).

Default value: 1000000000 nanoseconds (once a second).

**Temporarily disabled in ClickHouse Cloud.**

See also:

- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
@@ -2683,6 +2695,8 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md).

Default value: 1000000000 nanoseconds.

**Temporarily disabled in ClickHouse Cloud.**

See also:

- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
@@ -2804,6 +2818,8 @@ Possible values:

Default value: `0`.

Cloud default value: `1`.

**See Also**

- [Distributed Table Engine](../../engines/table-engines/special/distributed.md/#distributed)
@@ -3319,7 +3335,9 @@ Possible values:

- a string representing any valid table engine name

Default value: `None`
Default value: `MergeTree`.

Cloud default value: `SharedMergeTree`.

**Example**

@@ -3895,6 +3913,8 @@ Possible values:

Default value: `0`.

Cloud default value: `1`.

## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}

Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds.
@@ -3933,6 +3953,8 @@ Possible values:

Default value: `throw`.

Cloud default value: `none`.

## flatten_nested {#flatten-nested}

Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/index.md) columns.
@@ -4068,6 +4090,8 @@ Possible values:

Default value: `1`.

Cloud default value: `0`.

:::note
`alter_sync` is applicable to `Replicated` tables only, it does nothing to alters of not `Replicated` tables.
:::
@@ -4723,6 +4747,8 @@ other connections are cancelled. Queries with `max_parallel_replicas > 1` are su

Enabled by default.

Disabled by default on Cloud.

## hedged_connection_timeout {#hedged_connection_timeout}

If we can't establish connection with replica after this timeout in hedged requests, we start working with the next replica without cancelling connection to the previous.
@@ -5348,10 +5374,11 @@ Default value: `false`.

## max_partition_size_to_drop

Restriction on dropping partitions in query time.
Restriction on dropping partitions in query time. The value 0 means that you can drop partitions without any restrictions.

Default value: 50 GB.
The value 0 means that you can drop partitions without any restrictions.

Cloud default value: 1 TB.

:::note
This query setting overwrites its server setting equivalent, see [max_partition_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-partition-size-to-drop)
@@ -5359,10 +5386,11 @@ This query setting overwrites its server setting equivalent, see [max_partition_

## max_table_size_to_drop

Restriction on deleting tables in query time.
Restriction on deleting tables in query time. The value 0 means that you can delete all tables without any restrictions.

Default value: 50 GB.
The value 0 means that you can delete all tables without any restrictions.

Cloud default value: 1 TB.

:::note
This query setting overwrites its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop)

@@ -206,7 +206,7 @@ Some of these settings will disable cache features per query/profile that are en

- `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` - allows to use cache in query only if it already exists, otherwise query data will not be written to local cache storage. Default: `false`.

- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on. Default: `false`.
- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on. Default: `false`. Cloud default value: `true`.

- `enable_filesystem_cache_log` - turn on logging to `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. It can be turn on for specific queries or enabled in a profile. Default: `false`.

@@ -20,6 +20,6 @@ sidebar_position: 11

If you have not seen our T-shirts, watch a video about ClickHouse. For example, this one:

![iframe](https://www.youtube.com/embed/bSyQahMVZ7w)
<iframe width="675" height="380" src="https://www.youtube.com/embed/bSyQahMVZ7w" frameborder="0" allow="accelerometer; autoplay; gyroscope; picture-in-picture" allowfullscreen></iframe>

P.S. These T-shirts are not for sale; they are given away for free at most [ClickHouse](https://clickhouse.com/#meet) meetups, usually as a reward for the most interesting questions or other forms of active participation.

@@ -19,7 +19,7 @@ slug: /ru/operations/system-tables/grants
- `column` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the column to which access is granted.

- `is_partial_revoke` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. Shows whether some privileges were revoked. Possible values:
- `0` — The row describes a partial revoke.
- `1` — The row describes a grant.
- `0` — The row describes a grant.
- `1` — The row describes a partial revoke.

- `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The permission was granted with the `WITH GRANT OPTION` option; see [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax) for details.

@@ -527,10 +527,11 @@ let queries = [];

/// Query parameters with predefined default values.
/// All other parameters will be automatically found in the queries.
let params = {
let default_params = {
'rounding': '60',
'seconds': '86400'
};
let params = default_params;

/// Palette generation for charts
function generatePalette(baseColor, numColors) {
@@ -594,13 +595,19 @@ let plots = [];
let charts = document.getElementById('charts');

/// This is not quite correct (we cannot really parse SQL with regexp) but tolerable.
const query_param_regexp = /\{(\w+):[^}]+\}/g;
const query_param_regexp = /\{(\w+):([^}]+)\}/g;

/// Automatically parse more parameters from the queries.
function findParamsInQuery(query, new_params) {
const typeDefault = (type) => type.includes('Int') ? '0'
: (type.includes('Float') ? '0.0'
: (type.includes('Bool') ? 'false'
: (type.includes('Date') ? new Date().toISOString().slice(0, 10)
: (type.includes('UUID') ? '00000000-0000-0000-0000-000000000000'
: ''))));
for (let match of query.matchAll(query_param_regexp)) {
const name = match[1];
new_params[name] = params[name] || '';
new_params[name] = params[name] || default_params[name] || typeDefault(match[2]);
}
}

@@ -404,12 +404,12 @@ private:

auto operand_type = and_operands[0]->getResultType();
auto function_type = function_node.getResultType();
assert(!function_type->isNullable());
chassert(!function_type->isNullable());
if (!function_type->equals(*operand_type))
{
/// Result of equality operator can be low cardinality, while AND always returns UInt8.
/// In that case we replace `(lc = 1) AND (lc = 1)` with `(lc = 1) AS UInt8`
assert(function_type->equals(*removeLowCardinality(operand_type)));
chassert(function_type->equals(*removeLowCardinality(operand_type)));
node = createCastFunction(std::move(and_operands[0]), function_type, getContext());
}
else
@@ -427,7 +427,7 @@ private:
void tryReplaceOrEqualsChainWithIn(QueryTreeNodePtr & node)
{
auto & function_node = node->as<FunctionNode &>();
assert(function_node.getFunctionName() == "or");
chassert(function_node.getFunctionName() == "or");

QueryTreeNodes or_operands;

@@ -486,7 +486,7 @@ private:
/// first we create tuple from RHS of equals functions
for (const auto & equals : equals_functions)
{
is_any_nullable |= equals->getResultType()->isNullable();
is_any_nullable |= removeLowCardinality(equals->getResultType())->isNullable();

const auto * equals_function = equals->as<FunctionNode>();
assert(equals_function && equals_function->getFunctionName() == "equals");

@@ -33,11 +33,13 @@ void BackupFactory::registerBackupEngine(const String & engine_name, const Creat

void registerBackupEnginesFileAndDisk(BackupFactory &);
void registerBackupEngineS3(BackupFactory &);
void registerBackupEngineAzureBlobStorage(BackupFactory &);

void registerBackupEngines(BackupFactory & factory)
{
registerBackupEnginesFileAndDisk(factory);
registerBackupEngineS3(factory);
registerBackupEngineAzureBlobStorage(factory);
}

BackupFactory::BackupFactory()

src/Backups/BackupIO_AzureBlobStorage.cpp (new file): 320 lines

@@ -0,0 +1,320 @@
#include <Backups/BackupIO_AzureBlobStorage.h>

#if USE_AZURE_BLOB_STORAGE
#include <Common/quoteString.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <Interpreters/Context.h>
#include <IO/SharedThreadPools.h>
#include <IO/HTTPHeaderEntries.h>
#include <Storages/StorageAzureBlobCluster.h>
#include <Disks/IO/ReadBufferFromAzureBlobStorage.h>
#include <Disks/IO/WriteBufferFromAzureBlobStorage.h>
#include <IO/AzureBlobStorage/copyAzureBlobStorageFile.h>
#include <Disks/IDisk.h>
#include <Disks/DiskType.h>

#include <Poco/Util/AbstractConfiguration.h>

#include <filesystem>

namespace fs = std::filesystem;

namespace DB
{
namespace ErrorCodes
{
extern const int AZURE_BLOB_STORAGE_ERROR;
extern const int LOGICAL_ERROR;
}

BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
: BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage"))
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
object_storage = std::make_unique<AzureObjectStorage>("BackupReaderAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),
configuration_.container);
client = object_storage->getAzureBlobStorageClient();
settings = object_storage->getSettings();
}

BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default;

bool BackupReaderAzureBlobStorage::fileExists(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return object_storage->exists(StoredObject(key));
}

UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
ObjectMetadata object_metadata = object_storage->getObjectMetadata(key);
return object_metadata.size_bytes;
}

std::unique_ptr<SeekableReadBuffer> BackupReaderAzureBlobStorage::readFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return std::make_unique<ReadBufferFromAzureBlobStorage>(
client, key, read_settings, settings->max_single_read_retries,
settings->max_single_download_retries);
}

void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{
auto destination_data_source_description = destination_disk->getDataSourceDescription();
if ((destination_data_source_description.type == DataSourceType::ObjectStorage)
&& (destination_data_source_description.object_storage_type == ObjectStorageType::Azure)
&& (destination_data_source_description.is_encrypted == encrypted_in_backup))
{
LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName());
auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> &) -> size_t
{
/// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files.
if (blob_path.size() != 2 || mode != WriteMode::Rewrite)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Blob writing function called with unexpected blob_path.size={} or mode={}",
blob_path.size(), mode);

copyAzureBlobStorageFile(
client,
destination_disk->getObjectStorage()->getAzureBlobStorageClient(),
configuration.container,
fs::path(configuration.blob_path) / path_in_backup,
0,
file_size,
/* dest_container */ blob_path[1],
/* dest_path */ blob_path[0],
settings,
read_settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupRDAzure"),
/* for_disk_azure_blob_storage= */ true);

return file_size;
};

destination_disk->writeFileUsingBlobWritingFunction(destination_path, write_mode, write_blob_function);
return; /// copied!
}

/// Fallback to copy through buffers.
BackupReaderDefault::copyFileToDisk(path_in_backup, file_size, encrypted_in_backup, destination_disk, destination_path, write_mode);
}


BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage"))
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),
configuration_.container);
client = object_storage->getAzureBlobStorageClient();
settings = object_storage->getSettings();
}

void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
{
/// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible.
auto source_data_source_description = src_disk->getDataSourceDescription();
if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted))
{
/// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container.
/// In this case we can't use the native copy.
if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2)
{
LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName());
copyAzureBlobStorageFile(
src_disk->getObjectStorage()->getAzureBlobStorageClient(),
client,
/* src_container */ blob_path[1],
/* src_path */ blob_path[0],
start_pos,
length,
configuration.container,
fs::path(configuration.blob_path) / path_in_backup,
settings,
read_settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
return; /// copied!
}
}

/// Fallback to copy through buffers.
BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
}

void BackupWriterAzureBlobStorage::copyFile(const String & destination, const String & source, size_t size)
{
LOG_TRACE(log, "Copying file inside backup from {} to {} ", source, destination);
copyAzureBlobStorageFile(
client,
client,
configuration.container,
fs::path(source),
0,
size,
/* dest_container */ configuration.container,
/* dest_path */ destination,
settings,
read_settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"),
/* for_disk_azure_blob_storage= */ true);
}

void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
{
copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
}

BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default;

bool BackupWriterAzureBlobStorage::fileExists(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return object_storage->exists(StoredObject(key));
}

UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
RelativePathsWithMetadata children;
object_storage->listObjects(key,children,/*max_keys*/0);
if (children.empty())
throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object must exist");
return children[0].metadata.size_bytes;
}

std::unique_ptr<ReadBuffer> BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}

return std::make_unique<ReadBufferFromAzureBlobStorage>(
client, key, read_settings, settings->max_single_read_retries,
settings->max_single_download_retries);
}

std::unique_ptr<WriteBuffer> BackupWriterAzureBlobStorage::writeFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
return std::make_unique<WriteBufferFromAzureBlobStorage>(
client,
key,
settings->max_single_part_upload_size,
settings->max_unexpected_write_error_retries,
DBMS_DEFAULT_BUFFER_SIZE,
write_settings);
}

void BackupWriterAzureBlobStorage::removeFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
StoredObject object(key);
object_storage->removeObjectIfExists(object);
}

void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names)
{
StoredObjects objects;
for (const auto & file_name : file_names)
objects.emplace_back(file_name);

object_storage->removeObjectsIfExist(objects);

}

void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names)
{
StoredObjects objects;
for (const auto & file_name : file_names)
objects.emplace_back(file_name);

object_storage->removeObjectsIfExist(objects);
}

}

#endif
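Several of the methods above repeat the same key-resolution branch: names starting with a dot (internal backup metadata files) are placed under configuration.blob_path, anything else is used as the key directly. A hypothetical refactoring sketch of that pattern, not part of the commit, showing how it could be factored into one helper:

```cpp
#include <string>

/// Hypothetical helper (not in the diff): resolves the object key the same way
/// fileExists()/getFileSize()/readFile()/writeFile()/removeFile() do above.
/// `blob_path` stands for configuration.blob_path.
std::string resolveBackupKey(const std::string & blob_path, const std::string & file_name)
{
    /// Internal files whose names start with a dot live under the configured blob path;
    /// everything else already is a full key.
    if (!file_name.empty() && file_name.front() == '.')
        return blob_path + file_name;
    return file_name;
}
```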

src/Backups/BackupIO_AzureBlobStorage.h (new file): 68 lines

@@ -0,0 +1,68 @@
#pragma once

#include "config.h"

#if USE_AZURE_BLOB_STORAGE
#include <Backups/BackupIO_Default.h>
#include <Disks/DiskType.h>
#include <Storages/StorageAzureBlobCluster.h>
#include <Interpreters/Context_fwd.h>

namespace DB
{

/// Represents a backup stored to Azure
class BackupReaderAzureBlobStorage : public BackupReaderDefault
{
public:
BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
~BackupReaderAzureBlobStorage() override;

bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;

void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;

private:
const DataSourceDescription data_source_description;
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
StorageAzureBlob::Configuration configuration;
std::unique_ptr<AzureObjectStorage> object_storage;
std::shared_ptr<const AzureObjectStorageSettings> settings;
};

class BackupWriterAzureBlobStorage : public BackupWriterDefault
{
public:
BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
~BackupWriterAzureBlobStorage() override;

bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;

void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override;
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;

void copyFile(const String & destination, const String & source, size_t size) override;

void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;

private:
std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
void removeFilesBatch(const Strings & file_names);
const DataSourceDescription data_source_description;
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
StorageAzureBlob::Configuration configuration;
std::unique_ptr<AzureObjectStorage> object_storage;
std::shared_ptr<const AzureObjectStorageSettings> settings;
};

}

#endif
@@ -939,12 +939,12 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
    }
    else if (src_disk && from_immutable_file)
    {
-       LOG_TRACE(log, "Writing backup for file {} from {} (disk {}): data file #{}", info.data_file_name, src_file_desc, src_disk->getName(), info.data_file_index);
+       LOG_INFO(log, "Writing backup for file {} from {} (disk {}): data file #{}", info.data_file_name, src_file_desc, src_disk->getName(), info.data_file_index);
        writer->copyFileFromDisk(info.data_file_name, src_disk, src_file_path, info.encrypted_by_disk, info.base_size, info.size - info.base_size);
    }
    else
    {
-       LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, src_file_desc, info.data_file_index);
+       LOG_INFO(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, src_file_desc, info.data_file_index);
        auto create_read_buffer = [entry, read_settings = writer->getReadSettings()] { return entry->getReadBuffer(read_settings); };
        writer->copyDataToFile(info.data_file_name, create_read_buffer, info.base_size, info.size - info.base_size);
    }

172 src/Backups/registerBackupEngineAzureBlobStorage.cpp Normal file
@@ -0,0 +1,172 @@
|
||||
#include "config.h"
|
||||
|
||||
#include <Backups/BackupFactory.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
#include <Backups/BackupIO_AzureBlobStorage.h>
|
||||
#include <Storages/StorageAzureBlob.h>
|
||||
#include <Backups/BackupImpl.h>
|
||||
#include <IO/Archives/hasRegisteredArchiveFileExtension.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <filesystem>
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
namespace
|
||||
{
|
||||
String removeFileNameFromURL(String & url)
|
||||
{
|
||||
Poco::URI url2{url};
|
||||
String path = url2.getPath();
|
||||
size_t slash_pos = path.find_last_of('/');
|
||||
String file_name = path.substr(slash_pos + 1);
|
||||
path.resize(slash_pos + 1);
|
||||
url2.setPath(path);
|
||||
url = url2.toString();
|
||||
return file_name;
|
||||
}
|
||||
}
|
||||
#endif
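removeFileNameFromURL() splits the last path component off the backup URL and returns it, leaving the URL pointing at the parent "directory" so the file name can become the archive name. A rough standard-library-only equivalent of the same split, shown only for illustration (the commit itself uses the Poco::URI version above; splitLastComponent is an invented name and the assumption here is a URL without query string or fragment):

#include <cassert>
#include <string>
#include <utility>

// Returns {url without the last path component, the last path component}.
std::pair<std::string, std::string> splitLastComponent(const std::string & url)
{
    size_t slash_pos = url.find_last_of('/');
    if (slash_pos == std::string::npos)
        return {url, ""};
    return {url.substr(0, slash_pos + 1), url.substr(slash_pos + 1)};
}

int main()
{
    auto [base, file] = splitLastComponent("https://account.blob.core.windows.net/container/backup.zip");
    assert(base == "https://account.blob.core.windows.net/container/");
    assert(file == "backup.zip");
    return 0;
}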
|
||||
|
||||
void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
|
||||
{
|
||||
auto creator_fn = []([[maybe_unused]] const BackupFactory::CreateParams & params) -> std::unique_ptr<IBackup>
|
||||
{
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
const String & id_arg = params.backup_info.id_arg;
|
||||
const auto & args = params.backup_info.args;
|
||||
|
||||
StorageAzureBlob::Configuration configuration;
|
||||
|
||||
if (!id_arg.empty())
|
||||
{
|
||||
const auto & config = params.context->getConfigRef();
|
||||
auto config_prefix = "named_collections." + id_arg;
|
||||
|
||||
if (!config.has(config_prefix))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg);
|
||||
|
||||
if (config.has(config_prefix + ".connection_string"))
|
||||
{
|
||||
configuration.connection_url = config.getString(config_prefix + ".connection_string");
|
||||
configuration.is_connection_string = true;
|
||||
configuration.container = config.getString(config_prefix + ".container");
|
||||
}
|
||||
else
|
||||
{
|
||||
configuration.connection_url = config.getString(config_prefix + ".storage_account_url");
|
||||
configuration.is_connection_string = false;
|
||||
configuration.container = config.getString(config_prefix + ".container");
|
||||
configuration.account_name = config.getString(config_prefix + ".account_name");
|
||||
configuration.account_key = config.getString(config_prefix + ".account_key");
|
||||
}
|
||||
|
||||
if (args.size() > 1)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]");
|
||||
|
||||
if (args.size() == 1)
|
||||
configuration.blob_path = args[0].safeGet<String>();
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
if (args.size() == 3)
|
||||
{
|
||||
configuration.connection_url = args[0].safeGet<String>();
|
||||
configuration.is_connection_string = true;
|
||||
|
||||
configuration.container = args[1].safeGet<String>();
|
||||
configuration.blob_path = args[2].safeGet<String>();
|
||||
}
|
||||
else if (args.size() == 5)
|
||||
{
|
||||
configuration.connection_url = args[0].safeGet<String>();
|
||||
configuration.is_connection_string = false;
|
||||
|
||||
configuration.container = args[1].safeGet<String>();
|
||||
configuration.blob_path = args[2].safeGet<String>();
|
||||
configuration.account_name = args[3].safeGet<String>();
|
||||
configuration.account_key = args[4].safeGet<String>();
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Backup AzureBlobStorage requires 3 or 5 arguments: connection string>/<url, container, path, [account name], [account key]");
|
||||
}
|
||||
}
|
||||
|
||||
BackupImpl::ArchiveParams archive_params;
|
||||
if (hasRegisteredArchiveFileExtension(configuration.blob_path))
|
||||
{
|
||||
if (params.is_internal_backup)
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled");
|
||||
|
||||
archive_params.archive_name = removeFileNameFromURL(configuration.blob_path);
|
||||
archive_params.compression_method = params.compression_method;
|
||||
archive_params.compression_level = params.compression_level;
|
||||
archive_params.password = params.password;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!params.password.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Password is not applicable, backup cannot be encrypted");
|
||||
}
|
||||
|
||||
|
||||
if (params.open_mode == IBackup::OpenMode::READ)
|
||||
{
|
||||
auto reader = std::make_shared<BackupReaderAzureBlobStorage>(configuration,
|
||||
params.read_settings,
|
||||
params.write_settings,
|
||||
params.context);
|
||||
|
||||
return std::make_unique<BackupImpl>(
|
||||
params.backup_info,
|
||||
archive_params,
|
||||
params.base_backup_info,
|
||||
reader,
|
||||
params.context,
|
||||
/* use_same_s3_credentials_for_base_backup*/ false);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto writer = std::make_shared<BackupWriterAzureBlobStorage>(configuration,
|
||||
params.read_settings,
|
||||
params.write_settings,
|
||||
params.context);
|
||||
|
||||
return std::make_unique<BackupImpl>(
|
||||
params.backup_info,
|
||||
archive_params,
|
||||
params.base_backup_info,
|
||||
writer,
|
||||
params.context,
|
||||
params.is_internal_backup,
|
||||
params.backup_coordination,
|
||||
params.backup_uuid,
|
||||
params.deduplicate_files,
|
||||
/* use_same_s3_credentials_for_base_backup */ false);
|
||||
}
|
||||
#else
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "AzureBlobStorage support is disabled");
|
||||
#endif
|
||||
};
|
||||
|
||||
factory.registerBackupEngine("AzureBlobStorage", creator_fn);
|
||||
}
|
||||
|
||||
}
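For the positional form, the engine accepts either 3 arguments (connection string, container, path) or 5 (storage account URL, container, path, account name, account key); any other count is rejected. A compact sketch of that dispatch over plain structs, as a hedged illustration only (ParsedAzureArgs and parseEngineArgs are invented names, and the sample connection string is fake):

#include <cassert>
#include <stdexcept>
#include <string>
#include <vector>

struct ParsedAzureArgs
{
    std::string connection_url;
    bool is_connection_string = false;
    std::string container;
    std::string blob_path;
    std::string account_name;
    std::string account_key;
};

// Mirrors the 3-vs-5 argument branches of the registration code above.
ParsedAzureArgs parseEngineArgs(const std::vector<std::string> & args)
{
    ParsedAzureArgs out;
    if (args.size() == 3)
    {
        out.connection_url = args[0];
        out.is_connection_string = true;
        out.container = args[1];
        out.blob_path = args[2];
    }
    else if (args.size() == 5)
    {
        out.connection_url = args[0];
        out.is_connection_string = false;
        out.container = args[1];
        out.blob_path = args[2];
        out.account_name = args[3];
        out.account_key = args[4];
    }
    else
        throw std::invalid_argument("expected 3 or 5 arguments");
    return out;
}

int main()
{
    auto a = parseEngineArgs({"DefaultEndpointsProtocol=https;AccountName=dev;AccountKey=key;", "backups", "backup.zip"});
    assert(a.is_connection_string && a.container == "backups");
    auto b = parseEngineArgs({"https://account.blob.core.windows.net", "backups", "backup.zip", "account", "key"});
    assert(!b.is_connection_string && b.account_name == "account");
    return 0;
}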
|
@@ -89,6 +89,7 @@ add_headers_and_sources(clickhouse_common_io Common/SSH)
add_headers_and_sources(clickhouse_common_io IO)
add_headers_and_sources(clickhouse_common_io IO/Archives)
add_headers_and_sources(clickhouse_common_io IO/S3)
add_headers_and_sources(clickhouse_common_io IO/AzureBlobStorage)
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)


@@ -141,6 +142,7 @@ endif()

if (TARGET ch_contrib::azure_sdk)
    add_headers_and_sources(dbms Disks/ObjectStorages/AzureBlobStorage)
    add_headers_and_sources(dbms IO/AzureBlobStorage)
endif()

if (TARGET ch_contrib::hdfs)

@@ -496,6 +498,7 @@ if (TARGET ch_contrib::aws_s3)
endif()

if (TARGET ch_contrib::azure_sdk)
    target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::azure_sdk)
    dbms_target_link_libraries (PRIVATE ch_contrib::azure_sdk)
endif()

@ -19,7 +19,6 @@
|
||||
|
||||
#include <Storages/MergeTree/RequestResponse.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <optional>
|
||||
|
||||
#include "config.h"
|
||||
|
@ -27,6 +27,9 @@ class IConnectionPool : private boost::noncopyable
|
||||
public:
|
||||
using Entry = PoolBase<Connection>::Entry;
|
||||
|
||||
IConnectionPool() = default;
|
||||
IConnectionPool(String host_, UInt16 port_) : host(host_), port(port_), address(host + ":" + toString(port_)) {}
|
||||
|
||||
virtual ~IConnectionPool() = default;
|
||||
|
||||
/// Selects the connection to work.
|
||||
@ -36,7 +39,15 @@ public:
|
||||
const Settings & settings,
|
||||
bool force_connected = true) = 0;
|
||||
|
||||
const std::string & getHost() const { return host; }
|
||||
UInt16 getPort() const { return port; }
|
||||
const String & getAddress() const { return address; }
|
||||
virtual Priority getPriority() const { return Priority{1}; }
|
||||
|
||||
protected:
|
||||
const String host;
|
||||
const UInt16 port = 0;
|
||||
const String address;
|
||||
};
|
||||
|
||||
using ConnectionPoolPtr = std::shared_ptr<IConnectionPool>;
|
||||
@ -63,10 +74,9 @@ public:
|
||||
Protocol::Compression compression_,
|
||||
Protocol::Secure secure_,
|
||||
Priority priority_ = Priority{1})
|
||||
: Base(max_connections_,
|
||||
: IConnectionPool(host_, port_),
|
||||
Base(max_connections_,
|
||||
getLogger("ConnectionPool (" + host_ + ":" + toString(port_) + ")")),
|
||||
host(host_),
|
||||
port(port_),
|
||||
default_database(default_database_),
|
||||
user(user_),
|
||||
password(password_),
|
||||
@ -99,10 +109,6 @@ public:
|
||||
return entry;
|
||||
}
|
||||
|
||||
const std::string & getHost() const
|
||||
{
|
||||
return host;
|
||||
}
|
||||
std::string getDescription() const
|
||||
{
|
||||
return host + ":" + toString(port);
|
||||
@ -125,8 +131,6 @@ protected:
|
||||
}
|
||||
|
||||
private:
|
||||
String host;
|
||||
UInt16 port;
|
||||
String default_database;
|
||||
String user;
|
||||
String password;
|
||||
|
@@ -1,7 +1,5 @@
#pragma once

#include <compare>

#include <Client/Connection.h>
#include <Storages/MergeTree/RequestResponse.h>

@@ -554,6 +554,21 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng
}


MutableColumnPtr ColumnArray::getDataInRange(size_t start, size_t length) const
{
    if (start + length > getOffsets().size())
        throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnArray::getDataPtrForRange method. "
            "[start({}) + length({}) > offsets.size({})]", start, length, getOffsets().size());

    size_t start_offset = offsetAt(start);
    size_t end_offset = offsetAt(start + length);

    auto res = getData().cloneEmpty();
    res->insertRangeFrom(getData(), start_offset, end_offset - start_offset);
    return res;
}


ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const
{
    if (typeid_cast<const ColumnUInt8 *>(data.get()))

@@ -143,6 +143,10 @@ public:
    const ColumnPtr & getOffsetsPtr() const { return offsets; }
    ColumnPtr & getOffsetsPtr() { return offsets; }

    /// Returns a copy of the data column's part corresponding to a specified range of rows.
    /// For example, `getDataInRange(0, size())` is the same as `getDataPtr()->clone()`.
    MutableColumnPtr getDataInRange(size_t start, size_t length) const;

    MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
    {
        return scatterImpl<ColumnArray>(num_columns, selector);
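getDataInRange() relies on the cumulative offsets array: row r's elements occupy [offsetAt(r), offsetAt(r + 1)) in the flat data column, so a contiguous range of rows maps to a single contiguous slice of data. A small sketch of the same offset arithmetic over plain vectors (sliceRows and offset_at are illustrative names, not ClickHouse API):

#include <cassert>
#include <cstddef>
#include <vector>

// offsets[i] is the cumulative number of elements up to and including row i, as in ColumnArray.
std::vector<int> sliceRows(const std::vector<int> & data, const std::vector<size_t> & offsets,
                           size_t start, size_t length)
{
    assert(start + length <= offsets.size());
    auto offset_at = [&](size_t i) { return i == 0 ? size_t(0) : offsets[i - 1]; };
    size_t start_offset = offset_at(start);
    size_t end_offset = offset_at(start + length);
    return std::vector<int>(data.begin() + start_offset, data.begin() + end_offset);
}

int main()
{
    // Three array rows: [1, 2], [3], [4, 5, 6] flattened into one data vector.
    std::vector<int> data = {1, 2, 3, 4, 5, 6};
    std::vector<size_t> offsets = {2, 3, 6};
    assert((sliceRows(data, offsets, 1, 2) == std::vector<int>{3, 4, 5, 6}));
    return 0;
}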
@@ -593,6 +593,7 @@
    M(711, FILECACHE_ACCESS_DENIED) \
    M(712, TOO_MANY_MATERIALIZED_VIEWS) \
    M(713, BROKEN_PROJECTION) \
    M(714, UNEXPECTED_CLUSTER) \
    \
    M(999, KEEPER_EXCEPTION) \
    M(1000, POCO_EXCEPTION) \

@@ -2,6 +2,8 @@

#include <memory>

#include <base/defines.h>

#include <Poco/Channel.h>
#include <Poco/Logger.h>
#include <Poco/Message.h>

@@ -24,6 +26,16 @@ using LoggerRawPtr = Poco::Logger *;
  */
LoggerPtr getLogger(const std::string & name);

/** Get Logger with the specified name. If the Logger does not exist, it is created.
  * This overload was added for the specific case when the logger is constructed from a constexpr string.
  * Logger is destroyed only during program shutdown.
  */
template <size_t n>
ALWAYS_INLINE LoggerPtr getLogger(const char (&name)[n])
{
    return Poco::Logger::getShared(name, false /*should_be_owned_by_shared_ptr_if_created*/);
}

/** Create Logger with the specified name, channel and logging level.
  * If the Logger already exists, an exception is thrown.
  * Logger is destroyed when the last shared ptr that refers to the Logger with the specified name is destroyed.
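The new overload uses a reference-to-array parameter so that string literals, whose length is known at compile time, bind to it exactly and win overload resolution over the std::string overload; per the doc comment above, the logger it creates then lives until program shutdown instead of being owned by the shared pointer. A self-contained sketch of that overload-resolution trick, with invented names rather than the ClickHouse API:

#include <cstddef>
#include <iostream>
#include <string>

// Overload taken for std::string (a literal would need a user-defined conversion to get here).
void describe(const std::string &)
{
    std::cout << "runtime string overload\n";
}

// Overload taken for string literals: a literal binds to const char (&)[n]
// as an identity conversion, which beats the conversion to std::string.
template <size_t n>
void describe(const char (&)[n])
{
    std::cout << "literal overload, length " << n - 1 << "\n";
}

int main()
{
    describe("AzureObjectStorage");              // literal overload, length 18
    describe(std::string("AzureObjectStorage")); // runtime string overload
    return 0;
}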
@ -384,6 +384,10 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(S3PutObject, "Number of S3 API PutObject calls.") \
|
||||
M(S3GetObject, "Number of S3 API GetObject calls.") \
|
||||
\
|
||||
M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \
|
||||
M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \
|
||||
M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \
|
||||
M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \
|
||||
M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \
|
||||
M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \
|
||||
\
|
||||
@ -625,6 +629,8 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(InterfacePostgreSQLReceiveBytes, "Number of bytes received through PostgreSQL interfaces") \
|
||||
\
|
||||
M(ParallelReplicasUsedCount, "Number of replicas used to execute a query with task-based parallel replicas") \
|
||||
M(ParallelReplicasAvailableCount, "Number of replicas available to execute a query with task-based parallel replicas") \
|
||||
M(ParallelReplicasUnavailableCount, "Number of replicas which was chosen, but found to be unavailable during query execution with task-based parallel replicas") \
|
||||
|
||||
#ifdef APPLY_FOR_EXTERNAL_EVENTS
|
||||
#define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M)
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <base/constexpr_helpers.h>
|
||||
#include <base/demangle.h>
|
||||
|
||||
#include <Common/scope_guard_safe.h>
|
||||
#include <Common/Dwarf.h>
|
||||
#include <Common/Elf.h>
|
||||
#include <Common/MemorySanitizer.h>
|
||||
@ -24,6 +25,15 @@
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
/// This header contains functions like `backtrace` and `backtrace_symbols`
|
||||
/// Which will be used for stack unwinding on Mac.
|
||||
/// Read: https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/backtrace.3.html
|
||||
#include "execinfo.h"
|
||||
#endif
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Currently this variable is set up once on server startup.
|
||||
@ -262,6 +272,33 @@ void StackTrace::forEachFrame(
|
||||
callback(current_inline_frame);
|
||||
}
|
||||
|
||||
callback(current_frame);
|
||||
}
|
||||
#elif defined(OS_DARWIN)
|
||||
UNUSED(fatal);
|
||||
|
||||
/// This function returns an array of strings in a special (slightly weird) format:
/// the frame number, library name, address in hex, mangled symbol name, `+` sign, the offset.
|
||||
char** strs = ::backtrace_symbols(frame_pointers.data(), static_cast<int>(size));
|
||||
SCOPE_EXIT_SAFE({free(strs);});
|
||||
|
||||
for (size_t i = offset; i < size; ++i)
|
||||
{
|
||||
StackTrace::Frame current_frame;
|
||||
|
||||
std::vector<std::string> split;
|
||||
boost::split(split, strs[i], isWhitespaceASCII);
|
||||
split.erase(
|
||||
std::remove_if(
|
||||
split.begin(), split.end(),
|
||||
[](const std::string & x) { return x.empty(); }),
|
||||
split.end());
|
||||
assert(split.size() == 6);
|
||||
|
||||
current_frame.virtual_addr = frame_pointers[i];
|
||||
current_frame.physical_addr = frame_pointers[i];
|
||||
current_frame.object = split[1];
|
||||
current_frame.symbol = split[3];
|
||||
callback(current_frame);
|
||||
}
|
||||
#else
|
||||
@ -306,7 +343,11 @@ StackTrace::StackTrace(const ucontext_t & signal_context)
|
||||
|
||||
void StackTrace::tryCapture()
|
||||
{
|
||||
#if defined(OS_DARWIN)
|
||||
size = backtrace(frame_pointers.data(), capacity);
|
||||
#else
|
||||
size = unw_backtrace(frame_pointers.data(), capacity);
|
||||
#endif
|
||||
__msan_unpoison(frame_pointers.data(), size * sizeof(frame_pointers[0]));
|
||||
}
|
||||
|
||||
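On macOS the change captures frames with backtrace() and symbolizes them with backtrace_symbols(), whose output lines are then split on whitespace as shown above. A minimal standalone illustration of those two libc calls, with no ClickHouse types involved; it should build on macOS and glibc-based Linux (the exact string layout differs between the two):

#include <execinfo.h>
#include <cstdio>
#include <cstdlib>

int main()
{
    void * frames[64];
    // Capture up to 64 return addresses of the current call stack.
    int size = backtrace(frames, 64);

    // On macOS each returned string roughly follows the format parsed above:
    // frame number, binary name, address, mangled symbol, '+', offset.
    char ** symbols = backtrace_symbols(frames, size);
    if (symbols == nullptr)
        return 1;

    for (int i = 0; i < size; ++i)
        std::printf("%s\n", symbols[i]);

    std::free(symbols); // backtrace_symbols() allocates a single block with malloc().
    return 0;
}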
@ -376,7 +417,7 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
|
||||
return callback("<Empty trace>");
|
||||
|
||||
size_t frame_index = stack_trace.offset;
|
||||
#if defined(__ELF__) && !defined(OS_FREEBSD)
|
||||
#if (defined(__ELF__) && !defined(OS_FREEBSD)) || defined(OS_DARWIN)
|
||||
size_t inline_frame_index = 0;
|
||||
auto callback_wrapper = [&](const StackTrace::Frame & frame)
|
||||
{
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <Poco/NullChannel.h>
|
||||
#include <Poco/StreamChannel.h>
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
|
||||
|
||||
TEST(Logger, Log)
|
||||
@ -100,3 +101,75 @@ TEST(Logger, SideEffects)
|
||||
|
||||
LOG_TRACE(log, "test no throw {}", getLogMessageParamOrThrow());
|
||||
}
|
||||
|
||||
TEST(Logger, SharedRawLogger)
|
||||
{
|
||||
{
|
||||
std::ostringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
auto stream_channel = Poco::AutoPtr<Poco::StreamChannel>(new Poco::StreamChannel(stream));
|
||||
|
||||
auto shared_logger = getLogger("Logger_1");
|
||||
shared_logger->setChannel(stream_channel.get());
|
||||
shared_logger->setLevel("trace");
|
||||
|
||||
LOG_TRACE(shared_logger, "SharedLogger1Log1");
|
||||
LOG_TRACE(getRawLogger("Logger_1"), "RawLogger1Log");
|
||||
LOG_TRACE(shared_logger, "SharedLogger1Log2");
|
||||
|
||||
auto actual = stream.str();
|
||||
EXPECT_EQ(actual, "SharedLogger1Log1\nRawLogger1Log\nSharedLogger1Log2\n");
|
||||
}
|
||||
{
|
||||
std::ostringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
auto stream_channel = Poco::AutoPtr<Poco::StreamChannel>(new Poco::StreamChannel(stream));
|
||||
|
||||
auto * raw_logger = getRawLogger("Logger_2");
|
||||
raw_logger->setChannel(stream_channel.get());
|
||||
raw_logger->setLevel("trace");
|
||||
|
||||
LOG_TRACE(getLogger("Logger_2"), "SharedLogger2Log1");
|
||||
LOG_TRACE(raw_logger, "RawLogger2Log");
|
||||
LOG_TRACE(getLogger("Logger_2"), "SharedLogger2Log2");
|
||||
|
||||
auto actual = stream.str();
|
||||
EXPECT_EQ(actual, "SharedLogger2Log1\nRawLogger2Log\nSharedLogger2Log2\n");
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Logger, SharedLoggersThreadSafety)
|
||||
{
|
||||
static size_t threads_count = std::thread::hardware_concurrency();
|
||||
static constexpr size_t loggers_count = 10;
|
||||
static constexpr size_t logger_get_count = 1000;
|
||||
|
||||
Poco::Logger::root();
|
||||
|
||||
std::vector<std::string> names;
|
||||
|
||||
Poco::Logger::names(names);
|
||||
size_t loggers_size_before = names.size();
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
|
||||
for (size_t thread_index = 0; thread_index < threads_count; ++thread_index)
|
||||
{
|
||||
threads.emplace_back([]()
|
||||
{
|
||||
for (size_t logger_index = 0; logger_index < loggers_count; ++logger_index)
|
||||
{
|
||||
for (size_t iteration = 0; iteration < logger_get_count; ++iteration)
|
||||
{
|
||||
getLogger("Logger_" + std::to_string(logger_index));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (auto & thread : threads)
|
||||
thread.join();
|
||||
|
||||
Poco::Logger::names(names);
|
||||
size_t loggers_size_after = names.size();
|
||||
|
||||
EXPECT_EQ(loggers_size_before, loggers_size_after);
|
||||
}
|
||||
|
@ -83,6 +83,7 @@ class IColumn;
|
||||
M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited. You ", 0) \
|
||||
M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
|
||||
M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \
|
||||
M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \
|
||||
M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
|
||||
M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \
|
||||
M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \
|
||||
|
@ -92,7 +92,8 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"},
|
||||
{"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"},
|
||||
{"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"},
|
||||
{"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}},
|
||||
{"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"},
|
||||
{"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}}},
|
||||
{"24.1", {{"print_pretty_type_names", false, true, "Better user experience."},
|
||||
{"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"},
|
||||
{"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"},
|
||||
|
@ -166,6 +166,9 @@ std::unique_ptr<AzureObjectStorageSettings> getAzureBlobStorageSettings(const Po
|
||||
config.getInt(config_prefix + ".max_single_read_retries", 3),
|
||||
config.getInt(config_prefix + ".max_single_download_retries", 3),
|
||||
config.getInt(config_prefix + ".list_object_keys_size", 1000),
|
||||
config.getUInt64(config_prefix + ".max_upload_part_size", 5ULL * 1024 * 1024 * 1024),
|
||||
config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size),
|
||||
config.getBool(config_prefix + ".use_native_copy", false),
|
||||
config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries)
|
||||
);
|
||||
}
|
||||
|
@ -92,10 +92,12 @@ private:
|
||||
AzureObjectStorage::AzureObjectStorage(
|
||||
const String & name_,
|
||||
AzureClientPtr && client_,
|
||||
SettingsPtr && settings_)
|
||||
SettingsPtr && settings_,
|
||||
const String & container_)
|
||||
: name(name_)
|
||||
, client(std::move(client_))
|
||||
, settings(std::move(settings_))
|
||||
, container(container_)
|
||||
, log(getLogger("AzureObjectStorage"))
|
||||
{
|
||||
}
|
||||
@ -376,7 +378,8 @@ std::unique_ptr<IObjectStorage> AzureObjectStorage::cloneObjectStorage(const std
|
||||
return std::make_unique<AzureObjectStorage>(
|
||||
name,
|
||||
getAzureBlobContainerClient(config, config_prefix),
|
||||
getAzureBlobStorageSettings(config, config_prefix, context)
|
||||
getAzureBlobStorageSettings(config, config_prefix, context),
|
||||
container
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -24,12 +24,18 @@ struct AzureObjectStorageSettings
|
||||
int max_single_read_retries_,
|
||||
int max_single_download_retries_,
|
||||
int list_object_keys_size_,
|
||||
size_t max_upload_part_size_,
|
||||
size_t max_single_part_copy_size_,
|
||||
bool use_native_copy_,
|
||||
size_t max_unexpected_write_error_retries_)
|
||||
: max_single_part_upload_size(max_single_part_upload_size_)
|
||||
, min_bytes_for_seek(min_bytes_for_seek_)
|
||||
, max_single_read_retries(max_single_read_retries_)
|
||||
, max_single_download_retries(max_single_download_retries_)
|
||||
, list_object_keys_size(list_object_keys_size_)
|
||||
, max_upload_part_size(max_upload_part_size_)
|
||||
, max_single_part_copy_size(max_single_part_copy_size_)
|
||||
, use_native_copy(use_native_copy_)
|
||||
, max_unexpected_write_error_retries (max_unexpected_write_error_retries_)
|
||||
{
|
||||
}
|
||||
@ -41,6 +47,10 @@ struct AzureObjectStorageSettings
|
||||
size_t max_single_read_retries = 3;
|
||||
size_t max_single_download_retries = 3;
|
||||
int list_object_keys_size = 1000;
|
||||
size_t min_upload_part_size = 16 * 1024 * 1024;
|
||||
size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024;
|
||||
size_t max_single_part_copy_size = 256 * 1024 * 1024;
|
||||
bool use_native_copy = false;
|
||||
size_t max_unexpected_write_error_retries = 4;
|
||||
};
|
||||
|
||||
@ -56,7 +66,8 @@ public:
|
||||
AzureObjectStorage(
|
||||
const String & name_,
|
||||
AzureClientPtr && client_,
|
||||
SettingsPtr && settings_);
|
||||
SettingsPtr && settings_,
|
||||
const String & container_);
|
||||
|
||||
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
|
||||
|
||||
@ -119,7 +130,7 @@ public:
|
||||
const std::string & config_prefix,
|
||||
ContextPtr context) override;
|
||||
|
||||
-    String getObjectsNamespace() const override { return ""; }
+    String getObjectsNamespace() const override { return container; }
|
||||
|
||||
std::unique_ptr<IObjectStorage> cloneObjectStorage(
|
||||
const std::string & new_namespace,
|
||||
@ -131,11 +142,19 @@ public:
|
||||
|
||||
bool isRemote() const override { return true; }
|
||||
|
||||
std::shared_ptr<const AzureObjectStorageSettings> getSettings() { return settings.get(); }
|
||||
|
||||
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> getAzureBlobStorageClient() override
|
||||
{
|
||||
return client.get();
|
||||
}
|
||||
|
||||
private:
|
||||
const String name;
|
||||
/// client used to access the files in the Blob Storage cloud
|
||||
MultiVersion<Azure::Storage::Blobs::BlobContainerClient> client;
|
||||
MultiVersion<AzureObjectStorageSettings> settings;
|
||||
const String container;
|
||||
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Disks/ObjectStorages/IObjectStorage.h>
|
||||
#include <Interpreters/Cache/FileCacheKey.h>
|
||||
#include <Interpreters/Cache/FileCacheSettings.h>
|
||||
#include "config.h"
|
||||
|
||||
namespace Poco
|
||||
{
|
||||
@ -120,6 +121,13 @@ public:
|
||||
|
||||
static bool canUseReadThroughCache(const ReadSettings & settings);
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> getAzureBlobStorageClient() override
|
||||
{
|
||||
return object_storage->getAzureBlobStorageClient();
|
||||
}
|
||||
#endif
|
||||
|
||||
private:
|
||||
FileCacheKey getCacheKey(const std::string & path) const;
|
||||
|
||||
|
@ -23,11 +23,22 @@
|
||||
#include <Disks/DirectoryIterator.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Interpreters/threadPoolCallbackRunner.h>
|
||||
#include <Common/Exception.h>
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
#include <Common/MultiVersion.h>
|
||||
#include <azure/storage/blobs.hpp>
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
class ReadBufferFromFileBase;
|
||||
class WriteBufferFromFileBase;
|
||||
|
||||
@ -214,6 +225,14 @@ public:
|
||||
|
||||
virtual WriteSettings patchSettings(const WriteSettings & write_settings) const;
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
virtual std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> getAzureBlobStorageClient()
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for AzureBlobStorage");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
private:
|
||||
mutable std::mutex throttlers_mutex;
|
||||
ThrottlerPtr remote_read_throttler;
|
||||
|
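IObjectStorage now exposes getAzureBlobStorageClient() as a virtual that throws NOT_IMPLEMENTED by default and is overridden only by the Azure-backed object storage (and the cache wrapper, which forwards to it), so callers that know they hold Azure storage can reach the native client without a cast chain. A toy version of that "optional capability" pattern; class and member names here are illustrative, not from the commit:

#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

struct NativeClient { std::string endpoint; };

class ObjectStorageBase
{
public:
    virtual ~ObjectStorageBase() = default;

    // Default implementation: the capability is not available.
    virtual std::shared_ptr<NativeClient> getNativeClient()
    {
        throw std::runtime_error("native client is only available for Azure-backed storage");
    }
};

class AzureLikeStorage : public ObjectStorageBase
{
public:
    std::shared_ptr<NativeClient> getNativeClient() override
    {
        return std::make_shared<NativeClient>(NativeClient{"https://account.blob.core.windows.net"});
    }
};

int main()
{
    AzureLikeStorage azure;
    std::cout << azure.getNativeClient()->endpoint << "\n";

    ObjectStorageBase generic;
    try
    {
        generic.getNativeClient();
    }
    catch (const std::exception & e)
    {
        std::cout << "expected: " << e.what() << "\n";
    }
    return 0;
}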
@ -102,7 +102,7 @@ void checkS3Capabilities(
|
||||
if (s3_capabilities.support_batch_delete && !checkBatchRemove(storage, key_with_trailing_slash))
|
||||
{
|
||||
LOG_WARNING(
|
||||
&Poco::Logger::get("S3ObjectStorage"),
|
||||
getLogger("S3ObjectStorage"),
|
||||
"Storage for disk {} does not support batch delete operations, "
|
||||
"so `s3_capabilities.support_batch_delete` was automatically turned off during the access check. "
|
||||
"To remove this message set `s3_capabilities.support_batch_delete` for the disk to `false`.",
|
||||
@ -213,10 +213,12 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory)
|
||||
const ContextPtr & context,
|
||||
bool /* skip_access_check */) -> ObjectStoragePtr
|
||||
{
|
||||
String container_name = config.getString(config_prefix + ".container_name", "default-container");
|
||||
return std::make_unique<AzureObjectStorage>(
|
||||
name,
|
||||
getAzureBlobContainerClient(config, config_prefix),
|
||||
getAzureBlobStorageSettings(config, config_prefix, context));
|
||||
getAzureBlobStorageSettings(config, config_prefix, context),
|
||||
container_name);
|
||||
|
||||
});
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ WebObjectStorage::loadFiles(const String & path, const std::unique_lock<std::sha
|
||||
if (!inserted)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Loading data for {} more than once", file_path);
|
||||
|
||||
LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Adding file: {}, size: {}", file_path, size);
|
||||
LOG_TRACE(getLogger("DiskWeb"), "Adding file: {}, size: {}", file_path, size);
|
||||
loaded_files.emplace_back(file_path);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
@ -9,6 +10,10 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <base/range.h>
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
@ -75,6 +80,49 @@ struct L2Distance
|
||||
state.sum += other_state.sum;
|
||||
}
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
template <typename ResultType>
|
||||
AVX512_FUNCTION_SPECIFIC_ATTRIBUTE static void accumulateCombine(
|
||||
const ResultType * __restrict data_x,
|
||||
const ResultType * __restrict data_y,
|
||||
size_t i_max,
|
||||
size_t & i_x,
|
||||
size_t & i_y,
|
||||
State<ResultType> & state)
|
||||
{
|
||||
__m512 sums;
|
||||
if constexpr (std::is_same_v<ResultType, Float32>)
|
||||
sums = _mm512_setzero_ps();
|
||||
else
|
||||
sums = _mm512_setzero_pd();
|
||||
|
||||
const size_t n = (std::is_same_v<ResultType, Float32>) ? 16 : 8;
|
||||
|
||||
for (; i_x + n < i_max; i_x += n, i_y += n)
|
||||
{
|
||||
if constexpr (std::is_same_v<ResultType, Float32>)
|
||||
{
|
||||
__m512 x = _mm512_loadu_ps(data_x + i_x);
|
||||
__m512 y = _mm512_loadu_ps(data_y + i_y);
|
||||
__m512 differences = _mm512_sub_ps(x, y);
|
||||
sums = _mm512_fmadd_ps(differences, differences, sums);
|
||||
}
|
||||
else
|
||||
{
|
||||
__m512 x = _mm512_loadu_pd(data_x + i_x);
|
||||
__m512 y = _mm512_loadu_pd(data_y + i_y);
|
||||
__m512 differences = _mm512_sub_pd(x, y);
|
||||
sums = _mm512_fmadd_pd(differences, differences, sums);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<ResultType, Float32>)
|
||||
state.sum = _mm512_reduce_add_ps(sums);
|
||||
else
|
||||
state.sum = _mm512_reduce_add_pd(sums);
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename ResultType>
|
||||
static ResultType finalize(const State<ResultType> & state, const ConstParams &)
|
||||
{
|
||||
@ -189,6 +237,70 @@ struct CosineDistance
|
||||
state.y_squared += other_state.y_squared;
|
||||
}
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
template <typename ResultType>
|
||||
AVX512_FUNCTION_SPECIFIC_ATTRIBUTE static void accumulateCombine(
|
||||
const ResultType * __restrict data_x,
|
||||
const ResultType * __restrict data_y,
|
||||
size_t i_max,
|
||||
size_t & i_x,
|
||||
size_t & i_y,
|
||||
State<ResultType> & state)
|
||||
{
|
||||
__m512 dot_products;
|
||||
__m512 x_squareds;
|
||||
__m512 y_squareds;
|
||||
|
||||
if constexpr (std::is_same_v<ResultType, Float32>)
|
||||
{
|
||||
dot_products = _mm512_setzero_ps();
|
||||
x_squareds = _mm512_setzero_ps();
|
||||
y_squareds = _mm512_setzero_ps();
|
||||
}
|
||||
else
|
||||
{
|
||||
dot_products = _mm512_setzero_pd();
|
||||
x_squareds = _mm512_setzero_pd();
|
||||
y_squareds = _mm512_setzero_pd();
|
||||
}
|
||||
|
||||
const size_t n = (std::is_same_v<ResultType, Float32>) ? 16 : 8;
|
||||
|
||||
for (; i_x + n < i_max; i_x += n, i_y += n)
|
||||
{
|
||||
if constexpr (std::is_same_v<ResultType, Float32>)
|
||||
{
|
||||
__m512 x = _mm512_loadu_ps(data_x + i_x);
|
||||
__m512 y = _mm512_loadu_ps(data_y + i_y);
|
||||
dot_products = _mm512_fmadd_ps(x, y, dot_products);
|
||||
x_squareds = _mm512_fmadd_ps(x, x, x_squareds);
|
||||
y_squareds = _mm512_fmadd_ps(y, y, y_squareds);
|
||||
}
|
||||
else
|
||||
{
|
||||
__m512 x = _mm512_loadu_pd(data_x + i_x);
|
||||
__m512 y = _mm512_loadu_pd(data_y + i_y);
|
||||
dot_products = _mm512_fmadd_pd(x, y, dot_products);
|
||||
x_squareds = _mm512_fmadd_pd(x, x, x_squareds);
|
||||
y_squareds = _mm512_fmadd_pd(y, y, y_squareds);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<ResultType, Float32>)
|
||||
{
|
||||
state.dot_prod = _mm512_reduce_add_ps(dot_products);
|
||||
state.x_squared = _mm512_reduce_add_ps(x_squareds);
|
||||
state.y_squared = _mm512_reduce_add_ps(y_squareds);
|
||||
}
|
||||
else
|
||||
{
|
||||
state.dot_prod = _mm512_reduce_add_pd(dot_products);
|
||||
state.x_squared = _mm512_reduce_add_pd(x_squareds);
|
||||
state.y_squared = _mm512_reduce_add_pd(y_squareds);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename ResultType>
|
||||
static ResultType finalize(const State<ResultType> & state, const ConstParams &)
|
||||
{
|
||||
@ -352,7 +464,7 @@ private:
|
||||
/// Check that arrays in both columns are the same size
for (size_t row = 0; row < offsets_x.size(); ++row)
|
||||
{
|
||||
if (unlikely(offsets_x[row] != offsets_y[row]))
|
||||
if (offsets_x[row] != offsets_y[row]) [[unlikely]]
|
||||
{
|
||||
ColumnArray::Offset prev_offset = row > 0 ? offsets_x[row] : 0;
|
||||
throw Exception(
|
||||
@ -420,7 +532,7 @@ private:
|
||||
ColumnArray::Offset prev_offset = 0;
|
||||
for (size_t row : collections::range(0, offsets_y.size()))
|
||||
{
|
||||
if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset))
|
||||
if (offsets_x[0] != offsets_y[row] - prev_offset) [[unlikely]]
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
@ -438,14 +550,35 @@ private:
|
||||
auto & result_data = result->getData();
|
||||
|
||||
/// Do the actual computation
|
||||
ColumnArray::Offset prev = 0;
|
||||
size_t prev = 0;
|
||||
size_t row = 0;
|
||||
|
||||
for (auto off : offsets_y)
|
||||
{
|
||||
size_t i = 0;
|
||||
typename Kernel::template State<ResultType> state;
|
||||
|
||||
/// SIMD optimization: process multiple elements in both input arrays at once.
|
||||
/// To avoid combinatorial explosion of SIMD kernels, focus on
|
||||
/// - the two most common input/output types (Float32 x Float32) --> Float32 and (Float64 x Float64) --> Float64 instead of 10 x
|
||||
/// 10 input types x 2 output types,
|
||||
/// - const/non-const inputs instead of non-const/non-const inputs
|
||||
/// - the two most common metrics L2 and cosine distance,
|
||||
/// - the most powerful SIMD instruction set (AVX-512F).
|
||||
#if USE_MULTITARGET_CODE
|
||||
if constexpr (std::is_same_v<ResultType, FirstArgType> && std::is_same_v<ResultType, SecondArgType>) /// ResultType is Float32 or Float64
|
||||
{
|
||||
if constexpr (std::is_same_v<Kernel, L2Distance>
|
||||
|| std::is_same_v<Kernel, CosineDistance>)
|
||||
{
|
||||
if (isArchSupported(TargetArch::AVX512F))
|
||||
Kernel::template accumulateCombine<ResultType>(data_x.data(), data_y.data(), i + offsets_x[0], i, prev, state);
|
||||
}
|
||||
}
|
||||
#else
|
||||
/// Process chunks in vectorized manner
|
||||
static constexpr size_t VEC_SIZE = 4;
|
||||
typename Kernel::template State<ResultType> states[VEC_SIZE];
|
||||
size_t i = 0;
|
||||
for (; prev + VEC_SIZE < off; i += VEC_SIZE, prev += VEC_SIZE)
|
||||
{
|
||||
for (size_t s = 0; s < VEC_SIZE; ++s)
|
||||
@ -453,10 +586,9 @@ private:
|
||||
states[s], static_cast<ResultType>(data_x[i + s]), static_cast<ResultType>(data_y[prev + s]), kernel_params);
|
||||
}
|
||||
|
||||
typename Kernel::template State<ResultType> state;
|
||||
for (const auto & other_state : states)
|
||||
Kernel::template combine<ResultType>(state, other_state, kernel_params);
|
||||
|
||||
#endif
|
||||
/// Process the tail
|
||||
for (; prev < off; ++i, ++prev)
|
||||
{
|
||||
@ -466,6 +598,7 @@ private:
|
||||
result_data[row] = Kernel::finalize(state, kernel_params);
|
||||
row++;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
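The AVX-512 paths above keep the kernels' usual accumulate/combine/finalize shape, just processing 16 (Float32) or 8 (Float64) lanes per iteration. For reference, a plain scalar version of the two kernels with the same three-step structure; this is a standalone sketch, not the ClickHouse kernel classes:

#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

struct L2State { double sum = 0.0; };
struct CosineState { double dot_prod = 0.0, x_squared = 0.0, y_squared = 0.0; };

// accumulate: fold one coordinate pair into the running state.
void accumulateL2(L2State & s, double x, double y) { double d = x - y; s.sum += d * d; }
void accumulateCosine(CosineState & s, double x, double y)
{
    s.dot_prod += x * y;
    s.x_squared += x * x;
    s.y_squared += y * y;
}

// finalize: turn the accumulated state into the distance.
double finalizeL2(const L2State & s) { return std::sqrt(s.sum); }
double finalizeCosine(const CosineState & s)
{
    return 1.0 - s.dot_prod / std::sqrt(s.x_squared * s.y_squared);
}

int main()
{
    std::vector<double> x = {1.0, 0.0, 0.0};
    std::vector<double> y = {0.0, 1.0, 0.0};

    L2State l2;
    CosineState cos;
    for (size_t i = 0; i < x.size(); ++i)
    {
        accumulateL2(l2, x[i], y[i]);
        accumulateCosine(cos, x[i], y[i]);
    }

    assert(std::abs(finalizeL2(l2) - std::sqrt(2.0)) < 1e-9);
    assert(std::abs(finalizeCosine(cos) - 1.0) < 1e-9); // orthogonal vectors
    return 0;
}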
340 src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp Normal file
@@ -0,0 +1,340 @@
|
||||
#include <IO/AzureBlobStorage/copyAzureBlobStorageFile.h>
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <IO/LimitSeekableReadBuffer.h>
|
||||
#include <IO/SeekableReadBuffer.h>
|
||||
#include <IO/StdStreamFromReadBuffer.h>
|
||||
#include <Disks/IO/ReadBufferFromAzureBlobStorage.h>
|
||||
#include <Disks/IO/WriteBufferFromAzureBlobStorage.h>
|
||||
#include <Common/getRandomASCIIString.h>
|
||||
#include <IO/SharedThreadPools.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event AzureCopyObject;
|
||||
extern const Event AzureUploadPart;
|
||||
|
||||
extern const Event DiskAzureCopyObject;
|
||||
extern const Event DiskAzureUploadPart;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INVALID_CONFIG_PARAMETER;
|
||||
extern const int AZURE_BLOB_STORAGE_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class UploadHelper
|
||||
{
|
||||
public:
|
||||
UploadHelper(
|
||||
const CreateReadBuffer & create_read_buffer_,
|
||||
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client_,
|
||||
size_t offset_,
|
||||
size_t total_size_,
|
||||
const String & dest_container_for_logging_,
|
||||
const String & dest_blob_,
|
||||
std::shared_ptr<const AzureObjectStorageSettings> settings_,
|
||||
ThreadPoolCallbackRunner<void> schedule_,
|
||||
bool for_disk_azure_blob_storage_,
|
||||
const Poco::Logger * log_)
|
||||
: create_read_buffer(create_read_buffer_)
|
||||
, client(client_)
|
||||
, offset (offset_)
|
||||
, total_size (total_size_)
|
||||
, dest_container_for_logging(dest_container_for_logging_)
|
||||
, dest_blob(dest_blob_)
|
||||
, settings(settings_)
|
||||
, schedule(schedule_)
|
||||
, for_disk_azure_blob_storage(for_disk_azure_blob_storage_)
|
||||
, log(log_)
|
||||
, max_single_part_upload_size(settings_->max_single_part_upload_size)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~UploadHelper() = default;
|
||||
|
||||
protected:
|
||||
std::function<std::unique_ptr<SeekableReadBuffer>()> create_read_buffer;
|
||||
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
|
||||
size_t offset;
|
||||
size_t total_size;
|
||||
const String & dest_container_for_logging;
|
||||
const String & dest_blob;
|
||||
std::shared_ptr<const AzureObjectStorageSettings> settings;
|
||||
ThreadPoolCallbackRunner<void> schedule;
|
||||
bool for_disk_azure_blob_storage;
|
||||
const Poco::Logger * log;
|
||||
size_t max_single_part_upload_size;
|
||||
|
||||
struct UploadPartTask
|
||||
{
|
||||
size_t part_offset;
|
||||
size_t part_size;
|
||||
std::vector<std::string> block_ids;
|
||||
bool is_finished = false;
|
||||
std::exception_ptr exception;
|
||||
};
|
||||
|
||||
size_t normal_part_size;
|
||||
std::vector<std::string> block_ids;
|
||||
|
||||
std::list<UploadPartTask> TSA_GUARDED_BY(bg_tasks_mutex) bg_tasks;
|
||||
int num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
|
||||
int num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
|
||||
std::mutex bg_tasks_mutex;
|
||||
std::condition_variable bg_tasks_condvar;
|
||||
|
||||
void calculatePartSize()
|
||||
{
|
||||
auto max_upload_part_size = settings->max_upload_part_size;
|
||||
if (!max_upload_part_size)
|
||||
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_upload_part_size must not be 0");
|
||||
/// We've calculated the size of a normal part (the final part can be smaller).
|
||||
normal_part_size = max_upload_part_size;
|
||||
}
|
||||
|
||||
public:
|
||||
void performCopy()
|
||||
{
|
||||
performMultipartUpload();
|
||||
}
|
||||
|
||||
void completeMultipartUpload()
|
||||
{
|
||||
auto block_blob_client = client->GetBlockBlobClient(dest_blob);
|
||||
block_blob_client.CommitBlockList(block_ids);
|
||||
}
|
||||
|
||||
void performMultipartUpload()
|
||||
{
|
||||
calculatePartSize();
|
||||
|
||||
size_t position = offset;
|
||||
size_t end_position = offset + total_size;
|
||||
|
||||
try
|
||||
{
|
||||
while (position < end_position)
|
||||
{
|
||||
size_t next_position = std::min(position + normal_part_size, end_position);
|
||||
size_t part_size = next_position - position; /// `part_size` is either `normal_part_size` or smaller if it's the final part.
|
||||
|
||||
uploadPart(position, part_size);
|
||||
|
||||
position = next_position;
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
waitForAllBackgroundTasks();
|
||||
throw;
|
||||
}
|
||||
|
||||
waitForAllBackgroundTasks();
|
||||
completeMultipartUpload();
|
||||
}
|
||||
|
||||
|
||||
void uploadPart(size_t part_offset, size_t part_size)
|
||||
{
|
||||
LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, Size: {}", dest_container_for_logging, dest_blob, part_size);
|
||||
|
||||
if (!part_size)
|
||||
{
|
||||
LOG_TRACE(log, "Skipping writing an empty part.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (schedule)
|
||||
{
|
||||
UploadPartTask * task = nullptr;
|
||||
|
||||
{
|
||||
std::lock_guard lock(bg_tasks_mutex);
|
||||
task = &bg_tasks.emplace_back();
|
||||
++num_added_bg_tasks;
|
||||
}
|
||||
|
||||
/// Notify waiting thread when task finished
|
||||
auto task_finish_notify = [this, task]()
|
||||
{
|
||||
std::lock_guard lock(bg_tasks_mutex);
|
||||
task->is_finished = true;
|
||||
++num_finished_bg_tasks;
|
||||
|
||||
/// Notification under mutex is important here.
|
||||
/// Otherwise, WriteBuffer could be destroyed in between
|
||||
/// Releasing lock and condvar notification.
|
||||
bg_tasks_condvar.notify_one();
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
task->part_offset = part_offset;
|
||||
task->part_size = part_size;
|
||||
|
||||
schedule([this, task, task_finish_notify]()
|
||||
{
|
||||
try
|
||||
{
|
||||
processUploadPartRequest(*task);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
task->exception = std::current_exception();
|
||||
}
|
||||
task_finish_notify();
|
||||
}, Priority{});
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
task_finish_notify();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
UploadPartTask task;
|
||||
task.part_offset = part_offset;
|
||||
task.part_size = part_size;
|
||||
processUploadPartRequest(task);
|
||||
block_ids.insert(block_ids.end(),task.block_ids.begin(), task.block_ids.end());
|
||||
}
|
||||
}
|
||||
|
||||
void processUploadPartRequest(UploadPartTask & task)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::AzureUploadPart);
|
||||
if (for_disk_azure_blob_storage)
|
||||
ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart);
|
||||
|
||||
auto block_blob_client = client->GetBlockBlobClient(dest_blob);
|
||||
auto read_buffer = std::make_unique<LimitSeekableReadBuffer>(create_read_buffer(), task.part_offset, task.part_size);
|
||||
while (!read_buffer->eof())
|
||||
{
|
||||
auto size = read_buffer->available();
|
||||
if (size > 0)
|
||||
{
|
||||
auto block_id = getRandomASCIIString(64);
|
||||
Azure::Core::IO::MemoryBodyStream memory(reinterpret_cast<const uint8_t *>(read_buffer->position()), size);
|
||||
block_blob_client.StageBlock(block_id, memory);
|
||||
task.block_ids.emplace_back(block_id);
|
||||
read_buffer->ignore(size);
|
||||
LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, block_id: {}", dest_container_for_logging, dest_blob, block_id);
|
||||
}
|
||||
}
|
||||
std::lock_guard lock(bg_tasks_mutex); /// Protect bg_tasks from race
|
||||
LOG_TRACE(log, "Writing part finished. Container: {}, Blob: {}, Parts: {}", dest_container_for_logging, dest_blob, bg_tasks.size());
|
||||
}
|
||||
|
||||
|
||||
void waitForAllBackgroundTasks()
|
||||
{
|
||||
if (!schedule)
|
||||
return;
|
||||
|
||||
std::unique_lock lock(bg_tasks_mutex);
|
||||
/// Suppress warnings because bg_tasks_mutex is actually hold, but tsa annotations do not understand std::unique_lock
|
||||
bg_tasks_condvar.wait(lock, [this]() {return TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); });
|
||||
|
||||
auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(bg_tasks);
|
||||
for (auto & task : tasks)
|
||||
{
|
||||
if (task.exception)
|
||||
std::rethrow_exception(task.exception);
|
||||
block_ids.insert(block_ids.end(),task.block_ids.begin(), task.block_ids.end());
|
||||
}
|
||||
}
|
||||
};
|
||||
}
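performMultipartUpload() above walks the byte range [offset, offset + total_size) in steps of normal_part_size, so every part except possibly the last has the same size. The same splitting logic isolated into a small function (splitIntoParts is an illustrative name):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

// Returns the (offset, size) of every part, mirroring the upload loop above.
std::vector<std::pair<size_t, size_t>> splitIntoParts(size_t offset, size_t total_size, size_t normal_part_size)
{
    assert(normal_part_size != 0); // the real code throws INVALID_CONFIG_PARAMETER here
    std::vector<std::pair<size_t, size_t>> parts;
    size_t position = offset;
    size_t end_position = offset + total_size;
    while (position < end_position)
    {
        size_t next_position = std::min(position + normal_part_size, end_position);
        parts.emplace_back(position, next_position - position);
        position = next_position;
    }
    return parts;
}

int main()
{
    auto parts = splitIntoParts(/*offset=*/10, /*total_size=*/25, /*normal_part_size=*/10);
    assert(parts.size() == 3);
    assert(parts[2] == std::make_pair(size_t(30), size_t(5))); // final part is smaller
    return 0;
}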
|
||||
|
||||
|
||||
void copyDataToAzureBlobStorageFile(
|
||||
const std::function<std::unique_ptr<SeekableReadBuffer>()> & create_read_buffer,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> dest_client,
|
||||
const String & dest_container_for_logging,
|
||||
const String & dest_blob,
|
||||
std::shared_ptr<const AzureObjectStorageSettings> settings,
|
||||
ThreadPoolCallbackRunner<void> schedule,
|
||||
bool for_disk_azure_blob_storage)
|
||||
{
|
||||
UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyDataToAzureBlobStorageFile")};
|
||||
helper.performCopy();
|
||||
}
|
||||
|
||||
|
||||
void copyAzureBlobStorageFile(
|
||||
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> src_client,
|
||||
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> dest_client,
|
||||
const String & src_container_for_logging,
|
||||
const String & src_blob,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
const String & dest_container_for_logging,
|
||||
const String & dest_blob,
|
||||
std::shared_ptr<const AzureObjectStorageSettings> settings,
|
||||
const ReadSettings & read_settings,
|
||||
ThreadPoolCallbackRunner<void> schedule,
|
||||
bool for_disk_azure_blob_storage)
|
||||
{
|
||||
|
||||
if (settings->use_native_copy)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::AzureCopyObject);
|
||||
if (for_disk_azure_blob_storage)
|
||||
ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject);
|
||||
|
||||
auto block_blob_client_src = src_client->GetBlockBlobClient(src_blob);
|
||||
auto block_blob_client_dest = dest_client->GetBlockBlobClient(dest_blob);
|
||||
auto source_uri = block_blob_client_src.GetUrl();
|
||||
|
||||
if (size < settings->max_single_part_copy_size)
|
||||
{
|
||||
block_blob_client_dest.CopyFromUri(source_uri);
|
||||
}
|
||||
else
|
||||
{
|
||||
Azure::Storage::Blobs::StartBlobCopyOperation operation = block_blob_client_dest.StartCopyFromUri(source_uri);
|
||||
|
||||
// Wait for the operation to finish, polling its status every 100 milliseconds.
auto copy_response = operation.PollUntilDone(std::chrono::milliseconds(100));
|
||||
auto properties_model = copy_response.Value;
|
||||
|
||||
if (properties_model.CopySource.HasValue())
|
||||
{
|
||||
throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy failed");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob);
|
||||
auto create_read_buffer = [&]
|
||||
{
|
||||
return std::make_unique<ReadBufferFromAzureBlobStorage>(src_client, src_blob, read_settings, settings->max_single_read_retries,
|
||||
settings->max_single_download_retries);
|
||||
};
|
||||
|
||||
UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, for_disk_azure_blob_storage, &Poco::Logger::get("copyAzureBlobStorageFile")};
|
||||
helper.performCopy();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
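copyAzureBlobStorageFile() chooses between three strategies: with use_native_copy it issues a single server-side CopyFromUri for blobs smaller than max_single_part_copy_size, falls back to StartCopyFromUri plus polling for larger ones, and without native copy it streams the data through read buffers and staged blocks. A plain decision-only sketch of that branching, without any Azure SDK calls; the enum and function names are illustrative:

#include <cassert>
#include <cstddef>

enum class CopyStrategy
{
    SingleNativeCopy,  // CopyFromUri in one call
    PolledNativeCopy,  // StartCopyFromUri + PollUntilDone
    ReadAndReupload    // stream through read buffers and stage blocks
};

CopyStrategy chooseCopyStrategy(bool use_native_copy, size_t size, size_t max_single_part_copy_size)
{
    if (!use_native_copy)
        return CopyStrategy::ReadAndReupload;
    if (size < max_single_part_copy_size)
        return CopyStrategy::SingleNativeCopy;
    return CopyStrategy::PolledNativeCopy;
}

int main()
{
    const size_t threshold = 256 * 1024 * 1024; // default azure_max_single_part_copy_size from the settings hunk above
    assert(chooseCopyStrategy(true, 1024, threshold) == CopyStrategy::SingleNativeCopy);
    assert(chooseCopyStrategy(true, threshold + 1, threshold) == CopyStrategy::PolledNativeCopy);
    assert(chooseCopyStrategy(false, 1024, threshold) == CopyStrategy::ReadAndReupload);
    return 0;
}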
56 src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h Normal file
@@ -0,0 +1,56 @@
#pragma once

#include "config.h"

#if USE_AZURE_BLOB_STORAGE

#include <Storages/StorageAzureBlobCluster.h>
#include <Storages/StorageAzureBlob.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <base/types.h>
#include <functional>
#include <memory>


namespace DB
{
class SeekableReadBuffer;

using CreateReadBuffer = std::function<std::unique_ptr<SeekableReadBuffer>()>;

/// Copies a file from AzureBlobStorage to AzureBlobStorage.
/// The parameters `src_offset` and `src_size` specify a part in the source to copy.
void copyAzureBlobStorageFile(
    std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> src_client,
    std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> dest_client,
    const String & src_container_for_logging,
    const String & src_blob,
    size_t src_offset,
    size_t src_size,
    const String & dest_container_for_logging,
    const String & dest_blob,
    std::shared_ptr<const AzureObjectStorageSettings> settings,
    const ReadSettings & read_settings,
    ThreadPoolCallbackRunner<void> schedule_ = {},
    bool for_disk_azure_blob_storage = false);


/// Copies data from any seekable source to AzureBlobStorage.
/// The same functionality can be achieved with copyData() and WriteBufferFromAzureBlobStorage,
/// however copyDataToAzureBlobStorageFile() is faster and uses less memory.
/// The callback `create_read_buffer` can be called from multiple threads in parallel, so it should be thread-safe.
/// The parameters `offset` and `size` specify a part in the source to copy.
void copyDataToAzureBlobStorageFile(
    const std::function<std::unique_ptr<SeekableReadBuffer>()> & create_read_buffer,
    size_t offset,
    size_t size,
    std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client,
    const String & dest_container_for_logging,
    const String & dest_blob,
    std::shared_ptr<const AzureObjectStorageSettings> settings,
    ThreadPoolCallbackRunner<void> schedule_ = {},
    bool for_disk_azure_blob_storage = false);

}

#endif
@ -605,7 +605,7 @@ ActionsDAGPtr ActionsDAG::cloneSubDAG(const NodeRawConstPtrs & outputs, bool rem
|
||||
return actions;
|
||||
}
|
||||
|
||||
static ColumnWithTypeAndName executeActionForHeader(const ActionsDAG::Node * node, ColumnsWithTypeAndName arguments)
|
||||
static ColumnWithTypeAndName executeActionForPartialResult(const ActionsDAG::Node * node, ColumnsWithTypeAndName arguments, size_t input_rows_count)
|
||||
{
|
||||
ColumnWithTypeAndName res_column;
|
||||
res_column.type = node->result_type;
|
||||
@ -615,7 +615,7 @@ static ColumnWithTypeAndName executeActionForHeader(const ActionsDAG::Node * nod
|
||||
{
|
||||
case ActionsDAG::ActionType::FUNCTION:
|
||||
{
|
||||
res_column.column = node->function->execute(arguments, res_column.type, 0, true);
|
||||
res_column.column = node->function->execute(arguments, res_column.type, input_rows_count, true);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -628,13 +628,24 @@ static ColumnWithTypeAndName executeActionForHeader(const ActionsDAG::Node * nod
|
||||
if (!array)
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"ARRAY JOIN of not array nor map: {}", node->result_name);
|
||||
res_column.column = array->getDataPtr()->cloneEmpty();
|
||||
|
||||
ColumnPtr data;
|
||||
if (input_rows_count < array->size())
|
||||
data = array->getDataInRange(0, input_rows_count);
|
||||
else
|
||||
data = array->getDataPtr();
|
||||
|
||||
res_column.column = data;
|
||||
break;
|
||||
}
|
||||
|
||||
case ActionsDAG::ActionType::COLUMN:
|
||||
{
|
||||
res_column.column = node->column->cloneResized(0);
|
||||
auto column = node->column;
|
||||
if (input_rows_count < column->size())
|
||||
column = column->cloneResized(input_rows_count);
|
||||
|
||||
res_column.column = column;
|
||||
break;
|
||||
}
|
||||
|
||||
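The renamed executeActionForPartialResult() now limits ARRAY JOIN and constant COLUMN results to input_rows_count instead of always producing empty columns, so header evaluation keeps 0 rows while constant evaluation can work on a single row. The truncation idea in miniature over a plain vector (limitRows is an illustrative name, standing in for cloneResized()/getDataInRange()):

#include <cassert>
#include <cstddef>
#include <vector>

// Keep at most input_rows_count rows; never grow the column.
std::vector<int> limitRows(const std::vector<int> & column, size_t input_rows_count)
{
    if (input_rows_count < column.size())
        return std::vector<int>(column.begin(), column.begin() + input_rows_count);
    return column;
}

int main()
{
    std::vector<int> constant_column = {42, 42, 42, 42};
    assert(limitRows(constant_column, 0).empty());      // header evaluation: 0 rows
    assert(limitRows(constant_column, 1).size() == 1);  // constant evaluation: 1 row
    assert(limitRows(constant_column, 10).size() == 4); // requesting more rows changes nothing
    return 0;
}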
@ -681,7 +692,7 @@ Block ActionsDAG::updateHeader(Block header) const
    ColumnsWithTypeAndName result_columns;
    try
    {
        result_columns = evaluatePartialResult(node_to_column, outputs, true);
        result_columns = evaluatePartialResult(node_to_column, outputs, /* input_rows_count= */ 0, /* throw_on_error= */ true);
    }
    catch (Exception & e)
    {
@ -710,8 +721,11 @@ Block ActionsDAG::updateHeader(Block header) const
ColumnsWithTypeAndName ActionsDAG::evaluatePartialResult(
    IntermediateExecutionResult & node_to_column,
    const NodeRawConstPtrs & outputs,
    size_t input_rows_count,
    bool throw_on_error)
{
    chassert(input_rows_count <= 1); /// evaluatePartialResult() should be used only to evaluate headers or constants

    ColumnsWithTypeAndName result_columns;
    result_columns.reserve(outputs.size());

@ -768,7 +782,7 @@ ColumnsWithTypeAndName ActionsDAG::evaluatePartialResult(
                node->result_name);

            if (node->type != ActionsDAG::ActionType::INPUT && has_all_arguments)
                node_to_column[node] = executeActionForHeader(node, std::move(arguments));
                node_to_column[node] = executeActionForPartialResult(node, std::move(arguments), input_rows_count);
        }
    }

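// Illustrative, self-contained sketch (an assumption, not the ClickHouse API) of the idea behind
// `input_rows_count` above: the same action is evaluated either on 0 rows (to derive a header, as in
// updateHeader()) or on 1 row (to fold a constant), and every produced column is capped to that count.
#include <algorithm>
#include <cstddef>
#include <vector>

std::vector<int> execute_for_partial_result(const std::vector<int> & full_column, size_t input_rows_count)
{
    // Keep at most `input_rows_count` rows: 0 -> structure only, 1 -> a single constant row.
    const size_t n = std::min(input_rows_count, full_column.size());
    return std::vector<int>(full_column.begin(), full_column.begin() + n);
}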
@ -278,6 +278,7 @@ public:
|
||||
static ColumnsWithTypeAndName evaluatePartialResult(
|
||||
IntermediateExecutionResult & node_to_column,
|
||||
const NodeRawConstPtrs & outputs,
|
||||
size_t input_rows_count,
|
||||
bool throw_on_error);
|
||||
|
||||
/// For apply materialize() function for every output.
|
||||
|
@ -353,7 +353,18 @@ AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr
|
||||
|
||||
auto [it, inserted] = shard.iterators.try_emplace(key.hash);
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
auto timeout_ms = getBusyWaitTimeoutMs(settings, shard, shard_num, flush_time_points, now);
|
||||
auto timeout_ms = getBusyWaitTimeoutMs(settings, shard, flush_time_points, now);
|
||||
if (timeout_ms != shard.busy_timeout_ms)
|
||||
{
|
||||
LOG_TRACE(
|
||||
log,
|
||||
"Asynchronous timeout {} from {} to {} for queue shard {}.",
|
||||
timeout_ms < shard.busy_timeout_ms ? "decreased" : "increased",
|
||||
shard.busy_timeout_ms.count(),
|
||||
timeout_ms.count(),
|
||||
size_t(shard_num));
|
||||
}
|
||||
|
||||
if (inserted)
|
||||
it->second = shard.queue.emplace(now + timeout_ms, Container{key, std::make_unique<InsertData>(timeout_ms)}).first;
|
||||
|
||||
@ -431,7 +442,6 @@ AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr
|
||||
AsynchronousInsertQueue::Milliseconds AsynchronousInsertQueue::getBusyWaitTimeoutMs(
|
||||
const Settings & settings,
|
||||
const QueueShard & shard,
|
||||
size_t shard_num,
|
||||
const QueueShardFlushTimeHistory::TimePoints & flush_time_points,
|
||||
std::chrono::steady_clock::time_point now) const
|
||||
{
|
||||
@ -460,13 +470,6 @@ AsynchronousInsertQueue::Milliseconds AsynchronousInsertQueue::getBusyWaitTimeou
        auto timeout_ms = std::max(
            std::chrono::duration_cast<Milliseconds>(shard.busy_timeout_ms * (1.0 + increase_rate)),
            shard.busy_timeout_ms + Milliseconds(1));
        if (timeout_ms != shard.busy_timeout_ms)
            LOG_TRACE(
                log,
                "Async timeout increased from {} to {} for queue shard {}.",
                shard.busy_timeout_ms.count(),
                timeout_ms.count(),
                shard_num);

        return normalize(timeout_ms);
    }
@ -475,18 +478,7 @@ AsynchronousInsertQueue::Milliseconds AsynchronousInsertQueue::getBusyWaitTimeou
    /// long enough (exceeding the adjusted timeout).
    /// This ensures the timeout value converges to the minimum over time for non-frequent inserts.
    else if (last_insert_time + decreased_timeout_ms < now && t1 + decreased_timeout_ms < t2)
    {
        auto timeout_ms = decreased_timeout_ms;
        if (timeout_ms != shard.busy_timeout_ms)
            LOG_TRACE(
                log,
                "Async timeout decreased from {} to {} for queue shard {}.",
                shard.busy_timeout_ms.count(),
                timeout_ms.count(),
                shard_num);

        return normalize(timeout_ms);
    }
        return normalize(decreased_timeout_ms);

    return normalize(shard.busy_timeout_ms);
}

@ -248,7 +248,6 @@ private:
    Milliseconds getBusyWaitTimeoutMs(
        const Settings & settings,
        const QueueShard & shard,
        size_t shard_num,
        const QueueShardFlushTimeHistory::TimePoints & flush_time_points,
        std::chrono::steady_clock::time_point now) const;
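// A minimal, self-contained sketch (simplified assumptions, not the real queue logic) of the adaptive
// busy-timeout idea above: grow the timeout multiplicatively while inserts keep arriving, and fall
// back toward the configured minimum once inserts become infrequent. The halving decrease policy and
// the clamp bounds here are stand-ins for the flush-time-history based rules in the actual code.
#include <algorithm>
#include <chrono>

using Milliseconds = std::chrono::milliseconds;

Milliseconds adjust_busy_timeout(Milliseconds current, Milliseconds min_ms, Milliseconds max_ms,
                                 double increase_rate, bool queue_is_busy)
{
    Milliseconds next = current;
    if (queue_is_busy)
        // Grow by at least 1 ms so the value always makes progress.
        next = std::max(std::chrono::duration_cast<Milliseconds>(current * (1.0 + increase_rate)),
                        current + Milliseconds(1));
    else
        // Converge back toward the minimum for non-frequent inserts.
        next = std::max(min_ms, current / 2);
    return std::clamp(next, min_ms, max_ms); // "normalize": keep the value within configured bounds
}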
@ -32,6 +32,7 @@ namespace ErrorCodes
|
||||
extern const int TOO_LARGE_DISTRIBUTED_DEPTH;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int CLUSTER_DOESNT_EXIST;
|
||||
extern const int UNEXPECTED_CLUSTER;
|
||||
}
|
||||
|
||||
namespace ClusterProxy
|
||||
@ -374,12 +375,12 @@ void executeQueryWithParallelReplicas(
        shard_num = column->getUInt(0);
    }

    ClusterPtr new_cluster;
    const auto shard_count = not_optimized_cluster->getShardCount();
    ClusterPtr new_cluster = not_optimized_cluster;
    /// if got valid shard_num from query initiator, then parallel replicas scope is the specified shard
    /// shards are numbered in order of appearance in the cluster config
    if (shard_num > 0)
    {
        const auto shard_count = not_optimized_cluster->getShardCount();
        if (shard_num > shard_count)
            throw Exception(
                ErrorCodes::LOGICAL_ERROR,
@ -395,17 +396,18 @@ void executeQueryWithParallelReplicas(

        // get cluster for shard specified by shard_num
        // shard_num is 1-based, but getClusterWithSingleShard expects 0-based index
        auto single_shard_cluster = not_optimized_cluster->getClusterWithSingleShard(shard_num - 1);
        // convert cluster to representation expected by parallel replicas
        new_cluster = single_shard_cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas);
        new_cluster = not_optimized_cluster->getClusterWithSingleShard(shard_num - 1);
    }
    else
    {
        new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas);
        if (not_optimized_cluster->getShardCount() > 1)
            throw DB::Exception(
                ErrorCodes::UNEXPECTED_CLUSTER,
                "`cluster_for_parallel_replicas` setting refers to cluster with several shards. Expected a cluster with one shard");
    }

    auto coordinator
        = std::make_shared<ParallelReplicasReadingCoordinator>(new_cluster->getShardCount(), settings.parallel_replicas_mark_segment_size);
    auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(
        new_cluster->getShardsInfo().begin()->getAllNodeCount(), settings.parallel_replicas_mark_segment_size);
    auto external_tables = new_context->getExternalTables();
    auto read_from_remote = std::make_unique<ReadFromParallelRemoteReplicasStep>(
        query_ast,
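// Self-contained sketch (illustrative only) of the index handling spelled out in the comments above:
// `shard_num` coming from the initiator is 1-based, 0 means "not set", it must not exceed the number
// of shards, and the cluster API is addressed with a 0-based index.
#include <cstddef>
#include <optional>
#include <stdexcept>

std::optional<size_t> to_zero_based_shard_index(size_t shard_num, size_t shard_count)
{
    if (shard_num == 0)
        return std::nullopt;          // no shard specified by the initiator
    if (shard_num > shard_count)
        throw std::logic_error("shard number exceeds shard count");
    return shard_num - 1;             // 1-based -> 0-based
}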
@ -1076,15 +1076,22 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
|
||||
{
|
||||
const auto * kind = create.is_dictionary ? "Dictionary" : "Table";
|
||||
const auto * kind_upper = create.is_dictionary ? "DICTIONARY" : "TABLE";
|
||||
bool is_replicated_database_internal = database->getEngineName() == "Replicated" && getContext()->getClientInfo().is_replicated_database_internal;
|
||||
bool from_path = create.attach_from_path.has_value();
|
||||
bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY;
|
||||
|
||||
if (database->getEngineName() == "Replicated" && getContext()->getClientInfo().is_replicated_database_internal
|
||||
&& !internal)
|
||||
if (is_replicated_database_internal && !internal)
|
||||
{
|
||||
if (create.uuid == UUIDHelpers::Nil)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table UUID is not specified in DDL log");
|
||||
}
|
||||
|
||||
bool from_path = create.attach_from_path.has_value();
|
||||
if (create.refresh_strategy && database->getEngineName() != "Atomic")
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY,
|
||||
"Refreshable materialized view requires Atomic database engine, but database {} has engine {}", create.getDatabase(), database->getEngineName());
|
||||
/// TODO: Support Replicated databases, only with Shared/ReplicatedMergeTree.
|
||||
/// Figure out how to make the refreshed data appear all at once on other
|
||||
/// replicas; maybe a replicated SYSTEM SYNC REPLICA query before the rename?
|
||||
|
||||
if (database->getUUID() != UUIDHelpers::Nil)
|
||||
{
|
||||
@ -1108,7 +1115,6 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
|
||||
}
|
||||
else
|
||||
{
|
||||
bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY;
|
||||
bool has_uuid = create.uuid != UUIDHelpers::Nil || create.to_inner_uuid != UUIDHelpers::Nil;
|
||||
if (has_uuid && !is_on_cluster && !internal)
|
||||
{
|
||||
@ -1121,13 +1127,6 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
|
||||
"{} UUID specified, but engine of database {} is not Atomic", kind, create.getDatabase());
|
||||
}
|
||||
|
||||
if (create.refresh_strategy && database->getEngineName() != "Atomic")
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY,
|
||||
"Refreshable materialized view requires Atomic database engine, but database {} has engine {}", create.getDatabase(), database->getEngineName());
|
||||
/// TODO: Support Replicated databases, only with Shared/ReplicatedMergeTree.
|
||||
/// Figure out how to make the refreshed data appear all at once on other
|
||||
/// replicas; maybe a replicated SYSTEM SYNC REPLICA query before the rename?
|
||||
|
||||
/// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either
|
||||
/// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or
|
||||
/// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts.
|
||||
|
@ -77,6 +77,7 @@ public:
|
||||
const DataTypes & getElementsTypes() const { return set_elements_types; }
|
||||
|
||||
bool hasExplicitSetElements() const { return fill_set_elements || (!set_elements.empty() && set_elements.front()->size() == data.getTotalRowCount()); }
|
||||
bool hasSetElements() const { return !set_elements.empty(); }
|
||||
Columns getSetElements() const { checkIsCreated(); return { set_elements.begin(), set_elements.end() }; }
|
||||
|
||||
void checkColumnsNumber(size_t num_key_columns) const;
|
||||
|
@ -661,7 +661,7 @@ namespace
|
||||
const ActionsDAG::NodeRawConstPtrs & target_expr,
|
||||
ConjunctionMap && conjunction)
|
||||
{
|
||||
auto columns = ActionsDAG::evaluatePartialResult(conjunction, target_expr, false);
|
||||
auto columns = ActionsDAG::evaluatePartialResult(conjunction, target_expr, /* input_rows_count= */ 1, /* throw_on_error= */ false);
|
||||
for (const auto & column : columns)
|
||||
if (!column.column)
|
||||
return {};
|
||||
|
@ -663,15 +663,17 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
if (query_span && query_span->trace_id != UUID{})
|
||||
LOG_TRACE(getLogger("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id);
|
||||
|
||||
/// Used for logging query start time in system.query_log
|
||||
auto query_start_time = std::chrono::system_clock::now();
|
||||
|
||||
/// Used to set the watch in QueryStatus and the output formats. It is not based on query_start_time as that might be based on
|
||||
/// the value passed by the client
|
||||
/// Used for:
|
||||
/// * Setting the watch in QueryStatus (controls timeouts and progress) and the output formats
|
||||
/// * Logging query duration (system.query_log)
|
||||
Stopwatch start_watch{CLOCK_MONOTONIC};
|
||||
|
||||
const auto & client_info = context->getClientInfo();
|
||||
|
||||
if (!internal)
|
||||
if (!internal && client_info.initial_query_start_time == 0)
|
||||
{
|
||||
// If it's not an internal query and we don't see an initial_query_start_time yet, initialize it
|
||||
// to current time. Internal queries are those executed without an independent client context,
|
||||
@ -679,16 +681,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
// possible to have unset initial_query_start_time for non-internal and non-initial queries. For
|
||||
// example, the query is from an initiator that is running an old version of clickhouse.
|
||||
// On the other hand, if it's initialized then take it as the start of the query
|
||||
if (client_info.initial_query_start_time == 0)
|
||||
{
|
||||
context->setInitialQueryStartTime(query_start_time);
|
||||
}
|
||||
else
|
||||
{
|
||||
query_start_time = std::chrono::time_point<std::chrono::system_clock>(
|
||||
std::chrono::microseconds{client_info.initial_query_start_time_microseconds});
|
||||
}
|
||||
}
|
||||
|
||||
assert(internal || CurrentThread::get().getQueryContext());
|
||||
assert(internal || CurrentThread::get().getQueryContext()->getCurrentQueryId() == CurrentThread::getQueryId());
|
||||
@ -933,6 +927,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
reason = "asynchronous insert queue is not configured";
|
||||
else if (insert_query->select)
|
||||
reason = "insert query has select";
|
||||
else if (settings.deduplicate_blocks_in_dependent_materialized_views)
|
||||
reason = "dependent materialized views block deduplication is enabled";
|
||||
else if (insert_query->hasInlinedData())
|
||||
async_insert = true;
|
||||
|
||||
|
@ -71,6 +71,13 @@ namespace
|
||||
size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`).
|
||||
/// In all known cases secret arguments are consecutive
|
||||
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
|
||||
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
|
||||
std::vector<std::string> nested_maps;
|
||||
|
||||
bool hasSecrets() const
|
||||
{
|
||||
return count != 0 || !nested_maps.empty();
|
||||
}
|
||||
};
|
||||
|
||||
Result getResult() const { return result; }
|
||||
@ -127,6 +134,10 @@ namespace
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
findEncryptionFunctionSecretArguments();
|
||||
}
|
||||
else if (function.name == "url")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findMySQLFunctionSecretArguments()
|
||||
@ -143,6 +154,25 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
    /// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
    /// always be at the end). Marks "headers" as secret, if found.
    size_t excludeS3OrURLNestedMaps()
    {
        size_t count = arguments->size();
        while (count > 0)
        {
            const ASTFunction * f = arguments->at(count - 1)->as<ASTFunction>();
            if (!f)
                break;
            if (f->name == "headers")
                result.nested_maps.push_back(f->name);
            else if (f->name != "extra_credentials")
                break;
            count -= 1;
        }
        return count;
    }

    void findS3FunctionSecretArguments(bool is_cluster_function)
    {
        /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
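// Minimal standalone sketch (an assumption, not the ClickHouse AST API) of the trailing scan above:
// walk the argument list from the end, treat headers(...) / extra_credentials(...) as trailing named
// maps, and return how many "ordinary" arguments precede them.
#include <cstddef>
#include <string>
#include <vector>

size_t exclude_trailing_named_maps(const std::vector<std::string> & argument_names,
                                   std::vector<std::string> & secret_maps)
{
    size_t count = argument_names.size();
    while (count > 0)
    {
        const std::string & name = argument_names[count - 1];
        if (name == "headers")
            secret_maps.push_back(name);   // its values will later be shown as '[HIDDEN]'
        else if (name != "extra_credentials")
            break;                         // not a trailing named map -> stop scanning
        --count;
    }
    return count;
}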
@ -156,9 +186,10 @@ namespace
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||
/// s3('url', NOSIGN, 'format' [, 'compression'])
|
||||
/// s3('url', 'format', 'structure' [, 'compression'])
|
||||
if ((url_arg_idx + 3 <= arguments->size()) && (arguments->size() <= url_arg_idx + 4))
|
||||
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
size_t count = excludeS3OrURLNestedMaps();
|
||||
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
|
||||
@ -174,9 +205,16 @@ namespace
|
||||
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
if (url_arg_idx + 2 < count)
|
||||
markSecretArgument(url_arg_idx + 2);
|
||||
}
|
||||
|
||||
void findURLSecretArguments()
|
||||
{
|
||||
if (!isNamedCollectionName(0))
|
||||
excludeS3OrURLNestedMaps();
|
||||
}
|
||||
|
||||
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
|
||||
{
|
||||
if (arg_idx >= arguments->size())
|
||||
@ -347,6 +385,10 @@ namespace
|
||||
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
|
||||
findS3TableEngineSecretArguments();
|
||||
}
|
||||
else if (engine_name == "URL")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findExternalDistributedTableEngineSecretArguments()
|
||||
@ -373,9 +415,10 @@ namespace
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||
/// S3('url', NOSIGN, 'format' [, 'compression'])
|
||||
/// S3('url', 'format', 'compression')
|
||||
if ((3 <= arguments->size()) && (arguments->size() <= 4))
|
||||
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
|
||||
size_t count = excludeS3OrURLNestedMaps();
|
||||
if ((3 <= count) && (count <= 4))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(1, &second_arg))
|
||||
@ -383,7 +426,7 @@ namespace
|
||||
if (boost::iequals(second_arg, "NOSIGN"))
|
||||
return; /// The argument after 'url' is "NOSIGN".
|
||||
|
||||
if (arguments->size() == 3)
|
||||
if (count == 3)
|
||||
{
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
|
||||
@ -391,10 +434,11 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
/// We replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures:
|
||||
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
if (2 < count)
|
||||
markSecretArgument(2);
|
||||
}
|
||||
|
||||
@ -724,6 +768,25 @@ ASTSelectWithUnionQuery * ASTFunction::tryGetQueryArgument() const
|
||||
}
|
||||
|
||||
|
||||
static bool formatNamedArgWithHiddenValue(IAST * arg, const IAST::FormatSettings & settings, IAST::FormatState & state, IAST::FormatStateStacked frame)
{
    const auto * equals_func = arg->as<ASTFunction>();
    if (!equals_func || (equals_func->name != "equals"))
        return false;
    const auto * expr_list = equals_func->arguments->as<ASTExpressionList>();
    if (!expr_list)
        return false;
    const auto & equal_args = expr_list->children;
    if (equal_args.size() != 2)
        return false;

    equal_args[0]->formatImpl(settings, state, frame);
    settings.ostr << (settings.hilite ? IAST::hilite_operator : "") << " = " << (settings.hilite ? IAST::hilite_none : "");
    settings.ostr << "'[HIDDEN]'";

    return true;
}

void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
    frame.expression_list_prepend_whitespace = false;
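// Standalone sketch (illustrative only, not the IAST formatting API) of the masked output the code
// above produces for a nested map such as headers(...): keys are kept, values become '[HIDDEN]'.
#include <iostream>
#include <string>
#include <vector>

void print_hidden_named_map(std::ostream & out, const std::string & map_name, const std::vector<std::string> & keys)
{
    out << map_name << "(";
    for (size_t i = 0; i < keys.size(); ++i)
    {
        if (i != 0)
            out << ", ";
        out << "'" << keys[i] << "' = '[HIDDEN]'";
    }
    out << ")";   // e.g. headers('foo' = '[HIDDEN]', 'bar' = '[HIDDEN]')
}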
@ -1133,7 +1196,9 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
|
||||
if (argument->as<ASTSetQuery>())
|
||||
settings.ostr << "SETTINGS ";
|
||||
|
||||
if (!settings.show_secrets && (secret_arguments.start <= i) && (i < secret_arguments.start + secret_arguments.count))
|
||||
if (!settings.show_secrets)
|
||||
{
|
||||
if (secret_arguments.start <= i && i < secret_arguments.start + secret_arguments.count)
|
||||
{
|
||||
if (secret_arguments.are_named)
|
||||
{
|
||||
@ -1146,6 +1211,24 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
|
||||
continue;
|
||||
}
|
||||
|
||||
const ASTFunction * function = argument->as<ASTFunction>();
|
||||
if (function && function->arguments && std::count(secret_arguments.nested_maps.begin(), secret_arguments.nested_maps.end(), function->name) != 0)
|
||||
{
|
||||
/// headers('foo' = '[HIDDEN]', 'bar' = '[HIDDEN]')
|
||||
settings.ostr << (settings.hilite ? hilite_function : "") << function->name << (settings.hilite ? hilite_none : "") << "(";
|
||||
for (size_t j = 0; j < function->arguments->children.size(); ++j)
|
||||
{
|
||||
if (j != 0)
|
||||
settings.ostr << ", ";
|
||||
auto inner_arg = function->arguments->children[j];
|
||||
if (!formatNamedArgWithHiddenValue(inner_arg.get(), settings, state, nested_dont_need_parens))
|
||||
inner_arg->formatImpl(settings, state, nested_dont_need_parens);
|
||||
}
|
||||
settings.ostr << ")";
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if ((i == 1) && special_hilite_regexp
|
||||
&& highlightStringLiteralWithMetacharacters(argument, settings, "|()^$.[]?*+{:-"))
|
||||
{
|
||||
@ -1166,7 +1249,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
|
||||
|
||||
bool ASTFunction::hasSecretParts() const
|
||||
{
|
||||
return (FunctionSecretArgumentsFinder{*this}.getResult().count > 0) || childrenHaveSecretParts();
|
||||
return (FunctionSecretArgumentsFinder{*this}.getResult().hasSecrets()) || childrenHaveSecretParts();
|
||||
}
|
||||
|
||||
String getFunctionName(const IAST * ast)
|
||||
|
@ -100,7 +100,7 @@ public:
|
||||
merged_rows = 0;
|
||||
sum_blocks_granularity = 0;
|
||||
++total_chunks;
|
||||
total_allocated_bytes += chunk.allocatedBytes();
|
||||
total_allocated_bytes += chunk.bytes();
|
||||
need_flush = false;
|
||||
|
||||
return chunk;
|
||||
@ -122,7 +122,7 @@ public:
|
||||
{
|
||||
size_t merged_bytes = 0;
|
||||
for (const auto & column : columns)
|
||||
merged_bytes += column->allocatedBytes();
|
||||
merged_bytes += column->byteSize();
|
||||
if (merged_bytes >= max_block_size_bytes)
|
||||
return true;
|
||||
}
|
||||
|
@ -157,6 +157,34 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::Subqueries subque
|
||||
query_plan.unitePlans(std::move(creating_sets), std::move(plans));
|
||||
}
|
||||
|
||||
QueryPipelineBuilderPtr addCreatingSetsTransform(QueryPipelineBuilderPtr pipeline, PreparedSets::Subqueries subqueries, ContextPtr context)
|
||||
{
|
||||
DataStreams input_streams;
|
||||
input_streams.emplace_back(DataStream{pipeline->getHeader()});
|
||||
|
||||
QueryPipelineBuilders pipelines;
|
||||
pipelines.reserve(1 + subqueries.size());
|
||||
pipelines.push_back(std::move(pipeline));
|
||||
|
||||
auto plan_settings = QueryPlanOptimizationSettings::fromContext(context);
|
||||
auto pipeline_settings = BuildQueryPipelineSettings::fromContext(context);
|
||||
|
||||
for (auto & future_set : subqueries)
|
||||
{
|
||||
if (future_set->get())
|
||||
continue;
|
||||
|
||||
auto plan = future_set->build(context);
|
||||
if (!plan)
|
||||
continue;
|
||||
|
||||
input_streams.emplace_back(plan->getCurrentDataStream());
|
||||
pipelines.emplace_back(plan->buildQueryPipeline(plan_settings, pipeline_settings));
|
||||
}
|
||||
|
||||
return CreatingSetsStep(input_streams).updatePipeline(std::move(pipelines), pipeline_settings);
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<QueryPlan>> DelayedCreatingSetsStep::makePlansForSets(DelayedCreatingSetsStep && step)
|
||||
{
|
||||
std::vector<std::unique_ptr<QueryPlan>> plans;
|
||||
|
@ -72,4 +72,6 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::Subqueries subque
|
||||
|
||||
void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context);
|
||||
|
||||
QueryPipelineBuilderPtr addCreatingSetsTransform(QueryPipelineBuilderPtr pipeline, PreparedSets::Subqueries subqueries, ContextPtr context);
|
||||
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include <Processors/Sources/DelayedSource.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include "Common/logger_useful.h"
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
#include <Core/QueryProcessingStage.h>
|
||||
#include <Client/ConnectionPool.h>
|
||||
@ -375,10 +375,11 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep(
|
||||
, storage_limits(std::move(storage_limits_))
|
||||
, log(log_)
|
||||
{
|
||||
std::vector<String> description;
|
||||
chassert(cluster->getShardCount() == 1);
|
||||
|
||||
for (const auto & address : cluster->getShardsAddresses())
|
||||
description.push_back(fmt::format("Replica: {}", address[0].host_name));
|
||||
std::vector<String> description;
|
||||
for (const auto & pool : cluster->getShardsInfo().front().per_replica_pools)
|
||||
description.push_back(fmt::format("Replica: {}", pool->getHost()));
|
||||
|
||||
setStepDescription(boost::algorithm::join(description, ", "));
|
||||
}
|
||||
@ -399,51 +400,44 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder
|
||||
const Settings & current_settings = context->getSettingsRef();
|
||||
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
|
||||
|
||||
const auto & shard = cluster->getShardsInfo().at(0);
|
||||
size_t all_replicas_count = current_settings.max_parallel_replicas;
|
||||
if (all_replicas_count > cluster->getShardsInfo().size())
|
||||
if (all_replicas_count > shard.getAllNodeCount())
|
||||
{
|
||||
LOG_INFO(getLogger("ReadFromParallelRemoteReplicasStep"),
|
||||
"The number of replicas requested ({}) is bigger than the real number available in the cluster ({}). "\
|
||||
"Will use the latter number to execute the query.", current_settings.max_parallel_replicas, cluster->getShardsInfo().size());
|
||||
all_replicas_count = cluster->getShardsInfo().size();
|
||||
LOG_INFO(
|
||||
getLogger("ReadFromParallelRemoteReplicasStep"),
|
||||
"The number of replicas requested ({}) is bigger than the real number available in the cluster ({}). "
|
||||
"Will use the latter number to execute the query.",
|
||||
current_settings.max_parallel_replicas,
|
||||
shard.getAllNodeCount());
|
||||
all_replicas_count = shard.getAllNodeCount();
|
||||
}
|
||||
|
||||
    /// Find local shard. It might happen that there is no local shard, but that's fine
    for (const auto & shard: cluster->getShardsInfo())

    std::vector<ConnectionPoolWithFailover::Base::ShuffledPool> shuffled_pool;
    if (all_replicas_count < shard.getAllNodeCount())
    {
        if (shard.isLocal())
        shuffled_pool = shard.pool->getShuffledPools(current_settings);
        shuffled_pool.resize(all_replicas_count);
    }
    else
    {
        /// try to preserve replicas order if all replicas in cluster are used for query execution
        /// it's important for data locality during query execution
        auto priority_func = [](size_t i) { return Priority{static_cast<Int64>(i)}; };
        shuffled_pool = shard.pool->getShuffledPools(current_settings, priority_func);
    }

    for (size_t i=0; i < all_replicas_count; ++i)
    {
        IConnections::ReplicaInfo replica_info
        {
            .all_replicas_count = all_replicas_count,
            /// `shard_num` will be equal to the number of the given replica in the cluster (set by `Cluster::getClusterWithReplicasAsShards`).
            /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it.
            .number_of_current_replica = shard.shard_num - 1,
            .number_of_current_replica = i,
        };

        addPipeForSingeReplica(pipes, shard.pool, replica_info);
    }
}

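// Self-contained sketch (illustrative) of the ordering choice commented above: when every replica
// participates, an identity priority keeps the configured replica order (good for data locality);
// when only a subset is used, the pool may be shuffled and then truncated to the requested size.
#include <algorithm>
#include <cstddef>
#include <numeric>
#include <random>
#include <vector>

std::vector<size_t> pick_replicas(size_t total_replicas, size_t replicas_to_use)
{
    std::vector<size_t> order(total_replicas);
    std::iota(order.begin(), order.end(), size_t{0});  // identity order == configured order
    if (replicas_to_use < total_replicas)
    {
        std::shuffle(order.begin(), order.end(), std::mt19937{std::random_device{}()});
        order.resize(replicas_to_use);                 // use a random subset of replicas
    }
    return order;
}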
|
||||
auto current_shard = cluster->getShardsInfo().begin();
|
||||
while (pipes.size() != all_replicas_count)
|
||||
{
|
||||
if (current_shard->isLocal())
|
||||
{
|
||||
++current_shard;
|
||||
continue;
|
||||
}
|
||||
|
||||
IConnections::ReplicaInfo replica_info
|
||||
{
|
||||
.all_replicas_count = all_replicas_count,
|
||||
/// `shard_num` will be equal to the number of the given replica in the cluster (set by `Cluster::getClusterWithReplicasAsShards`).
|
||||
/// we should use this number specifically because efficiency of data distribution by consistent hash depends on it.
|
||||
.number_of_current_replica = current_shard->shard_num - 1,
|
||||
};
|
||||
|
||||
addPipeForSingeReplica(pipes, current_shard->pool, replica_info);
|
||||
++current_shard;
|
||||
addPipeForSingeReplica(pipes, shuffled_pool[i].pool, replica_info);
|
||||
}
|
||||
|
||||
auto pipe = Pipe::unitePipes(std::move(pipes));
|
||||
@ -456,7 +450,8 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder
|
||||
}
|
||||
|
||||
|
||||
void ReadFromParallelRemoteReplicasStep::addPipeForSingeReplica(Pipes & pipes, std::shared_ptr<ConnectionPoolWithFailover> pool, IConnections::ReplicaInfo replica_info)
|
||||
void ReadFromParallelRemoteReplicasStep::addPipeForSingeReplica(
|
||||
Pipes & pipes, const ConnectionPoolPtr & pool, IConnections::ReplicaInfo replica_info)
|
||||
{
|
||||
bool add_agg_info = stage == QueryProcessingStage::WithMergeableState;
|
||||
bool add_totals = false;
|
||||
@ -476,7 +471,14 @@ void ReadFromParallelRemoteReplicasStep::addPipeForSingeReplica(Pipes & pipes, s
|
||||
assert(output_stream);
|
||||
|
||||
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
|
||||
pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage,
|
||||
pool,
|
||||
query_string,
|
||||
output_stream->header,
|
||||
context,
|
||||
throttler,
|
||||
scalars,
|
||||
external_tables,
|
||||
stage,
|
||||
RemoteQueryExecutor::Extension{.parallel_reading_coordinator = coordinator, .replica_info = std::move(replica_info)});
|
||||
|
||||
remote_query_executor->setLogger(log);
|
||||
|
@ -9,10 +9,6 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ConnectionPoolWithFailover;
|
||||
using ConnectionPoolWithFailoverPtr = std::shared_ptr<ConnectionPoolWithFailover>;
|
||||
|
||||
class Throttler;
|
||||
using ThrottlerPtr = std::shared_ptr<Throttler>;
|
||||
|
||||
@ -91,8 +87,7 @@ public:
|
||||
void enforceAggregationInOrder();
|
||||
|
||||
private:
|
||||
|
||||
void addPipeForSingeReplica(Pipes & pipes, std::shared_ptr<ConnectionPoolWithFailover> pool, IConnections::ReplicaInfo replica_info);
|
||||
void addPipeForSingeReplica(Pipes & pipes, const ConnectionPoolPtr & pool, IConnections::ReplicaInfo replica_info);
|
||||
|
||||
ClusterPtr cluster;
|
||||
ASTPtr query_ast;
|
||||
|
@ -11,8 +11,9 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
ITTLAlgorithm::ITTLAlgorithm(
|
||||
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
|
||||
: description(description_)
|
||||
const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
|
||||
: ttl_expressions(ttl_expressions_)
|
||||
, description(description_)
|
||||
, old_ttl_info(old_ttl_info_)
|
||||
, current_time(current_time_)
|
||||
, force(force_)
|
||||
|
@ -8,6 +8,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct TTLExpressions
|
||||
{
|
||||
ExpressionActionsPtr expression;
|
||||
ExpressionActionsPtr where_expression;
|
||||
};
|
||||
|
||||
/**
|
||||
* Represents the actions, which are required to do
|
||||
* with data, when TTL is expired: delete, aggregate, etc.
|
||||
@ -18,7 +24,7 @@ public:
|
||||
using TTLInfo = IMergeTreeDataPart::TTLInfo;
|
||||
using MutableDataPartPtr = MergeTreeMutableDataPartPtr;
|
||||
|
||||
ITTLAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
|
||||
ITTLAlgorithm(const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
|
||||
virtual ~ITTLAlgorithm() = default;
|
||||
|
||||
virtual void execute(Block & block) = 0;
|
||||
@ -39,6 +45,7 @@ protected:
|
||||
bool isTTLExpired(time_t ttl) const;
|
||||
UInt32 getTimestampByIndex(const IColumn * column, size_t index) const;
|
||||
|
||||
const TTLExpressions ttl_expressions;
|
||||
const TTLDescription description;
|
||||
const TTLInfo old_ttl_info;
|
||||
const time_t current_time;
|
||||
|
@ -5,13 +5,14 @@ namespace DB
|
||||
{
|
||||
|
||||
TTLAggregationAlgorithm::TTLAggregationAlgorithm(
|
||||
const TTLExpressions & ttl_expressions_,
|
||||
const TTLDescription & description_,
|
||||
const TTLInfo & old_ttl_info_,
|
||||
time_t current_time_,
|
||||
bool force_,
|
||||
const Block & header_,
|
||||
const MergeTreeData & storage_)
|
||||
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
|
||||
: ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_)
|
||||
, header(header_)
|
||||
{
|
||||
current_key_value.resize(description.group_by_keys.size());
|
||||
@ -75,8 +76,8 @@ void TTLAggregationAlgorithm::execute(Block & block)
|
||||
const auto & column_names = header.getNames();
|
||||
MutableColumns aggregate_columns = header.cloneEmptyColumns();
|
||||
|
||||
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
|
||||
auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column);
|
||||
auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column);
|
||||
auto where_column = executeExpressionAndGetColumn(ttl_expressions.where_expression, block, description.where_result_column);
|
||||
|
||||
size_t rows_aggregated = 0;
|
||||
size_t current_key_start = 0;
|
||||
@ -157,8 +158,8 @@ void TTLAggregationAlgorithm::execute(Block & block)
|
||||
/// If some rows were aggregated we have to recalculate ttl info's
|
||||
if (some_rows_were_aggregated)
|
||||
{
|
||||
auto ttl_column_after_aggregation = executeExpressionAndGetColumn(description.expression, block, description.result_column);
|
||||
auto where_column_after_aggregation = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column);
|
||||
auto ttl_column_after_aggregation = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column);
|
||||
auto where_column_after_aggregation = executeExpressionAndGetColumn(ttl_expressions.where_expression, block, description.where_result_column);
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
bool where_filter_passed = !where_column_after_aggregation || where_column_after_aggregation->getBool(i);
|
||||
|
@ -13,6 +13,7 @@ class TTLAggregationAlgorithm final : public ITTLAlgorithm
|
||||
{
|
||||
public:
|
||||
TTLAggregationAlgorithm(
|
||||
const TTLExpressions & ttl_expressions_,
|
||||
const TTLDescription & description_,
|
||||
const TTLInfo & old_ttl_info_,
|
||||
time_t current_time_,
|
||||
|
@ -4,6 +4,7 @@ namespace DB
|
||||
{
|
||||
|
||||
TTLColumnAlgorithm::TTLColumnAlgorithm(
|
||||
const TTLExpressions & ttl_expressions_,
|
||||
const TTLDescription & description_,
|
||||
const TTLInfo & old_ttl_info_,
|
||||
time_t current_time_,
|
||||
@ -12,7 +13,7 @@ TTLColumnAlgorithm::TTLColumnAlgorithm(
|
||||
const ExpressionActionsPtr & default_expression_,
|
||||
const String & default_column_name_,
|
||||
bool is_compact_part_)
|
||||
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
|
||||
: ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_)
|
||||
, column_name(column_name_)
|
||||
, default_expression(default_expression_)
|
||||
, default_column_name(default_column_name_)
|
||||
@ -49,7 +50,7 @@ void TTLColumnAlgorithm::execute(Block & block)
|
||||
if (default_column)
|
||||
default_column = default_column->convertToFullColumnIfConst();
|
||||
|
||||
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
|
||||
auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column);
|
||||
|
||||
auto & column_with_type = block.getByName(column_name);
|
||||
const IColumn * values_column = column_with_type.column.get();
|
||||
|
@ -11,6 +11,7 @@ class TTLColumnAlgorithm final : public ITTLAlgorithm
|
||||
{
|
||||
public:
|
||||
TTLColumnAlgorithm(
|
||||
const TTLExpressions & ttl_expressions_,
|
||||
const TTLDescription & description_,
|
||||
const TTLInfo & old_ttl_info_,
|
||||
time_t current_time_,
|
||||
|
@ -4,8 +4,8 @@ namespace DB
|
||||
{
|
||||
|
||||
TTLDeleteAlgorithm::TTLDeleteAlgorithm(
|
||||
const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
|
||||
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
|
||||
const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_)
|
||||
: ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_)
|
||||
{
|
||||
if (!isMinTTLExpired())
|
||||
new_ttl_info = old_ttl_info;
|
||||
@ -19,8 +19,8 @@ void TTLDeleteAlgorithm::execute(Block & block)
|
||||
if (!block || !isMinTTLExpired())
|
||||
return;
|
||||
|
||||
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
|
||||
auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column);
|
||||
auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column);
|
||||
auto where_column = executeExpressionAndGetColumn(ttl_expressions.where_expression, block, description.where_result_column);
|
||||
|
||||
MutableColumns result_columns;
|
||||
const auto & column_names = block.getNames();
|
||||
@ -54,7 +54,7 @@ void TTLDeleteAlgorithm::execute(Block & block)
|
||||
|
||||
void TTLDeleteAlgorithm::finalize(const MutableDataPartPtr & data_part) const
|
||||
{
|
||||
if (description.where_expression)
|
||||
if (ttl_expressions.where_expression)
|
||||
data_part->ttl_infos.rows_where_ttl[description.result_column] = new_ttl_info;
|
||||
else
|
||||
data_part->ttl_infos.table_ttl = new_ttl_info;
|
||||
|
@ -10,7 +10,7 @@ namespace DB
|
||||
class TTLDeleteAlgorithm final : public ITTLAlgorithm
|
||||
{
|
||||
public:
|
||||
TTLDeleteAlgorithm(const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
|
||||
TTLDeleteAlgorithm(const TTLExpressions & ttl_expressions_, const TTLDescription & description_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_);
|
||||
|
||||
void execute(Block & block) override;
|
||||
void finalize(const MutableDataPartPtr & data_part) const override;
|
||||
|
@ -4,13 +4,14 @@ namespace DB
|
||||
{
|
||||
|
||||
TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm(
|
||||
const TTLExpressions & ttl_expressions_,
|
||||
const TTLDescription & description_,
|
||||
const TTLUpdateField ttl_update_field_,
|
||||
const String ttl_update_key_,
|
||||
const TTLInfo & old_ttl_info_,
|
||||
time_t current_time_,
|
||||
bool force_)
|
||||
: ITTLAlgorithm(description_, old_ttl_info_, current_time_, force_)
|
||||
: ITTLAlgorithm(ttl_expressions_, description_, old_ttl_info_, current_time_, force_)
|
||||
, ttl_update_field(ttl_update_field_)
|
||||
, ttl_update_key(ttl_update_key_)
|
||||
{
|
||||
@ -21,7 +22,7 @@ void TTLUpdateInfoAlgorithm::execute(Block & block)
|
||||
if (!block)
|
||||
return;
|
||||
|
||||
auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column);
|
||||
auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column);
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
UInt32 cur_ttl = ITTLAlgorithm::getTimestampByIndex(ttl_column.get(), i);
|
||||
|
@ -20,6 +20,7 @@ class TTLUpdateInfoAlgorithm : public ITTLAlgorithm
|
||||
{
|
||||
public:
|
||||
TTLUpdateInfoAlgorithm(
|
||||
const TTLExpressions & ttl_expressions_,
|
||||
const TTLDescription & description_,
|
||||
const TTLUpdateField ttl_update_field_,
|
||||
const String ttl_update_key_,
|
||||
|
@ -145,10 +145,14 @@ void ColumnGathererStream::gather(Column & column_res)

    next_required_source = -1;

    while (row_source_pos < row_sources_end
        && column_res.size() < block_preferred_size_rows
        && column_res.allocatedBytes() < block_preferred_size_bytes)

    /// We use do ... while here to ensure there will be at least one iteration of this loop.
    /// Because the column_res.byteSize() could be bigger than block_preferred_size_bytes already at this point.
    do
    {
        if (row_source_pos >= row_sources_end)
            break;

        RowSourcePart row_source = *row_source_pos;
        size_t source_num = row_source.getSourceNum();
        Source & source = sources[source_num];
@ -191,7 +195,7 @@ void ColumnGathererStream::gather(Column & column_res)
        }

        source.pos += len;
    }
    } while (column_res.size() < block_preferred_size_rows && column_res.byteSize() < block_preferred_size_bytes);
}

}
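// Self-contained sketch (illustrative) of why a do { ... } while loop is used above: the row/byte
// limits may already be exceeded before the first iteration, yet at least one row must still be
// gathered so the stream always makes progress.
#include <cstddef>
#include <vector>

void gather_at_least_one(std::vector<int> & out, const std::vector<int> & in,
                         size_t preferred_rows, size_t preferred_bytes)
{
    size_t pos = 0;
    do
    {
        if (pos >= in.size())
            break;                 // nothing left to gather
        out.push_back(in[pos++]);  // gathers at least one element while input remains
    } while (out.size() < preferred_rows && out.size() * sizeof(int) < preferred_bytes);
}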
|
@ -4,7 +4,24 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static TTLExpressions getExpressions(const TTLDescription & ttl_descr, PreparedSets::Subqueries & subqueries_for_sets, const ContextPtr & context)
{
    auto expr = ttl_descr.buildExpression(context);
    auto expr_queries = expr.sets->getSubqueries();
    subqueries_for_sets.insert(subqueries_for_sets.end(), expr_queries.begin(), expr_queries.end());

    auto where_expr = ttl_descr.buildWhereExpression(context);
    if (where_expr.sets)
    {
        auto where_expr_queries = where_expr.sets->getSubqueries();
        subqueries_for_sets.insert(subqueries_for_sets.end(), where_expr_queries.begin(), where_expr_queries.end());
    }

    return {expr.expression, where_expr.expression};
}

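// Minimal sketch (simplified stand-in types; an assumption, not the real PreparedSets API) of the
// collection pattern above: each freshly built TTL expression may carry its own subqueries, and they
// are all appended into one shared list so the sets can be built once before the transform runs.
#include <string>
#include <vector>

struct BuiltExpression
{
    std::string expression;               // stand-in for ExpressionActionsPtr
    std::vector<std::string> subqueries;  // stand-in for PreparedSets::Subqueries
};

void collect_subqueries(const BuiltExpression & built, std::vector<std::string> & all_subqueries)
{
    all_subqueries.insert(all_subqueries.end(), built.subqueries.begin(), built.subqueries.end());
}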
TTLCalcTransform::TTLCalcTransform(
|
||||
const ContextPtr & context,
|
||||
const Block & header_,
|
||||
const MergeTreeData & storage_,
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
@ -21,33 +38,39 @@ TTLCalcTransform::TTLCalcTransform(
|
||||
{
|
||||
const auto & rows_ttl = metadata_snapshot_->getRowsTTL();
|
||||
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
|
||||
rows_ttl, TTLUpdateField::TABLE_TTL, rows_ttl.result_column, old_ttl_infos.table_ttl, current_time_, force_));
|
||||
getExpressions(rows_ttl, subqueries_for_sets, context), rows_ttl,
|
||||
TTLUpdateField::TABLE_TTL, rows_ttl.result_column, old_ttl_infos.table_ttl, current_time_, force_));
|
||||
}
|
||||
|
||||
for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
|
||||
where_ttl, TTLUpdateField::ROWS_WHERE_TTL, where_ttl.result_column, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
|
||||
getExpressions(where_ttl, subqueries_for_sets, context), where_ttl,
|
||||
TTLUpdateField::ROWS_WHERE_TTL, where_ttl.result_column, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
|
||||
|
||||
for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
|
||||
group_by_ttl, TTLUpdateField::GROUP_BY_TTL, group_by_ttl.result_column, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_));
|
||||
getExpressions(group_by_ttl, subqueries_for_sets, context), group_by_ttl,
|
||||
TTLUpdateField::GROUP_BY_TTL, group_by_ttl.result_column, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_));
|
||||
|
||||
if (metadata_snapshot_->hasAnyColumnTTL())
|
||||
{
|
||||
for (const auto & [name, description] : metadata_snapshot_->getColumnTTLs())
|
||||
{
|
||||
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
|
||||
description, TTLUpdateField::COLUMNS_TTL, name, old_ttl_infos.columns_ttl[name], current_time_, force_));
|
||||
getExpressions(description, subqueries_for_sets, context), description,
|
||||
TTLUpdateField::COLUMNS_TTL, name, old_ttl_infos.columns_ttl[name], current_time_, force_));
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
|
||||
move_ttl, TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
|
||||
getExpressions(move_ttl, subqueries_for_sets, context), move_ttl,
|
||||
TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
|
||||
|
||||
for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
|
||||
recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
|
||||
getExpressions(recompression_ttl, subqueries_for_sets, context), recompression_ttl,
|
||||
TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
|
||||
}
|
||||
|
||||
void TTLCalcTransform::consume(Chunk chunk)
|
||||
|
@ -15,6 +15,7 @@ class TTLCalcTransform : public IAccumulatingTransform
|
||||
{
|
||||
public:
|
||||
TTLCalcTransform(
|
||||
const ContextPtr & context,
|
||||
const Block & header_,
|
||||
const MergeTreeData & storage_,
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
@ -23,6 +24,8 @@ public:
|
||||
bool force_
|
||||
);
|
||||
|
||||
PreparedSets::Subqueries getSubqueries() { return std::move(subqueries_for_sets); }
|
||||
|
||||
String getName() const override { return "TTL_CALC"; }
|
||||
Status prepare() override;
|
||||
|
||||
@ -35,6 +38,7 @@ protected:
|
||||
|
||||
private:
|
||||
std::vector<TTLAlgorithmPtr> algorithms;
|
||||
PreparedSets::Subqueries subqueries_for_sets;
|
||||
|
||||
/// ttl_infos and empty_columns are updating while reading
|
||||
const MergeTreeData::MutableDataPartPtr & data_part;
|
||||
|
@ -16,7 +16,24 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static TTLExpressions getExpressions(const TTLDescription & ttl_descr, PreparedSets::Subqueries & subqueries_for_sets, const ContextPtr & context)
|
||||
{
|
||||
auto expr = ttl_descr.buildExpression(context);
|
||||
auto expr_queries = expr.sets->getSubqueries();
|
||||
subqueries_for_sets.insert(subqueries_for_sets.end(), expr_queries.begin(), expr_queries.end());
|
||||
|
||||
auto where_expr = ttl_descr.buildWhereExpression(context);
|
||||
if (where_expr.sets)
|
||||
{
|
||||
auto where_expr_queries = where_expr.sets->getSubqueries();
|
||||
subqueries_for_sets.insert(subqueries_for_sets.end(), where_expr_queries.begin(), where_expr_queries.end());
|
||||
}
|
||||
|
||||
return {expr.expression, where_expr.expression};
|
||||
}
|
||||
|
||||
TTLTransform::TTLTransform(
|
||||
const ContextPtr & context,
|
||||
const Block & header_,
|
||||
const MergeTreeData & storage_,
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
@ -33,10 +50,11 @@ TTLTransform::TTLTransform(
|
||||
{
|
||||
const auto & rows_ttl = metadata_snapshot_->getRowsTTL();
|
||||
auto algorithm = std::make_unique<TTLDeleteAlgorithm>(
|
||||
rows_ttl, old_ttl_infos.table_ttl, current_time_, force_);
|
||||
getExpressions(rows_ttl, subqueries_for_sets, context), rows_ttl,
|
||||
old_ttl_infos.table_ttl, current_time_, force_);
|
||||
|
||||
/// Skip all data if table ttl is expired for part
|
||||
if (algorithm->isMaxTTLExpired() && !rows_ttl.where_expression)
|
||||
if (algorithm->isMaxTTLExpired() && !rows_ttl.where_expression_ast)
|
||||
all_data_dropped = true;
|
||||
|
||||
delete_algorithm = algorithm.get();
|
||||
@ -45,11 +63,13 @@ TTLTransform::TTLTransform(
|
||||
|
||||
for (const auto & where_ttl : metadata_snapshot_->getRowsWhereTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLDeleteAlgorithm>(
|
||||
where_ttl, old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
|
||||
getExpressions(where_ttl, subqueries_for_sets, context), where_ttl,
|
||||
old_ttl_infos.rows_where_ttl[where_ttl.result_column], current_time_, force_));
|
||||
|
||||
for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLAggregationAlgorithm>(
|
||||
group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_,
|
||||
getExpressions(group_by_ttl, subqueries_for_sets, context), group_by_ttl,
|
||||
old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_,
|
||||
getInputPort().getHeader(), storage_));
|
||||
|
||||
if (metadata_snapshot_->hasAnyColumnTTL())
|
||||
@ -75,18 +95,21 @@ TTLTransform::TTLTransform(
|
||||
}
|
||||
|
||||
algorithms.emplace_back(std::make_unique<TTLColumnAlgorithm>(
|
||||
description, old_ttl_infos.columns_ttl[name], current_time_,
|
||||
getExpressions(description, subqueries_for_sets, context), description,
|
||||
old_ttl_infos.columns_ttl[name], current_time_,
|
||||
force_, name, default_expression, default_column_name, isCompactPart(data_part)));
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & move_ttl : metadata_snapshot_->getMoveTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
|
||||
move_ttl, TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
|
||||
getExpressions(move_ttl, subqueries_for_sets, context), move_ttl,
|
||||
TTLUpdateField::MOVES_TTL, move_ttl.result_column, old_ttl_infos.moves_ttl[move_ttl.result_column], current_time_, force_));
|
||||
|
||||
for (const auto & recompression_ttl : metadata_snapshot_->getRecompressionTTLs())
|
||||
algorithms.emplace_back(std::make_unique<TTLUpdateInfoAlgorithm>(
|
||||
recompression_ttl, TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
|
||||
getExpressions(recompression_ttl, subqueries_for_sets, context), recompression_ttl,
|
||||
TTLUpdateField::RECOMPRESSION_TTL, recompression_ttl.result_column, old_ttl_infos.recompression_ttl[recompression_ttl.result_column], current_time_, force_));
|
||||
}
|
||||
|
||||
Block reorderColumns(Block block, const Block & header)
|
||||
|
@ -16,6 +16,7 @@ class TTLTransform : public IAccumulatingTransform
|
||||
{
|
||||
public:
|
||||
TTLTransform(
|
||||
const ContextPtr & context,
|
||||
const Block & header_,
|
||||
const MergeTreeData & storage_,
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
@ -28,6 +29,8 @@ public:
|
||||
|
||||
Status prepare() override;
|
||||
|
||||
PreparedSets::Subqueries getSubqueries() { return std::move(subqueries_for_sets); }
|
||||
|
||||
protected:
|
||||
void consume(Chunk chunk) override;
|
||||
Chunk generate() override;
|
||||
@ -40,6 +43,8 @@ private:
|
||||
const TTLDeleteAlgorithm * delete_algorithm = nullptr;
|
||||
bool all_data_dropped = false;
|
||||
|
||||
PreparedSets::Subqueries subqueries_for_sets;
|
||||
|
||||
/// ttl_infos and empty_columns are updating while reading
|
||||
const MergeTreeData::MutableDataPartPtr & data_part;
|
||||
LoggerPtr log;
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include "Core/Protocol.h"
|
||||
#include <Core/Protocol.h>
|
||||
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
|
||||
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
|
||||
#include <Processors/Sources/SourceFromSingleChunk.h>
|
||||
@ -17,6 +17,7 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/InternalTextLogsQueue.h>
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <Client/ConnectionEstablisher.h>
|
||||
#include <Client/MultiplexedConnections.h>
|
||||
#include <Client/HedgedConnections.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
|
||||
@ -29,6 +30,7 @@ namespace ProfileEvents
|
||||
extern const Event SuspendSendingQueryToShard;
|
||||
extern const Event ReadTaskRequestsReceived;
|
||||
extern const Event MergeTreeReadTaskRequestsReceived;
|
||||
extern const Event ParallelReplicasAvailableCount;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
@ -62,6 +64,55 @@ RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
{
|
||||
}
|
||||
|
||||
RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
ConnectionPoolPtr pool,
|
||||
const String & query_,
|
||||
const Block & header_,
|
||||
ContextPtr context_,
|
||||
ThrottlerPtr throttler,
|
||||
const Scalars & scalars_,
|
||||
const Tables & external_tables_,
|
||||
QueryProcessingStage::Enum stage_,
|
||||
std::optional<Extension> extension_)
|
||||
: RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_)
|
||||
{
|
||||
create_connections = [this, pool, throttler, extension_](AsyncCallback)
|
||||
{
|
||||
const Settings & current_settings = context->getSettingsRef();
|
||||
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
|
||||
|
||||
ConnectionPoolWithFailover::TryResult result;
|
||||
std::string fail_message;
|
||||
if (main_table)
|
||||
{
|
||||
auto table_name = main_table.getQualifiedName();
|
||||
|
||||
ConnectionEstablisher connection_establisher(pool, &timeouts, current_settings, log, &table_name);
|
||||
connection_establisher.run(result, fail_message);
|
||||
}
|
||||
else
|
||||
{
|
||||
ConnectionEstablisher connection_establisher(pool, &timeouts, current_settings, log, nullptr);
|
||||
connection_establisher.run(result, fail_message);
|
||||
}
|
||||
|
||||
std::vector<IConnectionPool::Entry> connection_entries;
|
||||
if (!result.entry.isNull() && result.is_usable)
|
||||
{
|
||||
if (extension_ && extension_->parallel_reading_coordinator)
|
||||
ProfileEvents::increment(ProfileEvents::ParallelReplicasAvailableCount);
|
||||
|
||||
connection_entries.emplace_back(std::move(result.entry));
|
||||
}
|
||||
|
||||
auto res = std::make_unique<MultiplexedConnections>(std::move(connection_entries), current_settings, throttler);
|
||||
if (extension_ && extension_->replica_info)
|
||||
res->setReplicaInfo(*extension_->replica_info);
|
||||
|
||||
return res;
|
||||
};
|
||||
}
|
||||
|
||||
RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
Connection & connection,
|
||||
const String & query_,
|
||||
|
@ -52,6 +52,18 @@ public:
|
||||
std::optional<IConnections::ReplicaInfo> replica_info = {};
|
||||
};
|
||||
|
||||
/// Takes a connection pool for a node (not cluster)
|
||||
RemoteQueryExecutor(
|
||||
ConnectionPoolPtr pool,
|
||||
const String & query_,
|
||||
const Block & header_,
|
||||
ContextPtr context_,
|
||||
ThrottlerPtr throttler = nullptr,
|
||||
const Scalars & scalars_ = Scalars(),
|
||||
const Tables & external_tables_ = Tables(),
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete,
|
||||
std::optional<Extension> extension_ = std::nullopt);
|
||||
|
||||
/// Takes already set connection.
|
||||
RemoteQueryExecutor(
|
||||
Connection & connection,
|
||||
|
@@ -76,7 +76,7 @@ namespace
    static std::once_flag once_flag;
    std::call_once(once_flag, [&config]
    {
        static LoggerPtr logger = getLogger("grpc");
        static LoggerRawPtr logger = getRawLogger("grpc");
        gpr_set_log_function([](gpr_log_func_args* args)
        {
            if (args->severity == GPR_LOG_SEVERITY_DEBUG)
@@ -622,7 +622,7 @@ namespace
    class Call
    {
    public:
        Call(CallType call_type_, std::unique_ptr<BaseResponder> responder_, IServer & iserver_, LoggerPtr log_);
        Call(CallType call_type_, std::unique_ptr<BaseResponder> responder_, IServer & iserver_, LoggerRawPtr log_);
        ~Call();

        void start(const std::function<void(void)> & on_finish_call_callback);
@@ -664,7 +664,7 @@ namespace
        const CallType call_type;
        std::unique_ptr<BaseResponder> responder;
        IServer & iserver;
        LoggerPtr log = nullptr;
        LoggerRawPtr log = nullptr;

        std::optional<Session> session;
        ContextMutablePtr query_context;
@@ -726,7 +726,7 @@ namespace
    };
    // NOLINTEND(clang-analyzer-optin.performance.Padding)

    Call::Call(CallType call_type_, std::unique_ptr<BaseResponder> responder_, IServer & iserver_, LoggerPtr log_)
    Call::Call(CallType call_type_, std::unique_ptr<BaseResponder> responder_, IServer & iserver_, LoggerRawPtr log_)
        : call_type(call_type_), responder(std::move(responder_)), iserver(iserver_), log(log_)
    {
    }
@@ -1851,7 +1851,7 @@ private:
GRPCServer::GRPCServer(IServer & iserver_, const Poco::Net::SocketAddress & address_to_listen_)
    : iserver(iserver_)
    , address_to_listen(address_to_listen_)
    , log(getLogger("GRPCServer"))
    , log(getRawLogger("GRPCServer"))
    , runner(std::make_unique<Runner>(*this))
{}
@@ -3,10 +3,11 @@
#include "config.h"

#if USE_GRPC

#include "clickhouse_grpc.grpc.pb.h"
#include <Poco/Net/SocketAddress.h>
#include <base/types.h>
#include <Common/Logger.h>
#include "clickhouse_grpc.grpc.pb.h"

namespace Poco { class Logger; }

@@ -47,7 +48,7 @@ private:

    IServer & iserver;
    const Poco::Net::SocketAddress address_to_listen;
    LoggerPtr log;
    LoggerRawPtr log;
    GRPCService grpc_service;
    std::unique_ptr<grpc::Server> grpc_server;
    std::unique_ptr<grpc::ServerCompletionQueue> queue;
@@ -933,7 +933,7 @@ void TCPHandler::processInsertQuery()
    if (auto table = DatabaseCatalog::instance().tryGetTable(insert_query.table_id, query_context))
        async_insert_enabled |= table->areAsynchronousInsertsEnabled();

    if (insert_queue && async_insert_enabled && !insert_query.select)
    if (insert_queue && async_insert_enabled && !insert_query.select && !settings.deduplicate_blocks_in_dependent_materialized_views)
    {
        auto result = processAsyncInsertQuery(*insert_queue);
        if (result.status == AsynchronousInsertQueue::PushResult::OK)
@@ -33,6 +33,9 @@
#include <Processors/Transforms/TTLCalcTransform.h>
#include <Processors/Transforms/DistinctSortedTransform.h>
#include <Processors/Transforms/DistinctTransform.h>
#include <Processors/QueryPlan/CreatingSetsStep.h>
#include <Interpreters/PreparedSets.h>
#include <QueryPipeline/QueryPipelineBuilder.h>

namespace DB
{
@@ -1056,13 +1059,14 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
            break;
    }

    auto res_pipe = Pipe::unitePipes(std::move(pipes));
    res_pipe.addTransform(std::move(merged_transform));
    auto builder = std::make_unique<QueryPipelineBuilder>();
    builder->init(Pipe::unitePipes(std::move(pipes)));
    builder->addTransform(std::move(merged_transform));

#ifndef NDEBUG
    if (!sort_description.empty())
    {
        res_pipe.addSimpleTransform([&](const Block & header_)
        builder->addSimpleTransform([&](const Block & header_)
        {
            auto transform = std::make_shared<CheckSortedTransform>(header_, sort_description);
            return transform;
@@ -1084,26 +1088,34 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
    }

        if (DistinctSortedTransform::isApplicable(header, sort_description, global_ctx->deduplicate_by_columns))
            res_pipe.addTransform(std::make_shared<DistinctSortedTransform>(
                res_pipe.getHeader(), sort_description, SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns));
            builder->addTransform(std::make_shared<DistinctSortedTransform>(
                builder->getHeader(), sort_description, SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns));
        else
            res_pipe.addTransform(std::make_shared<DistinctTransform>(
                res_pipe.getHeader(), SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns));
            builder->addTransform(std::make_shared<DistinctTransform>(
                builder->getHeader(), SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns));
    }

    PreparedSets::Subqueries subqueries;

    if (ctx->need_remove_expired_values)
        res_pipe.addTransform(std::make_shared<TTLTransform>(
            res_pipe.getHeader(), *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl));
    {
        auto transform = std::make_shared<TTLTransform>(global_ctx->context, builder->getHeader(), *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl);
        subqueries = transform->getSubqueries();
        builder->addTransform(std::move(transform));
    }

    if (global_ctx->metadata_snapshot->hasSecondaryIndices())
    {
        const auto & indices = global_ctx->metadata_snapshot->getSecondaryIndices();
        res_pipe.addTransform(std::make_shared<ExpressionTransform>(
            res_pipe.getHeader(), indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext())));
        res_pipe.addTransform(std::make_shared<MaterializingTransform>(res_pipe.getHeader()));
        builder->addTransform(std::make_shared<ExpressionTransform>(
            builder->getHeader(), indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext())));
        builder->addTransform(std::make_shared<MaterializingTransform>(builder->getHeader()));
    }

    global_ctx->merged_pipeline = QueryPipeline(std::move(res_pipe));
    if (!subqueries.empty())
        builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), global_ctx->context);

    global_ctx->merged_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
    /// Dereference unique_ptr and pass horizontal_stage_progress by reference
    global_ctx->merged_pipeline.setProgressCallback(MergeProgressCallback(global_ctx->merge_list_element_ptr, global_ctx->watch_prev_elapsed, *global_ctx->horizontal_stage_progress));
    /// Is calculated inside MergeProgressCallback.
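Net effect of the hunks above: createMergedStream() now assembles the merge pipeline through a QueryPipelineBuilder, collects the subqueries referenced by TTL expressions from the TTLTransform, and attaches them with addCreatingSetsTransform before materializing the pipeline. A condensed, hedged sketch of that flow, reusing only identifiers from the hunks (the surrounding MergeTask members are assumed):

    auto builder = std::make_unique<QueryPipelineBuilder>();
    builder->init(Pipe::unitePipes(std::move(pipes)));
    builder->addTransform(std::move(merged_transform));

    PreparedSets::Subqueries subqueries;
    if (ctx->need_remove_expired_values)
    {
        auto transform = std::make_shared<TTLTransform>(global_ctx->context, builder->getHeader(),
            *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part,
            global_ctx->time_of_merge, ctx->force_ttl);
        subqueries = transform->getSubqueries();    /// sets needed by TTL expressions with IN (subquery)
        builder->addTransform(std::move(transform));
    }

    if (!subqueries.empty())
        builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), global_ctx->context);

    global_ctx->merged_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));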
@@ -132,13 +132,18 @@ void buildScatterSelector(

/// Computes ttls and updates ttl infos
void updateTTL(
    const ContextPtr context,
    const TTLDescription & ttl_entry,
    IMergeTreeDataPart::TTLInfos & ttl_infos,
    DB::MergeTreeDataPartTTLInfo & ttl_info,
    const Block & block,
    bool update_part_min_max_ttls)
{
    auto ttl_column = ITTLAlgorithm::executeExpressionAndGetColumn(ttl_entry.expression, block, ttl_entry.result_column);
    auto expr_and_set = ttl_entry.buildExpression(context);
    for (auto & subquery : expr_and_set.sets->getSubqueries())
        subquery->buildSetInplace(context);

    auto ttl_column = ITTLAlgorithm::executeExpressionAndGetColumn(expr_and_set.expression, block, ttl_entry.result_column);

    if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(ttl_column.get()))
    {
@@ -507,7 +512,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
    DB::IMergeTreeDataPart::TTLInfos move_ttl_infos;
    const auto & move_ttl_entries = metadata_snapshot->getMoveTTLs();
    for (const auto & ttl_entry : move_ttl_entries)
        updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false);
        updateTTL(context, ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false);

    ReservationPtr reservation = data.reserveSpacePreferringTTLRules(metadata_snapshot, expected_size, move_ttl_infos, time(nullptr), 0, true);
    VolumePtr volume = data.getStoragePolicy()->getVolume(0);
@@ -562,20 +567,20 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
    }

    if (metadata_snapshot->hasRowsTTL())
        updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true);
        updateTTL(context, metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true);

    for (const auto & ttl_entry : metadata_snapshot->getGroupByTTLs())
        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true);
        updateTTL(context, ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.group_by_ttl[ttl_entry.result_column], block, true);

    for (const auto & ttl_entry : metadata_snapshot->getRowsWhereTTLs())
        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.rows_where_ttl[ttl_entry.result_column], block, true);
        updateTTL(context, ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.rows_where_ttl[ttl_entry.result_column], block, true);

    for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs())
        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);
        updateTTL(context, ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true);

    const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs();
    for (const auto & ttl_entry : recompression_ttl_entries)
        updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.recompression_ttl[ttl_entry.result_column], block, false);
        updateTTL(context, ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.recompression_ttl[ttl_entry.result_column], block, false);

    new_data_part->ttl_infos.update(move_ttl_infos);
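The recurring change in this file is that updateTTL() now receives the query context and builds the TTL expression (plus any sets it needs) on demand, instead of reading a pre-built ExpressionActions from the TTLDescription. The evaluation pattern, restated as a short hedged sketch using only names that appear in the hunks above:

    auto expr_and_set = ttl_entry.buildExpression(context);       /// ExpressionAndSets
    for (auto & subquery : expr_and_set.sets->getSubqueries())
        subquery->buildSetInplace(context);                       /// materialize IN-subquery sets first

    auto ttl_column = ITTLAlgorithm::executeExpressionAndGetColumn(
        expr_and_set.expression, block, ttl_entry.result_column); /// evaluate the TTL over the block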
@@ -15,6 +15,7 @@ namespace DB
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int BAD_ARGUMENTS;
}


@@ -217,7 +218,20 @@ MergeTreeIndexPtr minmaxIndexCreator(
    return std::make_shared<MergeTreeIndexMinMax>(index);
}

void minmaxIndexValidator(const IndexDescription & /* index */, bool /* attach */)
void minmaxIndexValidator(const IndexDescription & index, bool attach)
{
    if (attach)
        return;

    for (const auto & column : index.sample_block)
    {
        if (!column.type->isComparable())
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Data type of argument for minmax index must be comparable, got {} type for column {} instead",
                column.type->getName(), column.name);
        }
    }
}

}
@ -8,6 +8,7 @@
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Interpreters/SquashingTransform.h>
|
||||
#include <Interpreters/MergeTreeTransaction.h>
|
||||
#include <Interpreters/PreparedSets.h>
|
||||
#include <Processors/Transforms/TTLTransform.h>
|
||||
#include <Processors/Transforms/TTLCalcTransform.h>
|
||||
#include <Processors/Transforms/DistinctSortedTransform.h>
|
||||
@ -16,6 +17,7 @@
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Processors/Transforms/MaterializingTransform.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
#include <Processors/QueryPlan/CreatingSetsStep.h>
|
||||
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataWriter.h>
|
||||
#include <Storages/MutationCommands.h>
|
||||
@ -1552,21 +1554,34 @@ private:
|
||||
if (!ctx->mutating_pipeline_builder.initialized())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot mutate part columns with uninitialized mutations stream. It's a bug");
|
||||
|
||||
QueryPipelineBuilder builder(std::move(ctx->mutating_pipeline_builder));
|
||||
auto builder = std::make_unique<QueryPipelineBuilder>(std::move(ctx->mutating_pipeline_builder));
|
||||
|
||||
if (ctx->metadata_snapshot->hasPrimaryKey() || ctx->metadata_snapshot->hasSecondaryIndices())
|
||||
{
|
||||
builder.addTransform(std::make_shared<ExpressionTransform>(
|
||||
builder.getHeader(), ctx->data->getPrimaryKeyAndSkipIndicesExpression(ctx->metadata_snapshot, skip_indices)));
|
||||
builder->addTransform(std::make_shared<ExpressionTransform>(
|
||||
builder->getHeader(), ctx->data->getPrimaryKeyAndSkipIndicesExpression(ctx->metadata_snapshot, skip_indices)));
|
||||
|
||||
builder.addTransform(std::make_shared<MaterializingTransform>(builder.getHeader()));
|
||||
builder->addTransform(std::make_shared<MaterializingTransform>(builder->getHeader()));
|
||||
}
|
||||
|
||||
PreparedSets::Subqueries subqueries;
|
||||
|
||||
if (ctx->execute_ttl_type == ExecuteTTLType::NORMAL)
|
||||
builder.addTransform(std::make_shared<TTLTransform>(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
|
||||
{
|
||||
auto transform = std::make_shared<TTLTransform>(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
|
||||
subqueries = transform->getSubqueries();
|
||||
builder->addTransform(std::move(transform));
|
||||
}
|
||||
|
||||
if (ctx->execute_ttl_type == ExecuteTTLType::RECALCULATE)
|
||||
builder.addTransform(std::make_shared<TTLCalcTransform>(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
|
||||
{
|
||||
auto transform = std::make_shared<TTLCalcTransform>(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
|
||||
subqueries = transform->getSubqueries();
|
||||
builder->addTransform(std::move(transform));
|
||||
}
|
||||
|
||||
if (!subqueries.empty())
|
||||
builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), ctx->context);
|
||||
|
||||
ctx->minmax_idx = std::make_shared<IMergeTreeDataPart::MinMaxIndex>();
|
||||
|
||||
@ -1600,7 +1615,7 @@ private:
|
||||
ctx->context->getWriteSettings(),
|
||||
computed_granularity);
|
||||
|
||||
ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
|
||||
ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
|
||||
ctx->mutating_pipeline.setProgressCallback(ctx->progress_callback);
|
||||
/// Is calculated inside MergeProgressCallback.
|
||||
ctx->mutating_pipeline.disableProfileEventUpdate();
|
||||
@ -1796,13 +1811,25 @@ private:
|
||||
|
||||
if (ctx->mutating_pipeline_builder.initialized())
|
||||
{
|
||||
QueryPipelineBuilder builder(std::move(ctx->mutating_pipeline_builder));
|
||||
auto builder = std::make_unique<QueryPipelineBuilder>(std::move(ctx->mutating_pipeline_builder));
|
||||
PreparedSets::Subqueries subqueries;
|
||||
|
||||
if (ctx->execute_ttl_type == ExecuteTTLType::NORMAL)
|
||||
builder.addTransform(std::make_shared<TTLTransform>(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
|
||||
{
|
||||
auto transform = std::make_shared<TTLTransform>(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
|
||||
subqueries = transform->getSubqueries();
|
||||
builder->addTransform(std::move(transform));
|
||||
}
|
||||
|
||||
if (ctx->execute_ttl_type == ExecuteTTLType::RECALCULATE)
|
||||
builder.addTransform(std::make_shared<TTLCalcTransform>(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true));
|
||||
{
|
||||
auto transform = std::make_shared<TTLCalcTransform>(ctx->context, builder->getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true);
|
||||
subqueries = transform->getSubqueries();
|
||||
builder->addTransform(std::move(transform));
|
||||
}
|
||||
|
||||
if (!subqueries.empty())
|
||||
builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), ctx->context);
|
||||
|
||||
ctx->out = std::make_shared<MergedColumnOnlyOutputStream>(
|
||||
ctx->new_data_part,
|
||||
@ -1816,7 +1843,7 @@ private:
|
||||
&ctx->source_part->index_granularity_info
|
||||
);
|
||||
|
||||
ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
|
||||
ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(*builder));
|
||||
ctx->mutating_pipeline.setProgressCallback(ctx->progress_callback);
|
||||
/// Is calculated inside MergeProgressCallback.
|
||||
ctx->mutating_pipeline.disableProfileEventUpdate();
|
||||
|
@@ -97,11 +97,9 @@ extern const Event ParallelReplicasCollectingOwnedSegmentsMicroseconds;
extern const Event ParallelReplicasReadAssignedMarks;
extern const Event ParallelReplicasReadUnassignedMarks;
extern const Event ParallelReplicasReadAssignedForStealingMarks;
}

namespace ProfileEvents
{
extern const Event ParallelReplicasUsedCount;
extern const Event ParallelReplicasUnavailableCount;
}

namespace DB
@@ -1025,6 +1023,8 @@ ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelR

void ParallelReplicasReadingCoordinator::markReplicaAsUnavailable(size_t replica_number)
{
    ProfileEvents::increment(ProfileEvents::ParallelReplicasUnavailableCount);

    std::lock_guard lock(mutex);

    if (!pimpl)
@@ -297,7 +297,7 @@ void registerStorageAzureBlob(StorageFactory & factory)

        return std::make_shared<StorageAzureBlob>(
            std::move(configuration),
            std::make_unique<AzureObjectStorage>("AzureBlobStorage", std::move(client), std::move(settings)),
            std::make_unique<AzureObjectStorage>("AzureBlobStorage", std::move(client), std::move(settings), configuration.container),
            args.getContext(),
            args.table_id,
            args.columns,
@ -200,7 +200,7 @@ TTLDescription StorageInMemoryMetadata::getRowsTTL() const
|
||||
|
||||
bool StorageInMemoryMetadata::hasRowsTTL() const
|
||||
{
|
||||
return table_ttl.rows_ttl.expression != nullptr;
|
||||
return table_ttl.rows_ttl.expression_ast != nullptr;
|
||||
}
|
||||
|
||||
TTLDescriptions StorageInMemoryMetadata::getRowsWhereTTLs() const
|
||||
@ -258,9 +258,8 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(
|
||||
NameSet required_ttl_columns;
|
||||
NameSet updated_ttl_columns;
|
||||
|
||||
auto add_dependent_columns = [&updated_columns](const auto & expression, auto & to_set)
|
||||
auto add_dependent_columns = [&updated_columns](const Names & required_columns, auto & to_set)
|
||||
{
|
||||
auto required_columns = expression->getRequiredColumns();
|
||||
for (const auto & dependency : required_columns)
|
||||
{
|
||||
if (updated_columns.contains(dependency))
|
||||
@ -276,18 +275,18 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(
|
||||
for (const auto & index : getSecondaryIndices())
|
||||
{
|
||||
if (has_dependency(index.name, ColumnDependency::SKIP_INDEX))
|
||||
add_dependent_columns(index.expression, indices_columns);
|
||||
add_dependent_columns(index.expression->getRequiredColumns(), indices_columns);
|
||||
}
|
||||
|
||||
for (const auto & projection : getProjections())
|
||||
{
|
||||
if (has_dependency(projection.name, ColumnDependency::PROJECTION))
|
||||
add_dependent_columns(&projection, projections_columns);
|
||||
add_dependent_columns(projection.getRequiredColumns(), projections_columns);
|
||||
}
|
||||
|
||||
auto add_for_rows_ttl = [&](const auto & expression, auto & to_set)
|
||||
{
|
||||
if (add_dependent_columns(expression, to_set) && include_ttl_target)
|
||||
if (add_dependent_columns(expression.getNames(), to_set) && include_ttl_target)
|
||||
{
|
||||
/// Filter all columns, if rows TTL expression have to be recalculated.
|
||||
for (const auto & column : getColumns().getAllPhysical())
|
||||
@ -296,25 +295,25 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(
|
||||
};
|
||||
|
||||
if (hasRowsTTL())
|
||||
add_for_rows_ttl(getRowsTTL().expression, required_ttl_columns);
|
||||
add_for_rows_ttl(getRowsTTL().expression_columns, required_ttl_columns);
|
||||
|
||||
for (const auto & entry : getRowsWhereTTLs())
|
||||
add_for_rows_ttl(entry.expression, required_ttl_columns);
|
||||
add_for_rows_ttl(entry.expression_columns, required_ttl_columns);
|
||||
|
||||
for (const auto & entry : getGroupByTTLs())
|
||||
add_for_rows_ttl(entry.expression, required_ttl_columns);
|
||||
add_for_rows_ttl(entry.expression_columns, required_ttl_columns);
|
||||
|
||||
for (const auto & entry : getRecompressionTTLs())
|
||||
add_dependent_columns(entry.expression, required_ttl_columns);
|
||||
add_dependent_columns(entry.expression_columns.getNames(), required_ttl_columns);
|
||||
|
||||
for (const auto & [name, entry] : getColumnTTLs())
|
||||
{
|
||||
if (add_dependent_columns(entry.expression, required_ttl_columns) && include_ttl_target)
|
||||
if (add_dependent_columns(entry.expression_columns.getNames(), required_ttl_columns) && include_ttl_target)
|
||||
updated_ttl_columns.insert(name);
|
||||
}
|
||||
|
||||
for (const auto & entry : getMoveTTLs())
|
||||
add_dependent_columns(entry.expression, required_ttl_columns);
|
||||
add_dependent_columns(entry.expression_columns.getNames(), required_ttl_columns);
|
||||
|
||||
//TODO what about rows_where_ttl and group_by_ttl ??
|
||||
|
||||
|
@ -756,16 +756,23 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin(
|
||||
auto join_tree_type = query_node->getJoinTree()->getNodeType();
|
||||
auto modified_query = query_node->cloneAndReplace(original_table_expression, replacement_table_expression);
|
||||
|
||||
// For the case when join tree is just a table or a table function we don't need to do anything more.
|
||||
if (join_tree_type == QueryTreeNodeType::TABLE || join_tree_type == QueryTreeNodeType::TABLE_FUNCTION)
|
||||
return modified_query;
|
||||
|
||||
// JOIN needs to be removed because StorageMerge should produce not joined data.
|
||||
// GROUP BY should be removed as well.
|
||||
|
||||
auto * modified_query_node = modified_query->as<QueryNode>();
|
||||
|
||||
// Remove the JOIN statement. As a result query will have a form like: SELECT * FROM <table> ...
|
||||
modified_query = modified_query->cloneAndReplace(modified_query_node->getJoinTree(), replacement_table_expression);
|
||||
modified_query_node = modified_query->as<QueryNode>();
|
||||
|
||||
query_node = modified_query->as<QueryNode>();
|
||||
|
||||
// For backward compatibility we need to leave all filters related to this table.
|
||||
// It may lead to some incorrect result.
|
||||
if (query_node->hasPrewhere())
|
||||
replaceFilterExpression(query_node->getPrewhere(), replacement_table_expression, context);
|
||||
if (query_node->hasWhere())
|
||||
@ -779,6 +786,9 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin(
|
||||
projection.clear();
|
||||
NamesAndTypes projection_columns;
|
||||
|
||||
// Select only required columns from the table, because projection list may contain:
|
||||
// 1. aggregate functions
|
||||
// 2. expressions referencing other tables of JOIN
|
||||
for (auto const & column_name : required_column_names)
|
||||
{
|
||||
QueryTreeNodePtr fake_node = std::make_shared<IdentifierNode>(Identifier{column_name});
|
||||
@ -791,6 +801,8 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin(
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Required column '{}' is not resolved", column_name);
|
||||
auto fake_column = resolved_column->getColumn();
|
||||
|
||||
// Identifier is resolved to ColumnNode, but we need to get rid of ALIAS columns
|
||||
// and also fix references to source expression (now column is referencing original table expression).
|
||||
ApplyAliasColumnExpressionsVisitor visitor(replacement_table_expression);
|
||||
visitor.visit(fake_node);
|
||||
|
||||
@ -860,7 +872,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_
|
||||
|
||||
QueryTreeNodePtr column_node;
|
||||
|
||||
|
||||
// Replace all references to ALIAS columns in the query by expressions.
|
||||
if (is_alias)
|
||||
{
|
||||
QueryTreeNodePtr fake_node = std::make_shared<IdentifierNode>(Identifier{column});
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <Interpreters/FunctionNameNormalizer.h>
|
||||
#include <Parsers/ExpressionListParsers.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -110,7 +111,10 @@ using FindAggregateFunctionVisitor = InDepthNodeVisitor<FindAggregateFunctionFin
|
||||
TTLDescription::TTLDescription(const TTLDescription & other)
|
||||
: mode(other.mode)
|
||||
, expression_ast(other.expression_ast ? other.expression_ast->clone() : nullptr)
|
||||
, expression_columns(other.expression_columns)
|
||||
, result_column(other.result_column)
|
||||
, where_expression_ast(other.where_expression_ast ? other.where_expression_ast->clone() : nullptr)
|
||||
, where_expression_columns(other.where_expression_columns)
|
||||
, where_result_column(other.where_result_column)
|
||||
, group_by_keys(other.group_by_keys)
|
||||
, set_parts(other.set_parts)
|
||||
@ -120,11 +124,6 @@ TTLDescription::TTLDescription(const TTLDescription & other)
|
||||
, if_exists(other.if_exists)
|
||||
, recompression_codec(other.recompression_codec)
|
||||
{
|
||||
if (other.expression)
|
||||
expression = other.expression->clone();
|
||||
|
||||
if (other.where_expression)
|
||||
where_expression = other.where_expression->clone();
|
||||
}
|
||||
|
||||
TTLDescription & TTLDescription::operator=(const TTLDescription & other)
|
||||
@ -138,17 +137,15 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other)
|
||||
else
|
||||
expression_ast.reset();
|
||||
|
||||
if (other.expression)
|
||||
expression = other.expression->clone();
|
||||
else
|
||||
expression.reset();
|
||||
|
||||
expression_columns = other.expression_columns;
|
||||
result_column = other.result_column;
|
||||
if (other.where_expression)
|
||||
where_expression = other.where_expression->clone();
|
||||
else
|
||||
where_expression.reset();
|
||||
|
||||
if (other.where_expression_ast)
|
||||
where_expression_ast = other.where_expression_ast->clone();
|
||||
else
|
||||
where_expression_ast.reset();
|
||||
|
||||
where_expression_columns = other.where_expression_columns;
|
||||
where_result_column = other.where_result_column;
|
||||
group_by_keys = other.group_by_keys;
|
||||
set_parts = other.set_parts;
|
||||
@ -165,6 +162,44 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other)
|
||||
return * this;
|
||||
}
|
||||
|
||||
static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndTypesList & columns, const ContextPtr & context)
|
||||
{
|
||||
ExpressionAndSets result;
|
||||
auto ttl_string = queryToString(ast);
|
||||
auto syntax_analyzer_result = TreeRewriter(context).analyze(ast, columns);
|
||||
ExpressionAnalyzer analyzer(ast, syntax_analyzer_result, context);
|
||||
auto dag = analyzer.getActionsDAG(false);
|
||||
|
||||
const auto * col = &dag->findInOutputs(ast->getColumnName());
|
||||
if (col->result_name != ttl_string)
|
||||
col = &dag->addAlias(*col, ttl_string);
|
||||
|
||||
dag->getOutputs() = {col};
|
||||
dag->removeUnusedActions();
|
||||
|
||||
result.expression = std::make_shared<ExpressionActions>(dag, ExpressionActionsSettings::fromContext(context));
|
||||
result.sets = analyzer.getPreparedSets();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
ExpressionAndSets TTLDescription::buildExpression(const ContextPtr & context) const
|
||||
{
|
||||
auto ast = expression_ast->clone();
|
||||
return buildExpressionAndSets(ast, expression_columns, context);
|
||||
}
|
||||
|
||||
ExpressionAndSets TTLDescription::buildWhereExpression(const ContextPtr & context) const
|
||||
{
|
||||
if (where_expression_ast)
|
||||
{
|
||||
auto ast = where_expression_ast->clone();
|
||||
return buildExpressionAndSets(ast, where_expression_columns, context);
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
TTLDescription TTLDescription::getTTLFromAST(
|
||||
const ASTPtr & definition_ast,
|
||||
const ColumnsDescription & columns,
|
||||
@ -182,9 +217,12 @@ TTLDescription TTLDescription::getTTLFromAST(
|
||||
result.expression_ast = definition_ast->clone();
|
||||
|
||||
auto ttl_ast = result.expression_ast->clone();
|
||||
auto syntax_analyzer_result = TreeRewriter(context).analyze(ttl_ast, columns.getAllPhysical());
|
||||
result.expression = ExpressionAnalyzer(ttl_ast, syntax_analyzer_result, context).getActions(false);
|
||||
result.result_column = ttl_ast->getColumnName();
|
||||
auto expression = buildExpressionAndSets(ttl_ast, columns.getAllPhysical(), context).expression;
|
||||
result.expression_columns = expression->getRequiredColumnsWithTypes();
|
||||
|
||||
result.result_column = expression->getSampleBlock().safeGetByPosition(0).name;
|
||||
|
||||
ExpressionActionsPtr where_expression;
|
||||
|
||||
if (ttl_element == nullptr) /// columns TTL
|
||||
{
|
||||
@ -202,9 +240,10 @@ TTLDescription TTLDescription::getTTLFromAST(
|
||||
{
|
||||
if (ASTPtr where_expr_ast = ttl_element->where())
|
||||
{
|
||||
auto where_syntax_result = TreeRewriter(context).analyze(where_expr_ast, columns.getAllPhysical());
|
||||
result.where_expression = ExpressionAnalyzer(where_expr_ast, where_syntax_result, context).getActions(false);
|
||||
result.where_result_column = where_expr_ast->getColumnName();
|
||||
result.where_expression_ast = where_expr_ast->clone();
|
||||
where_expression = buildExpressionAndSets(where_expr_ast, columns.getAllPhysical(), context).expression;
|
||||
result.where_expression_columns = where_expression->getRequiredColumnsWithTypes();
|
||||
result.where_result_column = where_expression->getSampleBlock().safeGetByPosition(0).name;
|
||||
}
|
||||
}
|
||||
else if (ttl_element->mode == TTLMode::GROUP_BY)
|
||||
@ -229,17 +268,17 @@ TTLDescription TTLDescription::getTTLFromAST(
|
||||
for (const auto & ast : ttl_element->group_by_assignments)
|
||||
{
|
||||
const auto assignment = ast->as<const ASTAssignment &>();
|
||||
auto expression = assignment.expression();
|
||||
auto ass_expression = assignment.expression();
|
||||
|
||||
FindAggregateFunctionVisitor::Data data{false};
|
||||
FindAggregateFunctionVisitor(data).visit(expression);
|
||||
FindAggregateFunctionVisitor(data).visit(ass_expression);
|
||||
|
||||
if (!data.has_aggregate_function)
|
||||
throw Exception(ErrorCodes::BAD_TTL_EXPRESSION,
|
||||
"Invalid expression for assignment of column {}. Should contain an aggregate function", assignment.column_name);
|
||||
|
||||
expression = addTypeConversionToAST(std::move(expression), columns.getPhysical(assignment.column_name).type->getName());
|
||||
aggregations.emplace_back(assignment.column_name, std::move(expression));
|
||||
ass_expression = addTypeConversionToAST(std::move(ass_expression), columns.getPhysical(assignment.column_name).type->getName());
|
||||
aggregations.emplace_back(assignment.column_name, std::move(ass_expression));
|
||||
aggregation_columns_set.insert(assignment.column_name);
|
||||
}
|
||||
|
||||
@ -297,7 +336,7 @@ TTLDescription TTLDescription::getTTLFromAST(
|
||||
}
|
||||
}
|
||||
|
||||
checkTTLExpression(result.expression, result.result_column, is_attach || context->getSettingsRef().allow_suspicious_ttl_expressions);
|
||||
checkTTLExpression(expression, result.result_column, is_attach || context->getSettingsRef().allow_suspicious_ttl_expressions);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -350,7 +389,7 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST(
|
||||
auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key, is_attach);
|
||||
if (ttl.mode == TTLMode::DELETE)
|
||||
{
|
||||
if (!ttl.where_expression)
|
||||
if (!ttl.where_expression_ast)
|
||||
{
|
||||
if (have_unconditional_delete_ttl)
|
||||
throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "More than one DELETE TTL expression without WHERE expression is not allowed");
|
||||
|
@ -35,6 +35,15 @@ struct TTLAggregateDescription
|
||||
|
||||
using TTLAggregateDescriptions = std::vector<TTLAggregateDescription>;
|
||||
|
||||
class PreparedSets;
|
||||
using PreparedSetsPtr = std::shared_ptr<PreparedSets>;
|
||||
|
||||
struct ExpressionAndSets
|
||||
{
|
||||
ExpressionActionsPtr expression;
|
||||
PreparedSetsPtr sets;
|
||||
};
|
||||
|
||||
/// Common struct for TTL record in storage
|
||||
struct TTLDescription
|
||||
{
|
||||
@ -44,9 +53,10 @@ struct TTLDescription
|
||||
/// TTL d + INTERVAL 1 DAY
|
||||
/// ^~~~~~~~~~~~~~~~~~~^
|
||||
ASTPtr expression_ast;
|
||||
NamesAndTypesList expression_columns;
|
||||
|
||||
/// Expression actions evaluated from AST
|
||||
ExpressionActionsPtr expression;
|
||||
ExpressionAndSets buildExpression(const ContextPtr & context) const;
|
||||
|
||||
/// Result column of this TTL expression
|
||||
String result_column;
|
||||
@ -54,7 +64,9 @@ struct TTLDescription
|
||||
/// WHERE part in TTL expression
|
||||
/// TTL ... WHERE x % 10 == 0 and y > 5
|
||||
/// ^~~~~~~~~~~~~~~~~~~~~~^
|
||||
ExpressionActionsPtr where_expression;
|
||||
ASTPtr where_expression_ast;
|
||||
NamesAndTypesList where_expression_columns;
|
||||
ExpressionAndSets buildWhereExpression(const ContextPtr & context) const;
|
||||
|
||||
/// Name of result column from WHERE expression
|
||||
String where_result_column;
|
||||
|
@@ -262,7 +262,7 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex
    auto client = StorageAzureBlob::createClient(configuration, !is_insert_query);
    auto settings = StorageAzureBlob::createSettings(context);

    auto object_storage = std::make_unique<AzureObjectStorage>("AzureBlobStorageTableFunction", std::move(client), std::move(settings));
    auto object_storage = std::make_unique<AzureObjectStorage>("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container);
    return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false);
}

@@ -293,7 +293,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct

    StoragePtr storage = std::make_shared<StorageAzureBlob>(
        configuration,
        std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings)),
        std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container),
        context,
        StorageID(getDatabaseName(), table_name),
        columns,

@@ -40,7 +40,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl(
        /// On worker node this filename won't contain globs
        storage = std::make_shared<StorageAzureBlob>(
            configuration,
            std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings)),
            std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container),
            context,
            StorageID(getDatabaseName(), table_name),
            columns,
@@ -55,7 +55,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl(
        storage = std::make_shared<StorageAzureBlobCluster>(
            cluster_name,
            configuration,
            std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings)),
            std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container),
            StorageID(getDatabaseName(), table_name),
            columns,
            ConstraintsDescription{},
348
tests/ci/ci.py
348
tests/ci/ci.py
@ -1,5 +1,8 @@
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
from copy import deepcopy
|
||||
from dataclasses import asdict, dataclass
|
||||
from enum import Enum
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@ -7,16 +10,14 @@ import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import asdict, dataclass
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence, Union
|
||||
|
||||
import docker_images_helper
|
||||
import upload_result_helper
|
||||
from build_check import get_release_or_pr
|
||||
from ci_config import CI_CONFIG, Build, JobNames, Labels
|
||||
from ci_utils import GHActions, is_hex
|
||||
from ci_config import CI_CONFIG, Build, Labels, JobNames
|
||||
from ci_utils import GHActions, is_hex, normalize_string
|
||||
from clickhouse_helper import (
|
||||
CiLogsCredentials,
|
||||
ClickHouseHelper,
|
||||
@ -48,7 +49,7 @@ from git_helper import GIT_PREFIX, Git
|
||||
from git_helper import Runner as GitRunner
|
||||
from github import Github
|
||||
from pr_info import PRInfo
|
||||
from report import SUCCESS, BuildResult, JobReport
|
||||
from report import ERROR, SUCCESS, BuildResult, JobReport
|
||||
from s3_helper import S3Helper
|
||||
from version_helper import get_version_from_repo
|
||||
|
||||
@ -88,6 +89,7 @@ class CiCache:
|
||||
class RecordType(Enum):
|
||||
SUCCESSFUL = "successful"
|
||||
PENDING = "pending"
|
||||
FAILED = "failed"
|
||||
|
||||
@dataclass
|
||||
class Record:
|
||||
@ -249,6 +251,13 @@ class CiCache:
|
||||
)
|
||||
return record
|
||||
|
||||
def print_status(self):
|
||||
for record_type in self.RecordType:
|
||||
GHActions.print_in_group(
|
||||
f"Cache records: [{record_type}]", list(self.records[record_type])
|
||||
)
|
||||
return self
|
||||
|
||||
def update(self):
|
||||
"""
|
||||
Pulls cache records from s3. Only records name w/o content.
|
||||
@ -260,9 +269,6 @@ class CiCache:
|
||||
path = self.cache_s3_paths[job_type]
|
||||
records = self.s3.list_prefix(f"{path}{prefix}", S3_BUILDS_BUCKET)
|
||||
records = [record.split("/")[-1] for record in records]
|
||||
GHActions.print_in_group(
|
||||
f"Cache records: [{record_type}] in [{job_type.value}]", records
|
||||
)
|
||||
for file in records:
|
||||
record = self._parse_record_file_name(
|
||||
record_type=record_type, file_name=file
|
||||
@ -384,6 +390,9 @@ class CiCache:
|
||||
if record_type == self.RecordType.SUCCESSFUL:
|
||||
assert isinstance(status, CommitStatusData)
|
||||
status.dump_to_file(record_file)
|
||||
elif record_type == self.RecordType.FAILED:
|
||||
assert isinstance(status, CommitStatusData)
|
||||
status.dump_to_file(record_file)
|
||||
elif record_type == self.RecordType.PENDING:
|
||||
assert isinstance(status, PendingState)
|
||||
with open(record_file, "w") as json_file:
|
||||
@ -488,6 +497,16 @@ class CiCache:
|
||||
self.RecordType.SUCCESSFUL, job, batch, num_batches, release_branch
|
||||
)
|
||||
|
||||
def is_failed(
|
||||
self, job: str, batch: int, num_batches: int, release_branch: bool
|
||||
) -> bool:
|
||||
"""
|
||||
checks if a given job has already been done with failure
|
||||
"""
|
||||
return self.exist(
|
||||
self.RecordType.FAILED, job, batch, num_batches, release_branch
|
||||
)
|
||||
|
||||
def is_pending(
|
||||
self, job: str, batch: int, num_batches: int, release_branch: bool
|
||||
) -> bool:
|
||||
@ -495,8 +514,9 @@ class CiCache:
|
||||
check pending record in the cache for a given job
|
||||
@release_branch - checks that "release" attribute is set for a record
|
||||
"""
|
||||
if self.is_successful(job, batch, num_batches, release_branch):
|
||||
# successful record is present - not pending
|
||||
if self.is_successful(
|
||||
job, batch, num_batches, release_branch
|
||||
) or self.is_failed(job, batch, num_batches, release_branch):
|
||||
return False
|
||||
|
||||
return self.exist(
|
||||
@ -524,6 +544,27 @@ class CiCache:
|
||||
release_branch,
|
||||
)
|
||||
|
||||
def push_failed(
|
||||
self,
|
||||
job: str,
|
||||
batch: int,
|
||||
num_batches: int,
|
||||
job_status: CommitStatusData,
|
||||
release_branch: bool = False,
|
||||
) -> None:
|
||||
"""
|
||||
Pushes a cache record of type Failed (CommitStatusData)
|
||||
@release_branch adds "release" attribute to a record
|
||||
"""
|
||||
self.push(
|
||||
self.RecordType.FAILED,
|
||||
job,
|
||||
[batch],
|
||||
num_batches,
|
||||
job_status,
|
||||
release_branch,
|
||||
)
|
||||
|
||||
def push_pending(
|
||||
self, job: str, batches: List[int], num_batches: int, release_branch: bool
|
||||
) -> None:
|
||||
@ -591,46 +632,85 @@ class CiCache:
|
||||
bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path
|
||||
)
|
||||
|
||||
# def await_jobs(self, jobs_with_params: Dict[str, Dict[str, Any]]) -> List[str]:
|
||||
# if not jobs_with_params:
|
||||
# return []
|
||||
# print(f"Start awaiting jobs [{list(jobs_with_params)}]")
|
||||
# poll_interval_sec = 180
|
||||
# start_at = int(time.time())
|
||||
# TIMEOUT = 3000
|
||||
# expired_sec = 0
|
||||
# done_jobs = [] # type: List[str]
|
||||
# while expired_sec < TIMEOUT and jobs_with_params:
|
||||
# time.sleep(poll_interval_sec)
|
||||
# self.update()
|
||||
# pending_finished: List[str] = []
|
||||
# for job_name in jobs_with_params:
|
||||
# num_batches = jobs_with_params[job_name]["num_batches"]
|
||||
# for batch in jobs_with_params[job_name]["batches"]:
|
||||
# if self.is_pending(job_name, batch, num_batches):
|
||||
# continue
|
||||
# print(
|
||||
# f"Job [{job_name}_[{batch}/{num_batches}]] is not pending anymore"
|
||||
# )
|
||||
# pending_finished.append(job_name)
|
||||
# if pending_finished:
|
||||
# # restart timer
|
||||
# start_at = int(time.time())
|
||||
# expired_sec = 0
|
||||
# # remove finished jobs from awaiting list
|
||||
# for job in pending_finished:
|
||||
# del jobs_with_params[job]
|
||||
# done_jobs.append(job)
|
||||
# else:
|
||||
# expired_sec = int(time.time()) - start_at
|
||||
# print(f" ...awaiting continues... time left [{TIMEOUT - expired_sec}]")
|
||||
# if done_jobs:
|
||||
# print(
|
||||
# f"Awaiting OK. Left jobs: [{list(jobs_with_params)}], finished jobs: [{done_jobs}]"
|
||||
# )
|
||||
# else:
|
||||
# print("Awaiting FAILED. No job has finished.")
|
||||
# return done_jobs
|
||||
def await_jobs(
|
||||
self, jobs_with_params: Dict[str, Dict[str, Any]], is_release_branch: bool
|
||||
) -> Dict[str, List[int]]:
|
||||
"""
|
||||
await pending jobs to be finished
|
||||
@jobs_with_params - jobs to await. {JOB_NAME: {"batches": [BATCHES...], "num_batches": NUM_BATCHES}}
|
||||
returns successfully finished jobs: {JOB_NAME: [BATCHES...]}
|
||||
"""
|
||||
if not jobs_with_params:
|
||||
return {}
|
||||
poll_interval_sec = 300
|
||||
TIMEOUT = 3600
|
||||
await_finished: Dict[str, List[int]] = {}
|
||||
round_cnt = 0
|
||||
while len(jobs_with_params) > 4 and round_cnt < 5:
|
||||
round_cnt += 1
|
||||
GHActions.print_in_group(
|
||||
f"Wait pending jobs, round [{round_cnt}]:", list(jobs_with_params)
|
||||
)
|
||||
# this is initial approach to wait pending jobs:
|
||||
# start waiting for the next TIMEOUT seconds if there are more than X(=4) jobs to wait
|
||||
# wait TIMEOUT seconds in rounds. Y(=5) is the max number of rounds
|
||||
expired_sec = 0
|
||||
start_at = int(time.time())
|
||||
while expired_sec < TIMEOUT and jobs_with_params:
|
||||
time.sleep(poll_interval_sec)
|
||||
self.update()
|
||||
jobs_with_params_copy = deepcopy(jobs_with_params)
|
||||
for job_name in jobs_with_params:
|
||||
num_batches = jobs_with_params[job_name]["num_batches"]
|
||||
job_config = CI_CONFIG.get_job_config(job_name)
|
||||
for batch in jobs_with_params[job_name]["batches"]:
|
||||
if self.is_pending(
|
||||
job_name,
|
||||
batch,
|
||||
num_batches,
|
||||
release_branch=is_release_branch
|
||||
and job_config.required_on_release_branch,
|
||||
):
|
||||
continue
|
||||
print(
|
||||
f"Job [{job_name}_[{batch}/{num_batches}]] is not pending anymore"
|
||||
)
|
||||
|
||||
# some_job_ready = True
|
||||
jobs_with_params_copy[job_name]["batches"].remove(batch)
|
||||
if not jobs_with_params_copy[job_name]["batches"]:
|
||||
del jobs_with_params_copy[job_name]
|
||||
|
||||
if not self.is_successful(
|
||||
job_name,
|
||||
batch,
|
||||
num_batches,
|
||||
release_branch=is_release_branch
|
||||
and job_config.required_on_release_branch,
|
||||
):
|
||||
print(
|
||||
f"NOTE: Job [{job_name}:{batch}] finished but no success - remove from awaiting list, do not add to ready"
|
||||
)
|
||||
continue
|
||||
if job_name in await_finished:
|
||||
await_finished[job_name].append(batch)
|
||||
else:
|
||||
await_finished[job_name] = [batch]
|
||||
jobs_with_params = jobs_with_params_copy
|
||||
expired_sec = int(time.time()) - start_at
|
||||
print(
|
||||
f"...awaiting continues... seconds left [{TIMEOUT - expired_sec}]"
|
||||
)
|
||||
if await_finished:
|
||||
GHActions.print_in_group(
|
||||
f"Finished jobs, round [{round_cnt}]:",
|
||||
[f"{job}:{batches}" for job, batches in await_finished.items()],
|
||||
)
|
||||
GHActions.print_in_group(
|
||||
"Remaining jobs:",
|
||||
[f"{job}:{params['batches']}" for job, params in jobs_with_params.items()],
|
||||
)
|
||||
return await_finished
|
||||
|
||||
|
||||
def get_check_name(check_name: str, batch: int, num_batches: int) -> str:
|
||||
@ -734,6 +814,12 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace:
|
||||
default=False,
|
||||
help="skip fetching data about job runs, used in --configure action (for debugging and nigthly ci)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--force",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Used with --run, force the job to run, omitting the ci cache",
|
||||
)
|
||||
# FIXME: remove, not used
|
||||
parser.add_argument(
|
||||
"--rebuild-all-binaries",
|
||||
@ -832,7 +918,10 @@ def _pre_action(s3, indata, pr_info):
|
||||
ci_cache = CiCache(s3, indata["jobs_data"]["digests"])
|
||||
|
||||
# for release/master branches reports must be from the same branches
|
||||
report_prefix = pr_info.head_ref if pr_info.number == 0 else ""
|
||||
report_prefix = normalize_string(pr_info.head_ref) if pr_info.number == 0 else ""
|
||||
print(
|
||||
f"Use report prefix [{report_prefix}], pr_num [{pr_info.number}], head_ref [{pr_info.head_ref}]"
|
||||
)
|
||||
reports_files = ci_cache.download_build_reports(file_prefix=report_prefix)
|
||||
print(f"Pre action done. Report files [{reports_files}] have been downloaded")
|
||||
|
||||
@ -883,8 +972,19 @@ def _mark_success_action(
|
||||
job, batch, num_batches, job_status, pr_info.is_release_branch()
|
||||
)
|
||||
print(f"Job [{job}] is ok")
|
||||
elif job_status:
|
||||
print(f"Job [{job}] is not ok, status [{job_status.status}]")
|
||||
elif job_status and not job_status.is_ok():
|
||||
ci_cache.push_failed(
|
||||
job, batch, num_batches, job_status, pr_info.is_release_branch()
|
||||
)
|
||||
print(f"Job [{job}] is failed with status [{job_status.status}]")
|
||||
else:
|
||||
job_status = CommitStatusData(
|
||||
description="dummy description", status=ERROR, report_url="dummy url"
|
||||
)
|
||||
ci_cache.push_failed(
|
||||
job, batch, num_batches, job_status, pr_info.is_release_branch()
|
||||
)
|
||||
print(f"No CommitStatusData for [{job}], push dummy failure to ci_cache")
|
||||
|
||||
|
||||
def _print_results(result: Any, outfile: Optional[str], pretty: bool = False) -> None:
|
||||
@ -992,8 +1092,8 @@ def _configure_jobs(
|
||||
jobs_to_do: List[str] = []
|
||||
jobs_to_skip: List[str] = []
|
||||
digests: Dict[str, str] = {}
|
||||
print("::group::Job Digests")
|
||||
|
||||
print("::group::Job Digests")
|
||||
for job in CI_CONFIG.job_generator():
|
||||
digest = job_digester.get_job_digest(CI_CONFIG.get_digest_config(job))
|
||||
digests[job] = digest
|
||||
@ -1003,7 +1103,8 @@ def _configure_jobs(
|
||||
## b. check what we need to run
|
||||
ci_cache = None
|
||||
if not ci_cache_disabled:
|
||||
ci_cache = CiCache(s3, digests)
|
||||
ci_cache = CiCache(s3, digests).update()
|
||||
ci_cache.print_status()
|
||||
|
||||
jobs_to_wait: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
@ -1012,10 +1113,13 @@ def _configure_jobs(
|
||||
job_config = CI_CONFIG.get_job_config(job)
|
||||
num_batches: int = job_config.num_batches
|
||||
batches_to_do: List[int] = []
|
||||
add_to_skip = False
|
||||
|
||||
for batch in range(num_batches): # type: ignore
|
||||
if job_config.pr_only and pr_info.is_release_branch():
|
||||
continue
|
||||
if job_config.release_only and not pr_info.is_release_branch():
|
||||
continue
|
||||
if job_config.run_by_label:
|
||||
# this job controlled by label, add to todo if its label is set in pr
|
||||
if job_config.run_by_label in pr_info.labels:
|
||||
@ -1036,7 +1140,13 @@ def _configure_jobs(
|
||||
batches_to_do.append(batch)
|
||||
|
||||
# check if it's pending in the cache
|
||||
if ci_cache.is_pending(job, batch, num_batches, release_branch=False):
|
||||
if ci_cache.is_pending(
|
||||
job,
|
||||
batch,
|
||||
num_batches,
|
||||
release_branch=pr_info.is_release_branch()
|
||||
and job_config.required_on_release_branch,
|
||||
):
|
||||
if job in jobs_to_wait:
|
||||
jobs_to_wait[job]["batches"].append(batch)
|
||||
else:
|
||||
@ -1044,10 +1154,12 @@ def _configure_jobs(
|
||||
"batches": [batch],
|
||||
"num_batches": num_batches,
|
||||
}
|
||||
else:
|
||||
add_to_skip = True
|
||||
|
||||
if batches_to_do:
|
||||
jobs_to_do.append(job)
|
||||
elif not job_config.run_by_label:
|
||||
elif add_to_skip:
|
||||
# treat job as being skipped only if it's controlled by digest
|
||||
jobs_to_skip.append(job)
|
||||
jobs_params[job] = {
|
||||
@ -1119,29 +1231,19 @@ def _configure_jobs(
|
||||
"digests": digests,
|
||||
"jobs_to_do": jobs_to_do,
|
||||
"jobs_to_skip": jobs_to_skip,
|
||||
"jobs_to_wait": jobs_to_wait,
|
||||
"jobs_to_wait": {
|
||||
job: params for job, params in jobs_to_wait.items() if job in jobs_to_do
|
||||
},
|
||||
"jobs_params": {
|
||||
job: params for job, params in jobs_params.items() if job in jobs_to_do
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None:
|
||||
if indata["ci_flags"][Labels.NO_CI_CACHE]:
|
||||
print("CI cache is disabled - skip restoring commit statuses from CI cache")
|
||||
return
|
||||
job_digests = indata["jobs_data"]["digests"]
|
||||
ci_cache = CiCache(s3, job_digests).update().fetch_records_data()
|
||||
|
||||
# create GH status
|
||||
pr_info = PRInfo()
|
||||
commit = get_commit(Github(get_best_robot_token(), per_page=100), pr_info.sha)
|
||||
|
||||
def _run_create_status(job: str, batch: int, num_batches: int) -> None:
|
||||
job_status = ci_cache.get_successful(job, batch, num_batches)
|
||||
if not job_status:
|
||||
return
|
||||
print(f"Going to re-create GH status for job [{job}] sha [{pr_info.sha}]")
|
||||
def _create_gh_status(
|
||||
commit: Any, job: str, batch: int, num_batches: int, job_status: CommitStatusData
|
||||
) -> None:
|
||||
print(f"Going to re-create GH status for job [{job}]")
|
||||
assert job_status.status == SUCCESS, "BUG!"
|
||||
commit.create_status(
|
||||
state=job_status.status,
|
||||
@ -1153,15 +1255,40 @@ def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None:
|
||||
context=get_check_name(job, batch=batch, num_batches=num_batches),
|
||||
)
|
||||
|
||||
|
||||
def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None:
|
||||
if indata["ci_flags"][Labels.NO_CI_CACHE]:
|
||||
print("CI cache is disabled - skip restoring commit statuses from CI cache")
|
||||
return
|
||||
job_digests = indata["jobs_data"]["digests"]
|
||||
jobs_to_skip = indata["jobs_data"]["jobs_to_skip"]
|
||||
jobs_to_do = indata["jobs_data"]["jobs_to_do"]
|
||||
ci_cache = CiCache(s3, job_digests).update().fetch_records_data().print_status()
|
||||
|
||||
# create GH status
|
||||
pr_info = PRInfo()
|
||||
commit = get_commit(Github(get_best_robot_token(), per_page=100), pr_info.sha)
|
||||
|
||||
def _concurrent_create_status(job: str, batch: int, num_batches: int) -> None:
|
||||
job_status = ci_cache.get_successful(job, batch, num_batches)
|
||||
if not job_status:
|
||||
return
|
||||
_create_gh_status(commit, job, batch, num_batches, job_status)
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = []
|
||||
for job in job_digests:
|
||||
if job not in jobs_to_skip and job not in jobs_to_do:
|
||||
# no need to create status for job that are not supposed to be executed
|
||||
continue
|
||||
if CI_CONFIG.is_build_job(job):
|
||||
# no GH status for build jobs
|
||||
continue
|
||||
num_batches = CI_CONFIG.get_job_config(job).num_batches
|
||||
for batch in range(num_batches):
|
||||
future = executor.submit(_run_create_status, job, batch, num_batches)
|
||||
future = executor.submit(
|
||||
_concurrent_create_status, job, batch, num_batches
|
||||
)
|
||||
futures.append(future)
|
||||
done, _ = concurrent.futures.wait(futures)
|
||||
for future in done:
|
||||
@ -1194,7 +1321,7 @@ def _upload_build_artifacts(
|
||||
(
|
||||
get_release_or_pr(pr_info, get_version_from_repo())[1],
|
||||
pr_info.sha,
|
||||
CI_CONFIG.normalize_string(build_name),
|
||||
normalize_string(build_name),
|
||||
"performance.tar.zst",
|
||||
)
|
||||
)
|
||||
@ -1509,19 +1636,37 @@ def main() -> int:
|
||||
if not args.skip_jobs and pr_info.has_changes_in_documentation_only():
|
||||
_update_config_for_docs_only(jobs_data)
|
||||
|
||||
# TODO: await pending jobs
|
||||
# wait for pending jobs to be finished, await_jobs is a long blocking call if any job has to be awaited
|
||||
# awaited_jobs = ci_cache.await_jobs(jobs_data.get("jobs_to_wait", {}))
|
||||
# for job in awaited_jobs:
|
||||
# jobs_to_do = jobs_data["jobs_to_do"]
|
||||
# if job in jobs_to_do:
|
||||
# jobs_to_do.remove(job)
|
||||
# else:
|
||||
# assert False, "BUG"
|
||||
if not args.skip_jobs:
|
||||
ci_cache = CiCache(s3, jobs_data["digests"])
|
||||
|
||||
if (
|
||||
pr_info.is_release_branch()
|
||||
or pr_info.event.get("pull_request", {})
|
||||
.get("user", {})
|
||||
.get("login", "not_maxknv")
|
||||
== "maxknv"
|
||||
):
|
||||
# wait for pending jobs to be finished, await_jobs is a long blocking call
|
||||
# wait pending jobs (for now only on release/master branches)
|
||||
ready_jobs_batches_dict = ci_cache.await_jobs(
|
||||
jobs_data.get("jobs_to_wait", {}), pr_info.is_release_branch()
|
||||
)
|
||||
jobs_to_do = jobs_data["jobs_to_do"]
|
||||
jobs_to_skip = jobs_data["jobs_to_skip"]
|
||||
jobs_params = jobs_data["jobs_params"]
|
||||
for job, batches in ready_jobs_batches_dict.items():
|
||||
if job not in jobs_params:
|
||||
print(f"WARNING: Job [{job}] is not in the params list")
|
||||
continue
|
||||
for batch in batches:
|
||||
jobs_params[job]["batches"].remove(batch)
|
||||
if not jobs_params[job]["batches"]:
|
||||
jobs_to_do.remove(job)
|
||||
jobs_to_skip.append(job)
|
||||
del jobs_params[job]
|
||||
|
||||
# set planned jobs as pending in the CI cache if on the master
|
||||
if pr_info.is_master() and not args.skip_jobs:
|
||||
ci_cache = CiCache(s3, jobs_data["digests"])
|
||||
if pr_info.is_master():
|
||||
for job in jobs_data["jobs_to_do"]:
|
||||
config = CI_CONFIG.get_job_config(job)
|
||||
if config.run_always or config.run_by_label:
|
||||
@ -1534,6 +1679,9 @@ def main() -> int:
            release_branch=pr_info.is_release_branch(),
        )

    if "jobs_to_wait" in jobs_data:
        del jobs_data["jobs_to_wait"]

    # conclude results
    result["git_ref"] = git_ref
    result["version"] = version
@ -1608,25 +1756,17 @@ def main() -> int:
            check_name, args.batch, job_config.num_batches
        )
        assert job_status, "BUG"
        commit.create_status(
            state=job_status.status,
            target_url=job_status.report_url,
            description=format_description(
                f"Reused from [{job_status.pr_num}-{job_status.sha[0:8]}]: "
                f"{job_status.description}"
            ),
            context=get_check_name(
        _create_gh_status(
            commit,
            check_name,
            batch=args.batch,
            num_batches=job_config.num_batches,
            ),
            args.batch,
            job_config.num_batches,
            job_status,
        )
        previous_status = job_status.status
        print("::group::Commit Status Data")
        print(job_status)
        print("::endgroup::")
        GHActions.print_in_group("Commit Status Data", job_status)

    if previous_status:
    if previous_status and not args.force:
        print(
            f"Commit status or Build Report is already present - job will be skipped with status: [{previous_status}]"
        )
@ -1648,7 +1788,7 @@ def main() -> int:
    if CI_CONFIG.is_build_job(args.job_name):
        assert (
            indata
        ), "--infile with config must be provided for POST action of a build type job [{args.job_name}]"
        ), f"--infile with config must be provided for POST action of a build type job [{args.job_name}]"
        build_name = args.job_name
        s3_path_prefix = "/".join(
            (
@ -1676,7 +1816,7 @@ def main() -> int:
            (
                get_release_or_pr(pr_info, get_version_from_repo())[0],
                pr_info.sha,
                CI_CONFIG.normalize_string(
                normalize_string(
                    job_report.check_name or _get_ext_check_name(args.job_name)
                ),
            )

@ -22,6 +22,8 @@ class Labels(metaclass=WithIter):
    CI_SET_ARM = "ci_set_arm"
    CI_SET_INTEGRATION = "ci_set_integration"

    libFuzzer = "libFuzzer"


class Build(metaclass=WithIter):
    PACKAGE_RELEASE = "package_release"
@ -138,8 +140,6 @@ class JobNames(metaclass=WithIter):
    DOCS_CHECK = "Docs check"
    BUGFIX_VALIDATE = "tests bugfix validate check"

    MARK_RELEASE_READY = "Mark Commit Release Ready"


# dynamically update JobName with Build jobs
for attr_name in dir(Build):
@ -193,6 +193,8 @@ class JobConfig:
    required_on_release_branch: bool = False
    # job is for pr workflow only
    pr_only: bool = False
    # job is for release/master branches only
    release_only: bool = False


@dataclass
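The new pr_only and release_only fields suggest per-workflow gating of jobs. A hedged, hypothetical sketch of how such flags could be checked follows; the actual selection code lives elsewhere in tests/ci and is not part of this hunk.

from dataclasses import dataclass

# Hypothetical illustration only: shows the intent of the new JobConfig flags,
# based on the comments in the hunk above; the real logic may differ.
@dataclass
class JobConfigSketch:
    pr_only: bool = False       # job is for the PR workflow only
    release_only: bool = False  # job is for release/master branches only

def should_run(cfg: JobConfigSketch, is_release_branch: bool) -> bool:
    if cfg.release_only and not is_release_branch:
        return False
    if cfg.pr_only and is_release_branch:
        return False
    return True

# An upgrade test configured with pr_only=True would be skipped on a release branch:
print(should_run(JobConfigSketch(pr_only=True), is_release_branch=True))   # False
print(should_run(JobConfigSketch(pr_only=True), is_release_branch=False))  # True
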
@ -790,6 +792,7 @@ CI_CONFIG = CiConfig(
            name=Build.FUZZERS,
            compiler="clang-17",
            package_type="fuzzers",
            job_config=JobConfig(run_by_label=Labels.libFuzzer),
        ),
    },
    builds_report_config={
@ -823,9 +826,6 @@ CI_CONFIG = CiConfig(
        ),
    },
    other_jobs_configs={
        JobNames.MARK_RELEASE_READY: TestConfig(
            "", job_config=JobConfig(required_on_release_branch=True)
        ),
        JobNames.DOCKER_SERVER: TestConfig(
            "",
            job_config=JobConfig(
@ -909,13 +909,6 @@ CI_CONFIG = CiConfig(
        JobNames.STATEFUL_TEST_AARCH64: TestConfig(
            Build.PACKAGE_AARCH64, job_config=JobConfig(**stateful_test_common_params) # type: ignore
        ),
        # FIXME: delete?
        # "Stateful tests (release, DatabaseOrdinary)": TestConfig(
        #     Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore
        # ),
        # "Stateful tests (release, DatabaseReplicated)": TestConfig(
        #     Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore
        # ),
        # Stateful tests for parallel replicas
        JobNames.STATEFUL_TEST_PARALLEL_REPL_RELEASE: TestConfig(
            Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore
@ -997,16 +990,16 @@ CI_CONFIG = CiConfig(
            Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore
        ),
        JobNames.UPGRADE_TEST_ASAN: TestConfig(
            Build.PACKAGE_ASAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore
            Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore
        ),
        JobNames.UPGRADE_TEST_TSAN: TestConfig(
            Build.PACKAGE_TSAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore
            Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore
        ),
        JobNames.UPGRADE_TEST_MSAN: TestConfig(
            Build.PACKAGE_MSAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore
            Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore
        ),
        JobNames.UPGRADE_TEST_DEBUG: TestConfig(
            Build.PACKAGE_DEBUG, job_config=JobConfig(**upgrade_test_common_params) # type: ignore
            Build.PACKAGE_DEBUG, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore
        ),
        JobNames.INTEGRATION_TEST_ASAN: TestConfig(
            Build.PACKAGE_ASAN,
@ -1033,7 +1026,7 @@ CI_CONFIG = CiConfig(
            job_config=JobConfig(num_batches=4, **integration_test_common_params), # type: ignore
        ),
        JobNames.INTEGRATION_TEST_FLAKY: TestConfig(
            Build.PACKAGE_ASAN, job_config=JobConfig(**integration_test_common_params) # type: ignore
            Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, **integration_test_common_params) # type: ignore
        ),
        JobNames.COMPATIBILITY_TEST: TestConfig(
            Build.PACKAGE_RELEASE,
@ -1080,7 +1073,7 @@ CI_CONFIG = CiConfig(
        JobNames.STATELESS_TEST_FLAKY_ASAN: TestConfig(
            # replace to non-default
            Build.PACKAGE_ASAN,
            job_config=JobConfig(**{**statless_test_common_params, "timeout": 3600}), # type: ignore
            job_config=JobConfig(pr_only=True, **{**statless_test_common_params, "timeout": 3600}), # type: ignore
        ),
        JobNames.JEPSEN_KEEPER: TestConfig(
            Build.BINARY_RELEASE,
@ -1116,7 +1109,7 @@ CI_CONFIG = CiConfig(
        ),
        JobNames.CLCIKBENCH_TEST: TestConfig(Build.PACKAGE_RELEASE),
        JobNames.CLCIKBENCH_TEST_ARM: TestConfig(Build.PACKAGE_AARCH64),
        JobNames.LIBFUZZER_TEST: TestConfig(Build.FUZZERS), # type: ignore
        JobNames.LIBFUZZER_TEST: TestConfig(Build.FUZZERS, job_config=JobConfig(run_by_label=Labels.libFuzzer)), # type: ignore
    },
)
CI_CONFIG.validate()
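The libFuzzer entries above are gated with run_by_label=Labels.libFuzzer, and the earlier ci.py hunk checks config.run_always or config.run_by_label. Below is a rough sketch of what such a label gate might look like, under the assumption that the PR's labels are available as a set; this is an illustration, not the repository's actual implementation.

# Hypothetical label gate; only the fields visible in this diff (run_by_label, run_always)
# are assumed, and the real check in ci.py may differ.
def job_enabled(run_by_label: str, run_always: bool, pr_labels: set) -> bool:
    if run_always:
        return True                        # job runs unconditionally
    if run_by_label:
        return run_by_label in pr_labels   # job runs only when its label is set on the PR
    return True                            # no gating configured

print(job_enabled("libFuzzer", False, {"libFuzzer"}))   # True
print(job_enabled("libFuzzer", False, {"ci_set_arm"}))  # False
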
@ -1,6 +1,6 @@
from contextlib import contextmanager
import os
from typing import List, Union, Iterator
from typing import Any, List, Union, Iterator
from pathlib import Path

@ -27,9 +27,22 @@ def is_hex(s):
    return False


def normalize_string(string: str) -> str:
    lowercase_string = string.lower()
    normalized_string = (
        lowercase_string.replace(" ", "_")
        .replace("-", "_")
        .replace("/", "_")
        .replace("(", "")
        .replace(")", "")
        .replace(",", "")
    )
    return normalized_string


class GHActions:
    @staticmethod
    def print_in_group(group_name: str, lines: Union[str, List[str]]) -> None:
    def print_in_group(group_name: str, lines: Union[Any, List[Any]]) -> None:
        lines = list(lines)
        print(f"::group::{group_name}")
        for line in lines:
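As a self-contained usage example of the normalize_string helper added above, applying it to one of the job names appearing elsewhere in this diff yields an S3/identifier-friendly string (the output is shown as a comment):

def normalize_string(string: str) -> str:
    lowercase_string = string.lower()
    normalized_string = (
        lowercase_string.replace(" ", "_")
        .replace("-", "_")
        .replace("/", "_")
        .replace("(", "")
        .replace(")", "")
        .replace(",", "")
    )
    return normalized_string

print(normalize_string("Stateful tests (release, DatabaseOrdinary)"))
# -> stateful_tests_release_databaseordinary
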
@ -370,6 +370,9 @@ class CommitStatusData:
    def is_ok(self):
        return self.status == SUCCESS

    def is_failure(self):
        return self.status == FAILURE

    @staticmethod
    def cleanup():
        STATUS_FILE_PATH.unlink(missing_ok=True)

@ -44,6 +44,7 @@ TRUSTED_CONTRIBUTORS = {
        "kitaisreal",
        "k-morozov", # Konstantin Morozov, Yandex Cloud
        "justindeguzman", # ClickHouse, Inc
        "jrdi", # ClickHouse contributor, TinyBird
    ]
}
Some files were not shown because too many files have changed in this diff.