Compare commits

...

38 Commits

Author SHA1 Message Date
Yakov Olkhovskiy
90f3f0de05
Merge 19e2197582 into d793e06860 2024-09-16 14:38:30 +00:00
Yakov Olkhovskiy
19e2197582
fix 2024-09-16 10:38:28 -04:00
Rich Raposa
d793e06860
Merge pull request #69622 from Olexandr88/patch-1
Docs: Update index
2024-09-16 13:12:30 +00:00
Yakov Olkhovskiy
d223c4547f
fix after master merge 2024-09-16 08:35:05 -04:00
vdimir
1986fb1418
Merge pull request #68595 from ClickHouse/vdimir/fix_function_printf_style
Fix style in Functions/printf.cpp
2024-09-16 12:34:31 +00:00
Yakov Olkhovskiy
58993d3f3b
Merge branch 'master' into refactor-secret-finder 2024-09-16 08:33:16 -04:00
Mikhail f. Shiryaev
f36408a666
Merge pull request #69599 from ClickHouse/local-debug
Improve debug step in actions
2024-09-16 10:20:12 +00:00
Yarik Briukhovetskyi
de85f5f251
empty commit (I've changed the changelog entry) 2024-09-16 12:15:11 +02:00
Oleksandr
85af661b9c
Docs: Update index 2024-09-16 12:57:24 +03:00
Antonio Andelic
b42c6491e4
Merge pull request #69578 from ClickHouse/issues/68932/enable_in_ci
enable removeRecursive in CI
2024-09-16 09:03:11 +00:00
Robert Schulze
1a4c7b7c61
Merge pull request #69493 from ucasfl/vector-index-insert
Speedup insert performance of vector similarity index by parallelization
2024-09-16 08:54:02 +00:00
Oleksandr
14feba8443
Docs: Update index 2024-09-16 11:42:09 +03:00
vdimir
4c4a051d5e
Merge pull request #69075 from kirillgarbar/remove-stale-moving-parts
Remove stale moving parts without zookeeper
2024-09-16 08:02:05 +00:00
Vitaly Baranov
a55cc03973
Merge pull request #69611 from vitlibar/masking-sensitive-info-in-gcs-table-function
Masking sensitive info in gcs() table function
2024-09-16 07:58:17 +00:00
Robert Schulze
37411bf240
Fix sizing with unconstrained thread pool size 2024-09-15 15:06:14 +00:00
Yakov Olkhovskiy
6f63a7b213 fix tidy 2024-09-14 16:46:48 +00:00
Yakov Olkhovskiy
56cfa74a14 fix 2024-09-14 13:32:52 +00:00
Yakov Olkhovskiy
dbb1d043fe unification of FunctionSecretArgumentsFinder 2024-09-14 05:46:08 +00:00
Vitaly Baranov
a461d20af9 Masking sensitive info in gcs() table function. 2024-09-13 23:03:56 +02:00
Mikhail f. Shiryaev
b55d0b54ea
Merge steps together to minimize grouping 2024-09-13 17:35:09 +02:00
Mikhail f. Shiryaev
418ef3f8bc
Use local debug action in every action 2024-09-13 17:20:49 +02:00
Mikhail f. Shiryaev
b420bbf855
Improve debug action 2024-09-13 17:17:10 +02:00
Кирилл Гарбар
6a7cfd13f7 Set PRESERVE_BLOBS if part is fetched from another replica 2024-09-13 15:25:17 +03:00
Mikhail Artemenko
baf6aaef1d fix tests 2024-09-13 11:32:33 +00:00
Robert Schulze
9ca149a487
Fix GWP-asan crash 2024-09-13 11:07:09 +00:00
Mikhail Artemenko
042194e3f6 enable removeRecursive in CI 2024-09-13 08:50:28 +00:00
Кирилл Гарбар
120e38c72a Merge remote-tracking branch 'kirillgarbar/master' into remove-stale-moving-parts 2024-09-12 19:26:58 +03:00
Robert Schulze
38b5ea9066
Fix docs 2024-09-12 12:43:27 +00:00
Robert Schulze
fe5e061fff
Some fixups 2024-09-12 10:38:14 +00:00
flynn
f6b965872f Merge branch 'master' of github.com:ClickHouse/ClickHouse into vector-index-insert 2024-09-12 06:40:33 +00:00
flynn
22c3b71196 Make vector similarity index creation thread pool globally 2024-09-12 03:54:25 +00:00
flynn
7425d4aa1a remove blank line 2024-09-11 10:12:42 +00:00
flynn
cf12e3924f Speedup insert data with vector similarity index by add data to index parallel 2024-09-11 09:31:46 +00:00
vdimir
cfc931160d
Merge branch 'master' into vdimir/fix_function_printf_style 2024-09-02 16:05:02 +02:00
Кирилл Гарбар
b2c4b771d8 Minor fixes 2024-08-29 19:33:04 +03:00
Кирилл Гарбар
edf4e09fb2 Remove stale moving parts without zookeeper 2024-08-29 18:46:06 +03:00
vdimir
07f44fdb89
Merge branch 'master' into vdimir/fix_function_printf_style 2024-08-22 11:23:50 +02:00
vdimir
2fcbe2465a
Fix style in Functions/printf.cpp 2024-08-20 09:07:15 +00:00
34 changed files with 980 additions and 911 deletions

View File

@ -4,15 +4,31 @@ description: Prints workflow debug info
runs: runs:
using: "composite" using: "composite"
steps: steps:
- name: Print envs - name: Envs, event.json and contexts
shell: bash shell: bash
run: | run: |
echo "::group::Envs" echo '::group::Environment variables'
env env | sort
echo "::endgroup::" echo '::endgroup::'
- name: Print Event.json
shell: bash echo '::group::event.json'
run: |
echo "::group::Event.json"
python3 -m json.tool "$GITHUB_EVENT_PATH" python3 -m json.tool "$GITHUB_EVENT_PATH"
echo "::endgroup::" echo '::endgroup::'
cat << 'EOF'
::group::github context
${{ toJSON(github) }}
::endgroup::
::group::env context
${{ toJSON(env) }}
::endgroup::
::group::runner context
${{ toJSON(runner) }}
::endgroup::
::group::job context
${{ toJSON(job) }}
::endgroup::
EOF

View File

@ -27,6 +27,8 @@ jobs:
clear-repository: true # to ensure correct digests clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version fetch-depth: 0 # to get version
filter: tree:0 filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Labels check - name: Labels check
run: | run: |
cd "$GITHUB_WORKSPACE/tests/ci" cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -33,6 +33,8 @@ jobs:
clear-repository: true clear-repository: true
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0 fetch-depth: 0
- name: Debug Info
uses: ./.github/actions/debug
- name: Cherry pick - name: Cherry pick
run: | run: |
cd "$GITHUB_WORKSPACE/tests/ci" cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -56,13 +56,13 @@ jobs:
GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
runs-on: [self-hosted, release-maker] runs-on: [self-hosted, release-maker]
steps: steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0 fetch-depth: 0
- name: Debug Info
uses: ./.github/actions/debug
- name: Prepare Release Info - name: Prepare Release Info
shell: bash shell: bash
run: | run: |

View File

@ -11,6 +11,7 @@ name: Build docker images
required: false required: false
type: boolean type: boolean
default: false default: false
jobs: jobs:
DockerBuildAarch64: DockerBuildAarch64:
runs-on: [self-hosted, style-checker-aarch64] runs-on: [self-hosted, style-checker-aarch64]

View File

@ -8,20 +8,21 @@ on: # yamllint disable-line rule:truthy
schedule: schedule:
- cron: '0 */6 * * *' - cron: '0 */6 * * *'
workflow_dispatch: workflow_dispatch:
jobs: jobs:
RunConfig: RunConfig:
runs-on: [self-hosted, style-checker-aarch64] runs-on: [self-hosted, style-checker-aarch64]
outputs: outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }} data: ${{ steps.runconfig.outputs.CI_DATA }}
steps: steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true # to ensure correct digests clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version fetch-depth: 0 # to get version
filter: tree:0 filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: PrepareRunConfig - name: PrepareRunConfig
id: runconfig id: runconfig
run: | run: |

View File

@ -15,14 +15,14 @@ jobs:
outputs: outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }} data: ${{ steps.runconfig.outputs.CI_DATA }}
steps: steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true # to ensure correct digests clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version fetch-depth: 0 # to get version
filter: tree:0 filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Merge sync PR - name: Merge sync PR
run: | run: |
cd "$GITHUB_WORKSPACE/tests/ci" cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -14,14 +14,14 @@ jobs:
outputs: outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }} data: ${{ steps.runconfig.outputs.CI_DATA }}
steps: steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true # to ensure correct digests clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get a version fetch-depth: 0 # to get a version
filter: tree:0 filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Cancel PR workflow - name: Cancel PR workflow
run: | run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run

View File

@ -15,14 +15,14 @@ jobs:
outputs: outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }} data: ${{ steps.runconfig.outputs.CI_DATA }}
steps: steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true # to ensure correct digests clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version fetch-depth: 0 # to get version
filter: tree:0 filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: PrepareRunConfig - name: PrepareRunConfig
id: runconfig id: runconfig
run: | run: |

View File

@ -25,14 +25,14 @@ jobs:
outputs: outputs:
data: ${{ steps.runconfig.outputs.CI_DATA }} data: ${{ steps.runconfig.outputs.CI_DATA }}
steps: steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true # to ensure correct digests clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get a version fetch-depth: 0 # to get a version
filter: tree:0 filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Cancel previous Sync PR workflow - name: Cancel previous Sync PR workflow
run: | run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run

View File

@ -24,6 +24,8 @@ jobs:
clear-repository: true # to ensure correct digests clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version fetch-depth: 0 # to get version
filter: tree:0 filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Labels check - name: Labels check
run: | run: |
cd "$GITHUB_WORKSPACE/tests/ci" cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -62,8 +62,6 @@ jobs:
env: env:
GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}} GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}
steps: steps:
- name: DebugInfo
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code - name: Check out repository code
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
@ -72,6 +70,8 @@ jobs:
submodules: ${{inputs.submodules}} submodules: ${{inputs.submodules}}
fetch-depth: ${{inputs.checkout_depth}} fetch-depth: ${{inputs.checkout_depth}}
filter: tree:0 filter: tree:0
- name: Debug Info
uses: ./.github/actions/debug
- name: Set build envs - name: Set build envs
run: | run: |
cat >> "$GITHUB_ENV" << 'EOF' cat >> "$GITHUB_ENV" << 'EOF'

View File

@ -13,16 +13,17 @@ Here is a complete list of available database engines. Follow the links for more
- [Atomic](../../engines/database-engines/atomic.md) - [Atomic](../../engines/database-engines/atomic.md)
- [MySQL](../../engines/database-engines/mysql.md) - [Lazy](../../engines/database-engines/lazy.md)
- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md)
- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) - [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md)
- [Lazy](../../engines/database-engines/lazy.md) - [MySQL](../../engines/database-engines/mysql.md)
- [PostgreSQL](../../engines/database-engines/postgresql.md) - [PostgreSQL](../../engines/database-engines/postgresql.md)
- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md)
- [Replicated](../../engines/database-engines/replicated.md) - [Replicated](../../engines/database-engines/replicated.md)
- [SQLite](../../engines/database-engines/sqlite.md) - [SQLite](../../engines/database-engines/sqlite.md)

View File

@ -107,6 +107,10 @@ The vector similarity index currently does not work with per-table, non-default
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml. [here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
::: :::
Vector index creation is known to be slow. To speed the process up, index creation can be parallelized. The maximum number of threads can be
configured using server configuration
setting [max_build_vector_similarity_index_thread_pool_size](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters_max_build_vector_similarity_index_thread_pool_size).
ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write
requests. requests.

View File

@ -491,6 +491,14 @@ Type: Double
Default: 0.9 Default: 0.9
## max_build_vector_similarity_index_thread_pool_size {#server_configuration_parameters_max_build_vector_similarity_index_thread_pool_size}
The maximum number of threads to use for building vector indexes. 0 means all cores.
Type: UInt64
Default: 16
## cgroups_memory_usage_observer_wait_time ## cgroups_memory_usage_observer_wait_time
Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. (see Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. (see

View File

@ -3,195 +3,27 @@
#include <Parsers/FunctionSecretArgumentsFinder.h> #include <Parsers/FunctionSecretArgumentsFinder.h>
#include <Analyzer/ConstantNode.h> #include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h> #include <Analyzer/FunctionNode.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/IdentifierNode.h> #include <Analyzer/IdentifierNode.h>
#include <Analyzer/ListNode.h>
#include <Common/KnownObjectNames.h>
#include <Core/QualifiedTableName.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB namespace DB
{ {
class FunctionTreeNode : public AbstractFunction
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderTreeNode
{ {
public: public:
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_) : function(function_), arguments(function.getArguments()) class ArgumentTreeNode : public Argument
{ {
if (arguments.getNodes().empty()) public:
return; explicit ArgumentTreeNode(const IQueryTreeNode * argument_) : argument(argument_) {}
std::unique_ptr<AbstractFunction> getFunction() const override
findFunctionSecretArguments(); {
if (const auto * f = argument->as<FunctionNode>())
return std::make_unique<FunctionTreeNode>(*f);
return nullptr;
} }
bool isIdentifier() const override { return argument->as<IdentifierNode>(); }
struct Result bool tryGetString(String * res, bool allow_identifier) const override
{
/// Result constructed by default means no arguments will be hidden.
size_t start = static_cast<size_t>(-1);
size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`).
/// In all known cases secret arguments are consecutive
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
std::vector<std::string> nested_maps;
bool hasSecrets() const
{
return count != 0 || !nested_maps.empty();
}
};
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
private:
const FunctionNode & function;
const ListNode & arguments;
FunctionSecretArgumentsFinder::Result result;
void markSecretArgument(size_t index, bool argument_is_named = false)
{
if (index >= arguments.getNodes().size())
return;
if (!result.count)
{
result.start = index;
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
}
void findFunctionSecretArguments()
{
const auto & name = function.getFunctionName();
if ((name == "mysql") || (name == "postgresql") || (name == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((name == "s3") || (name == "cosn") || (name == "oss") ||
(name == "deltaLake") || (name == "hudi") || (name == "iceberg"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (name == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((name == "remote") || (name == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((name == "encrypt") || (name == "decrypt") ||
(name == "aes_encrypt_mysql") || (name == "aes_decrypt_mysql") ||
(name == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (name == "url")
{
findURLSecretArguments();
}
}
void findMySQLFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// mysql(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
markSecretArgument(4);
}
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
const auto & nodes = arguments.getNodes();
size_t count = nodes.size();
while (count > 0)
{
const FunctionNode * f = nodes.at(count - 1)->as<FunctionNode>();
if (!f)
break;
if (f->getFunctionName() == "headers")
result.nested_maps.push_back(f->getFunctionName());
else if (f->getFunctionName() != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
excludeS3OrURLNestedMaps();
}
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
if (arg_idx >= arguments.getNodes().size())
return false;
return tryGetStringFromArgument(arguments.getNodes()[arg_idx], res, allow_identifier);
}
static bool tryGetStringFromArgument(const QueryTreeNodePtr argument, String * res, bool allow_identifier = true)
{ {
if (const auto * literal = argument->as<ConstantNode>()) if (const auto * literal = argument->as<ConstantNode>())
{ {
@ -214,159 +46,46 @@ private:
return false; return false;
} }
private:
const IQueryTreeNode * argument = nullptr;
};
void findRemoteFunctionSecretArguments() class ArgumentsTreeNode : public Arguments
{ {
if (isNamedCollectionName(0)) public:
explicit ArgumentsTreeNode(const QueryTreeNodes * arguments_) : arguments(arguments_) {}
size_t size() const override { return arguments ? arguments->size() : 0; }
std::unique_ptr<Argument> at(size_t n) const override { return std::make_unique<ArgumentTreeNode>(arguments->at(n).get()); }
private:
const QueryTreeNodes * arguments = nullptr;
};
explicit FunctionTreeNode(const FunctionNode & function_) : function(&function_)
{ {
/// remote(named_collection, ..., password = 'password', ...) if (const auto & nodes = function->getArguments().getNodes(); !nodes.empty())
findSecretNamedArgument("password", 1); arguments = std::make_unique<ArgumentsTreeNode>(&nodes);
return;
} }
String name() const override { return function->getFunctionName(); }
private:
const FunctionNode * function = nullptr;
};
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of /// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// remote('addresses_expr', db.table) /// That involves passwords and secret keys.
if (arguments.getNodes().size() < 3) class FunctionSecretArgumentsFinderTreeNode : public FunctionSecretArgumentsFinder
{
public:
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_)
: FunctionSecretArgumentsFinder(std::make_unique<FunctionTreeNode>(function_))
{
if (!function->hasArguments())
return; return;
size_t arg_num = 1; findOrdinaryFunctionSecretArguments();
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
const auto * table_function = arguments.getNodes()[arg_num]->as<FunctionNode>();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->getFunctionName()))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
} }
/// Skip the current argument (which is either a database name or a qualified table name). FunctionSecretArgumentsFinder::Result getResult() const { return result; }
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
void findEncryptionFunctionSecretArguments()
{
if (arguments.getNodes().empty())
return;
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
result.start = 1;
result.count = arguments.getNodes().size() - 1;
}
/// Whether a specified argument can be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (arguments.getNodes().size() <= arg_idx)
return false;
const auto * identifier = arguments.getNodes()[arg_idx]->as<IdentifierNode>();
return identifier != nullptr;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < arguments.getNodes().size(); ++i)
{
const auto & argument = arguments.getNodes()[i];
const auto * equals_func = argument->as<FunctionNode>();
if (!equals_func || (equals_func->getFunctionName() != "equals"))
continue;
const auto * expr_list = equals_func->getArguments().as<ListNode>();
if (!expr_list)
continue;
const auto & equal_args = expr_list->getNodes();
if (equal_args.size() != 2)
continue;
String found_key;
if (!tryGetStringFromArgument(equal_args[0], &found_key))
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
}
}
}; };
} }

View File

@ -178,6 +178,9 @@
M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \ M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \
M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \ M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \
M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \ M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \
M(BuildVectorSimilarityIndexThreads, "Number of threads in the build vector similarity index thread pool.") \
M(BuildVectorSimilarityIndexThreadsActive, "Number of threads in the build vector similarity index thread pool running a task.") \
M(BuildVectorSimilarityIndexThreadsScheduled, "Number of queued or active jobs in the build vector similarity index thread pool.") \
\ \
M(DiskPlainRewritableAzureDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for AzureObjectStorage.") \ M(DiskPlainRewritableAzureDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for AzureObjectStorage.") \
M(DiskPlainRewritableLocalDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for LocalObjectStorage.") \ M(DiskPlainRewritableLocalDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for LocalObjectStorage.") \

View File

@ -63,6 +63,7 @@ static struct InitFiu
REGULAR(keepermap_fail_drop_data) \ REGULAR(keepermap_fail_drop_data) \
REGULAR(lazy_pipe_fds_fail_close) \ REGULAR(lazy_pipe_fds_fail_close) \
PAUSEABLE(infinite_sleep) \ PAUSEABLE(infinite_sleep) \
PAUSEABLE(stop_moving_part_before_swap_with_active) \
namespace FailPoints namespace FailPoints

View File

@ -50,7 +50,7 @@ namespace DB
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \ M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
M(String, default_database, "default", "Default database name.", 0) \ M(String, default_database, "default", "Default database name.", 0) \
M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \ M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \ M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \
M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \ M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \ M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \ M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \
@ -65,6 +65,7 @@ namespace DB
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \ M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
M(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \ M(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \
M(UInt64, max_build_vector_similarity_index_thread_pool_size, 16, "The maximum number of threads to use to build vector similarity indexes. 0 means all cores.", 0) \
\ \
/* Database Catalog */ \ /* Database Catalog */ \
M(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \ M(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \

View File

@ -50,13 +50,6 @@ private:
return executeNonconstant(input); return executeNonconstant(input);
} }
[[maybe_unused]] String toString() const
{
WriteBufferFromOwnString buf;
buf << "format:" << format << ", rows:" << rows << ", is_literal:" << is_literal << ", input:" << input.dumpStructure() << "\n";
return buf.str();
}
private: private:
ColumnWithTypeAndName executeLiteral(std::string_view literal) const ColumnWithTypeAndName executeLiteral(std::string_view literal) const
{ {
@ -231,9 +224,7 @@ public:
const auto & instruction = instructions[i]; const auto & instruction = instructions[i];
try try
{ {
// std::cout << "instruction[" << i << "]:" << instructions[i].toString() << std::endl;
concat_args[i] = instruction.execute(); concat_args[i] = instruction.execute();
// std::cout << "concat_args[" << i << "]:" << concat_args[i].dumpStructure() << std::endl;
} }
catch (const fmt::v9::format_error & e) catch (const fmt::v9::format_error & e)
{ {
@ -358,7 +349,14 @@ private:
REGISTER_FUNCTION(Printf) REGISTER_FUNCTION(Printf)
{ {
factory.registerFunction<FunctionPrintf>(); factory.registerFunction<FunctionPrintf>(
FunctionDocumentation{.description=R"(
The `printf` function formats the given string with the values (strings, integers, floating-points etc.) listed in the arguments, similar to printf function in C++.
The format string can contain format specifiers starting with `%` character.
Anything not contained in `%` and the following format specifier is considered literal text and copied verbatim into the output.
Literal `%` character can be escaped by `%%`.)", .examples{{"sum", "select printf('%%%s %s %d', 'Hello', 'World', 2024);", "%Hello World 2024"}}, .categories{"String"}
});
} }
} }

View File

@ -10,6 +10,7 @@
#include <Common/SensitiveDataMasker.h> #include <Common/SensitiveDataMasker.h>
#include <Common/Macros.h> #include <Common/Macros.h>
#include <Common/EventNotifier.h> #include <Common/EventNotifier.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/Stopwatch.h> #include <Common/Stopwatch.h>
#include <Common/formatReadable.h> #include <Common/formatReadable.h>
#include <Common/Throttler.h> #include <Common/Throttler.h>
@ -121,7 +122,6 @@
#include <Interpreters/InterpreterSelectWithUnionQuery.h> #include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <base/defines.h> #include <base/defines.h>
namespace fs = std::filesystem; namespace fs = std::filesystem;
namespace ProfileEvents namespace ProfileEvents
@ -164,6 +164,9 @@ namespace CurrentMetrics
extern const Metric TablesLoaderForegroundThreadsActive; extern const Metric TablesLoaderForegroundThreadsActive;
extern const Metric TablesLoaderForegroundThreadsScheduled; extern const Metric TablesLoaderForegroundThreadsScheduled;
extern const Metric IOWriterThreadsScheduled; extern const Metric IOWriterThreadsScheduled;
extern const Metric BuildVectorSimilarityIndexThreads;
extern const Metric BuildVectorSimilarityIndexThreadsActive;
extern const Metric BuildVectorSimilarityIndexThreadsScheduled;
extern const Metric AttachedTable; extern const Metric AttachedTable;
extern const Metric AttachedView; extern const Metric AttachedView;
extern const Metric AttachedDictionary; extern const Metric AttachedDictionary;
@ -297,6 +300,8 @@ struct ContextSharedPart : boost::noncopyable
mutable std::unique_ptr<ThreadPool> load_marks_threadpool; /// Threadpool for loading marks cache. mutable std::unique_ptr<ThreadPool> load_marks_threadpool; /// Threadpool for loading marks cache.
mutable OnceFlag prefetch_threadpool_initialized; mutable OnceFlag prefetch_threadpool_initialized;
mutable std::unique_ptr<ThreadPool> prefetch_threadpool; /// Threadpool for loading marks cache. mutable std::unique_ptr<ThreadPool> prefetch_threadpool; /// Threadpool for loading marks cache.
mutable OnceFlag build_vector_similarity_index_threadpool_initialized;
mutable std::unique_ptr<ThreadPool> build_vector_similarity_index_threadpool; /// Threadpool for vector-similarity index creation.
mutable UncompressedCachePtr index_uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks for MergeTree indices. mutable UncompressedCachePtr index_uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks for MergeTree indices.
mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex); /// Cache of query results. mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex); /// Cache of query results.
mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices. mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices.
@ -3297,6 +3302,21 @@ size_t Context::getPrefetchThreadpoolSize() const
return config.getUInt(".prefetch_threadpool_pool_size", 100); return config.getUInt(".prefetch_threadpool_pool_size", 100);
} }
ThreadPool & Context::getBuildVectorSimilarityIndexThreadPool() const
{
callOnce(shared->build_vector_similarity_index_threadpool_initialized, [&] {
size_t pool_size = shared->server_settings.max_build_vector_similarity_index_thread_pool_size > 0
? shared->server_settings.max_build_vector_similarity_index_thread_pool_size
: getNumberOfPhysicalCPUCores();
shared->build_vector_similarity_index_threadpool = std::make_unique<ThreadPool>(
CurrentMetrics::BuildVectorSimilarityIndexThreads,
CurrentMetrics::BuildVectorSimilarityIndexThreadsActive,
CurrentMetrics::BuildVectorSimilarityIndexThreadsScheduled,
pool_size);
});
return *shared->build_vector_similarity_index_threadpool;
}
BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const
{ {
callOnce(shared->buffer_flush_schedule_pool_initialized, [&] { callOnce(shared->buffer_flush_schedule_pool_initialized, [&] {

View File

@ -1097,6 +1097,8 @@ public:
/// and make a prefetch by putting a read task to threadpoolReader. /// and make a prefetch by putting a read task to threadpoolReader.
size_t getPrefetchThreadpoolSize() const; size_t getPrefetchThreadpoolSize() const;
ThreadPool & getBuildVectorSimilarityIndexThreadPool() const;
/// Settings for MergeTree background tasks stored in config.xml /// Settings for MergeTree background tasks stored in config.xml
BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const; BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const;
BackgroundTaskSchedulingSettings getBackgroundMoveTaskSchedulingSettings() const; BackgroundTaskSchedulingSettings getBackgroundMoveTaskSchedulingSettings() const;

View File

@ -1,10 +1,42 @@
#pragma once #pragma once
#include <vector> #include <Common/KnownObjectNames.h>
#include <Core/QualifiedTableName.h>
#include <base/defines.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB namespace DB
{ {
class AbstractFunction
{
friend class FunctionSecretArgumentsFinder;
public:
class Argument
{
public:
virtual ~Argument() = default;
virtual std::unique_ptr<AbstractFunction> getFunction() const = 0;
virtual bool isIdentifier() const = 0;
virtual bool tryGetString(String * res, bool allow_identifier) const = 0;
};
class Arguments
{
public:
virtual ~Arguments() = default;
virtual size_t size() const = 0;
virtual std::unique_ptr<Argument> at(size_t n) const = 0;
};
virtual ~AbstractFunction() = default;
virtual String name() const = 0;
bool hasArguments() const { return !!arguments; }
protected:
std::unique_ptr<Arguments> arguments;
};
class FunctionSecretArgumentsFinder class FunctionSecretArgumentsFinder
{ {
public: public:
@ -23,6 +55,485 @@ public:
return count != 0 || !nested_maps.empty(); return count != 0 || !nested_maps.empty();
} }
}; };
explicit FunctionSecretArgumentsFinder(std::unique_ptr<AbstractFunction> && function_) : function(std::move(function_)) {}
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
protected:
const std::unique_ptr<AbstractFunction> function;
Result result;
void markSecretArgument(size_t index, bool argument_is_named = false)
{
if (index >= function->arguments->size())
return;
if (!result.count)
{
result.start = index;
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
}
void findOrdinaryFunctionSecretArguments()
{
if ((function->name() == "mysql") || (function->name() == "postgresql") || (function->name() == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((function->name() == "s3") || (function->name() == "cosn") || (function->name() == "oss") ||
(function->name() == "deltaLake") || (function->name() == "hudi") || (function->name() == "iceberg") ||
(function->name() == "gcs"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function->name() == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if (function->name() == "azureBlobStorage")
{
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function->name() == "azureBlobStorageCluster")
{
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((function->name() == "remote") || (function->name() == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((function->name() == "encrypt") || (function->name() == "decrypt") ||
(function->name() == "aes_encrypt_mysql") || (function->name() == "aes_decrypt_mysql") ||
(function->name() == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (function->name() == "url")
{
findURLSecretArguments();
}
}
void findMySQLFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// mysql(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
markSecretArgument(4);
}
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
size_t count = function->arguments->size();
while (count > 0)
{
const auto f = function->arguments->at(count - 1)->getFunction();
if (!f)
break;
if (f->name() == "headers")
result.nested_maps.push_back(f->name());
else if (f->name() != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
size_t count = function->arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
if (url_arg_idx + 4 < count)
markSecretArgument(url_arg_idx + 4);
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
excludeS3OrURLNestedMaps();
}
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
if (arg_idx >= function->arguments->size())
return false;
return tryGetStringFromArgument(*function->arguments->at(arg_idx), res, allow_identifier);
}
static bool tryGetStringFromArgument(const AbstractFunction::Argument & argument, String * res, bool allow_identifier = true)
{
return argument.tryGetString(res, allow_identifier);
}
void findRemoteFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// remote(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
return;
}
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
if (function->arguments->size() < 3)
return;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
auto table_function = function->arguments->at(arg_num)->getFunction();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name()))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
}
/// Skip the current argument (which is either a database name or a qualified table name).
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
void findEncryptionFunctionSecretArguments()
{
if (function->arguments->size() == 0)
return;
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
result.start = 1;
result.count = function->arguments->size() - 1;
}
void findTableEngineSecretArguments()
{
const String & engine_name = function->name();
if (engine_name == "ExternalDistributed")
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
findExternalDistributedTableEngineSecretArguments();
}
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
{
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
{
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
findS3TableEngineSecretArguments();
}
else if (engine_name == "URL")
{
findURLSecretArguments();
}
}
void findExternalDistributedTableEngineSecretArguments()
{
if (isNamedCollectionName(1))
{
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 2);
}
else
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
markSecretArgument(5);
}
}
void findS3TableEngineSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((3 <= count) && (count <= 4))
{
String second_arg;
if (tryGetStringFromArgument(1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (count == 3)
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
}
}
}
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (2 < count)
markSecretArgument(2);
}
void findDatabaseEngineSecretArguments()
{
const String & engine_name = function->name();
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL"))
{
/// MySQL('host:port', 'database', 'user', 'password')
/// PostgreSQL('host:port', 'database', 'user', 'password')
findMySQLDatabaseSecretArguments();
}
else if (engine_name == "S3")
{
/// S3('url', 'access_key_id', 'secret_access_key')
findS3DatabaseSecretArguments();
}
}
void findMySQLDatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// MySQL(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// MySQL('host:port', 'database', 'user', 'password')
markSecretArgument(3);
}
}
void findS3DatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'password', ...)
findSecretNamedArgument("secret_access_key", 1);
}
else
{
/// S3('url', 'access_key_id', 'secret_access_key')
markSecretArgument(2);
}
}
void findBackupNameSecretArguments()
{
const String & engine_name = function->name();
if (engine_name == "S3")
{
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
markSecretArgument(2);
}
}
/// Whether a specified argument can be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (function->arguments->size() <= arg_idx)
return false;
return function->arguments->at(arg_idx)->isIdentifier();
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < function->arguments->size(); ++i)
{
const auto & argument = function->arguments->at(i);
const auto equals_func = argument->getFunction();
if (!equals_func || (equals_func->name() != "equals"))
continue;
if (!equals_func->arguments || equals_func->arguments->size() != 2)
continue;
String found_key;
if (!tryGetStringFromArgument(*equals_func->arguments->at(0), &found_key))
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
}
}
}; };
} }

View File

@ -1,35 +1,97 @@
#pragma once #pragma once
#include <Parsers/FunctionSecretArgumentsFinder.h> #include <Parsers/FunctionSecretArgumentsFinder.h>
#include <Core/QualifiedTableName.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h> #include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h> #include <Parsers/ASTIdentifier.h>
#include <Common/KnownObjectNames.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB namespace DB
{ {
class FunctionAST : public AbstractFunction
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderAST
{ {
public: public:
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_) : function(function_) class ArgumentAST : public Argument
{ {
if (!function.arguments) public:
explicit ArgumentAST(const IAST * argument_) : argument(argument_) {}
std::unique_ptr<AbstractFunction> getFunction() const override
{
if (const auto * f = argument->as<ASTFunction>())
return std::make_unique<FunctionAST>(*f);
return nullptr;
}
bool isIdentifier() const override { return argument->as<ASTIdentifier>(); }
bool tryGetString(String * res, bool allow_identifier) const override
{
if (const auto * literal = argument->as<ASTLiteral>())
{
if (literal->value.getType() != Field::Types::String)
return false;
if (res)
*res = literal->value.safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument->as<ASTIdentifier>())
{
if (res)
*res = id->name();
return true;
}
}
return false;
}
private:
const IAST * argument = nullptr;
};
class ArgumentsAST : public Arguments
{
public:
explicit ArgumentsAST(const ASTs * arguments_) : arguments(arguments_) {}
size_t size() const override { return arguments ? arguments->size() : 0; }
std::unique_ptr<Argument> at(size_t n) const override
{
return std::make_unique<ArgumentAST>(arguments->at(n).get());
}
private:
const ASTs * arguments = nullptr;
};
explicit FunctionAST(const ASTFunction & function_) : function(&function_)
{
if (!function->arguments)
return; return;
const auto * expr_list = function.arguments->as<ASTExpressionList>(); const auto * expr_list = function->arguments->as<ASTExpressionList>();
if (!expr_list) if (!expr_list)
return; return;
arguments = &expr_list->children; arguments = std::make_unique<ArgumentsAST>(&expr_list->children);
switch (function.kind) }
String name() const override { return function->name; }
private:
const ASTFunction * function = nullptr;
};
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderAST : public FunctionSecretArgumentsFinder
{
public:
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_)
: FunctionSecretArgumentsFinder(std::make_unique<FunctionAST>(function_))
{
if (!function->hasArguments())
return;
switch (function_.kind)
{ {
case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break;
case ASTFunction::Kind::WINDOW_FUNCTION: break; case ASTFunction::Kind::WINDOW_FUNCTION: break;
@ -43,506 +105,7 @@ public:
} }
FunctionSecretArgumentsFinder::Result getResult() const { return result; } FunctionSecretArgumentsFinder::Result getResult() const { return result; }
private:
const ASTFunction & function;
const ASTs * arguments = nullptr;
FunctionSecretArgumentsFinder::Result result;
void markSecretArgument(size_t index, bool argument_is_named = false)
{
if (index >= arguments->size())
return;
if (!result.count)
{
result.start = index;
result.are_named = argument_is_named;
}
chassert(index >= result.start); /// We always check arguments consecutively
result.count = index + 1 - result.start;
if (!argument_is_named)
result.are_named = false;
}
void findOrdinaryFunctionSecretArguments()
{
if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") ||
(function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function.name == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if (function.name == "azureBlobStorage")
{
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function.name == "azureBlobStorageCluster")
{
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((function.name == "remote") || (function.name == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((function.name == "encrypt") || (function.name == "decrypt") ||
(function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") ||
(function.name == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (function.name == "url")
{
findURLSecretArguments();
}
}
void findMySQLFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// mysql(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
markSecretArgument(4);
}
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
size_t count = arguments->size();
while (count > 0)
{
const ASTFunction * f = arguments->at(count - 1)->as<ASTFunction>();
if (!f)
break;
if (f->name == "headers")
result.nested_maps.push_back(f->name);
else if (f->name != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
size_t count = arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
if (url_arg_idx + 4 < count)
markSecretArgument(url_arg_idx + 4);
}
void findURLSecretArguments()
{
if (!isNamedCollectionName(0))
excludeS3OrURLNestedMaps();
}
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
if (arg_idx >= arguments->size())
return false;
return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier);
}
static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true)
{
if (const auto * literal = argument.as<ASTLiteral>())
{
if (literal->value.getType() != Field::Types::String)
return false;
if (res)
*res = literal->value.safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument.as<ASTIdentifier>())
{
if (res)
*res = id->name();
return true;
}
}
return false;
}
void findRemoteFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// remote(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
return;
}
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
if (arguments->size() < 3)
return;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
const auto * table_function = (*arguments)[arg_num]->as<ASTFunction>();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
}
/// Skip the current argument (which is either a database name or a qualified table name).
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
void findEncryptionFunctionSecretArguments()
{
if (arguments->empty())
return;
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
result.start = 1;
result.count = arguments->size() - 1;
}
void findTableEngineSecretArguments()
{
const String & engine_name = function.name;
if (engine_name == "ExternalDistributed")
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
findExternalDistributedTableEngineSecretArguments();
}
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
{
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
{
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
findS3TableEngineSecretArguments();
}
else if (engine_name == "URL")
{
findURLSecretArguments();
}
}
void findExternalDistributedTableEngineSecretArguments()
{
if (isNamedCollectionName(1))
{
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 2);
}
else
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
markSecretArgument(5);
}
}
void findS3TableEngineSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((3 <= count) && (count <= 4))
{
String second_arg;
if (tryGetStringFromArgument(1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (count == 3)
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
}
}
}
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (2 < count)
markSecretArgument(2);
}
void findDatabaseEngineSecretArguments()
{
const String & engine_name = function.name;
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL"))
{
/// MySQL('host:port', 'database', 'user', 'password')
/// PostgreSQL('host:port', 'database', 'user', 'password')
findMySQLDatabaseSecretArguments();
}
else if (engine_name == "S3")
{
/// S3('url', 'access_key_id', 'secret_access_key')
findS3DatabaseSecretArguments();
}
}
void findMySQLDatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// MySQL(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
}
else
{
/// MySQL('host:port', 'database', 'user', 'password')
markSecretArgument(3);
}
}
void findS3DatabaseSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'password', ...)
findSecretNamedArgument("secret_access_key", 1);
}
else
{
/// S3('url', 'access_key_id', 'secret_access_key')
markSecretArgument(2);
}
}
void findBackupNameSecretArguments()
{
const String & engine_name = function.name;
if (engine_name == "S3")
{
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
markSecretArgument(2);
}
}
/// Whether a specified argument can be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (arguments->size() <= arg_idx)
return false;
const auto * identifier = (*arguments)[arg_idx]->as<ASTIdentifier>();
return identifier != nullptr;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < arguments->size(); ++i)
{
const auto & argument = (*arguments)[i];
const auto * equals_func = argument->as<ASTFunction>();
if (!equals_func || (equals_func->name != "equals"))
continue;
const auto * expr_list = equals_func->arguments->as<ASTExpressionList>();
if (!expr_list)
continue;
const auto & equal_args = expr_list->children;
if (equal_args.size() != 2)
continue;
String found_key;
if (!tryGetStringFromArgument(*equal_args[0], &found_key))
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
}
}
}; };
} }

View File

@ -5,9 +5,11 @@
#include <Columns/ColumnArray.h> #include <Columns/ColumnArray.h>
#include <Common/BitHelpers.h> #include <Common/BitHelpers.h>
#include <Common/formatReadable.h> #include <Common/formatReadable.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Core/Field.h> #include <Core/Field.h>
#include <Core/ServerSettings.h>
#include <DataTypes/DataTypeArray.h> #include <DataTypes/DataTypeArray.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
@ -29,7 +31,6 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int FORMAT_VERSION_TOO_OLD; extern const int FORMAT_VERSION_TOO_OLD;
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
extern const int INCORRECT_DATA; extern const int INCORRECT_DATA;
@ -131,8 +132,7 @@ void USearchIndexWithSerialization::deserialize(ReadBuffer & istr)
/// See the comment in MergeTreeIndexGranuleVectorSimilarity::deserializeBinary why we throw here /// See the comment in MergeTreeIndexGranuleVectorSimilarity::deserializeBinary why we throw here
throw Exception(ErrorCodes::INCORRECT_DATA, "Could not load vector similarity index. Please drop the index and create it again. Error: {}", String(result.error.release())); throw Exception(ErrorCodes::INCORRECT_DATA, "Could not load vector similarity index. Please drop the index and create it again. Error: {}", String(result.error.release()));
if (!try_reserve(limits())) try_reserve(limits());
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index");
} }
USearchIndexWithSerialization::Statistics USearchIndexWithSerialization::getStatistics() const USearchIndexWithSerialization::Statistics USearchIndexWithSerialization::getStatistics() const
@ -270,20 +270,49 @@ void updateImpl(const ColumnArray * column_array, const ColumnArray::Offsets & c
throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column with vector similarity index must have equal length"); throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column with vector similarity index must have equal length");
/// Reserving space is mandatory /// Reserving space is mandatory
if (!index->try_reserve(roundUpToPowerOfTwoOrZero(index->size() + rows))) size_t max_thread_pool_size = Context::getGlobalContextInstance()->getServerSettings().max_build_vector_similarity_index_thread_pool_size;
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for vector similarity index"); if (max_thread_pool_size == 0)
max_thread_pool_size = getNumberOfPhysicalCPUCores();
unum::usearch::index_limits_t limits(roundUpToPowerOfTwoOrZero(index->size() + rows), max_thread_pool_size);
index->reserve(limits);
for (size_t row = 0; row < rows; ++row) /// Vector index creation is slooooow. Add the new rows in parallel. The threadpool is global to avoid oversubscription when multiple
/// indexes are build simultaneously (e.g. multiple merges run at the same time).
auto & thread_pool = Context::getGlobalContextInstance()->getBuildVectorSimilarityIndexThreadPool();
auto add_vector_to_index = [&](USearchIndex::vector_key_t key, size_t row, ThreadGroupPtr thread_group)
{
SCOPE_EXIT_SAFE(
if (thread_group)
CurrentThread::detachFromGroupIfNotDetached();
);
if (thread_group)
CurrentThread::attachToGroupIfDetached(thread_group);
/// add is thread-safe
if (auto result = index->add(key, &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
{ {
if (auto result = index->add(static_cast<USearchIndex::vector_key_t>(index->size()), &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
throw Exception(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index. Error: {}", String(result.error.release())); throw Exception(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index. Error: {}", String(result.error.release()));
}
else else
{ {
ProfileEvents::increment(ProfileEvents::USearchAddCount); ProfileEvents::increment(ProfileEvents::USearchAddCount);
ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, result.visited_members); ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, result.visited_members);
ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, result.computed_distances); ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, result.computed_distances);
} }
};
size_t index_size = index->size();
for (size_t row = 0; row < rows; ++row)
{
auto key = static_cast<USearchIndex::vector_key_t>(index_size + row);
auto task = [group = CurrentThread::getGroup(), &add_vector_to_index, key, row] { add_vector_to_index(key, row, group); };
thread_pool.scheduleOrThrowOnError(task);
} }
thread_pool.wait();
} }
} }

View File

@ -1,6 +1,7 @@
#include <Storages/MergeTree/MergeTreeData.h> #include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreePartsMover.h> #include <Storages/MergeTree/MergeTreePartsMover.h>
#include <Storages/MergeTree/MergeTreeSettings.h> #include <Storages/MergeTree/MergeTreeSettings.h>
#include <Common/FailPoint.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <set> #include <set>
@ -15,6 +16,11 @@ namespace ErrorCodes
extern const int DIRECTORY_ALREADY_EXISTS; extern const int DIRECTORY_ALREADY_EXISTS;
} }
namespace FailPoints
{
extern const char stop_moving_part_before_swap_with_active[];
}
namespace namespace
{ {
@ -226,6 +232,7 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
cloned_part.temporary_directory_lock = data->getTemporaryPartDirectoryHolder(part->name); cloned_part.temporary_directory_lock = data->getTemporaryPartDirectoryHolder(part->name);
MutableDataPartStoragePtr cloned_part_storage; MutableDataPartStoragePtr cloned_part_storage;
bool preserve_blobs = false;
if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication) if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)
{ {
/// Try zero-copy replication and fallback to default copy if it's not possible /// Try zero-copy replication and fallback to default copy if it's not possible
@ -253,6 +260,7 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
if (zero_copy_part) if (zero_copy_part)
{ {
/// FIXME for some reason we cannot just use this part, we have to re-create it through MergeTreeDataPartBuilder /// FIXME for some reason we cannot just use this part, we have to re-create it through MergeTreeDataPartBuilder
preserve_blobs = true;
zero_copy_part->is_temp = false; /// Do not remove it in dtor zero_copy_part->is_temp = false; /// Do not remove it in dtor
cloned_part_storage = zero_copy_part->getDataPartStoragePtr(); cloned_part_storage = zero_copy_part->getDataPartStoragePtr();
} }
@ -272,7 +280,17 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
cloned_part.part = std::move(builder).withPartFormatFromDisk().build(); cloned_part.part = std::move(builder).withPartFormatFromDisk().build();
LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part.part->getDataPartStorage().getFullPath()); LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part.part->getDataPartStorage().getFullPath());
cloned_part.part->is_temp = data->allowRemoveStaleMovingParts(); cloned_part.part->is_temp = false;
if (data->allowRemoveStaleMovingParts())
{
cloned_part.part->is_temp = true;
/// Setting it in case connection to zookeeper is lost while moving
/// Otherwise part might be stuck in the moving directory due to the KEEPER_EXCEPTION in part's destructor
if (preserve_blobs)
cloned_part.part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::PRESERVE_BLOBS;
else
cloned_part.part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS;
}
cloned_part.part->loadColumnsChecksumsIndexes(true, true); cloned_part.part->loadColumnsChecksumsIndexes(true, true);
cloned_part.part->loadVersionMetadata(); cloned_part.part->loadVersionMetadata();
cloned_part.part->modification_time = cloned_part.part->getDataPartStorage().getLastModified().epochTime(); cloned_part.part->modification_time = cloned_part.part->getDataPartStorage().getLastModified().epochTime();
@ -282,6 +300,8 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
void MergeTreePartsMover::swapClonedPart(TemporaryClonedPart & cloned_part) const void MergeTreePartsMover::swapClonedPart(TemporaryClonedPart & cloned_part) const
{ {
/// Used to get some stuck parts in the moving directory by stopping moves while pause is active
FailPointInjection::pauseFailPoint(FailPoints::stop_moving_part_before_swap_with_active);
if (moves_blocker.isCancelled()) if (moves_blocker.isCancelled())
throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts."); throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts.");

View File

@ -42,6 +42,7 @@
<multi_read>1</multi_read> <multi_read>1</multi_read>
<check_not_exists>1</check_not_exists> <check_not_exists>1</check_not_exists>
<create_if_not_exists>1</create_if_not_exists> <create_if_not_exists>1</create_if_not_exists>
<remove_recursive>1</remove_recursive>
</feature_flags> </feature_flags>
</keeper_server> </keeper_server>
</clickhouse> </clickhouse>

View File

@ -64,6 +64,7 @@ function configure()
randomize_config_boolean_value multi_read keeper_port randomize_config_boolean_value multi_read keeper_port
randomize_config_boolean_value check_not_exists keeper_port randomize_config_boolean_value check_not_exists keeper_port
randomize_config_boolean_value create_if_not_exists keeper_port randomize_config_boolean_value create_if_not_exists keeper_port
randomize_config_boolean_value remove_recursive keeper_port
fi fi
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml

View File

@ -393,6 +393,7 @@ def test_table_functions():
f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')", f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '{azure_account_key}')", f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '{azure_account_key}')",
f"iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", f"iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
f"gcs('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
] ]
def make_test_case(i): def make_test_case(i):

View File

@ -0,0 +1,46 @@
<clickhouse>
<remote_servers>
<cluster>
<shard>
<replica>
<host>ch1</host>
<port>9000</port>
</replica>
</shard>
</cluster>
</remote_servers>
<macros>
<shard>01</shard>
</macros>
<storage_configuration>
<disks>
<s3>
<type>s3</type>
<endpoint>http://minio1:9001/root/data/</endpoint>
<access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key>
</s3>
</disks>
<policies>
<s3>
<volumes>
<default>
<disk>default</disk>
<perform_ttl_move_on_insert>False</perform_ttl_move_on_insert>
</default>
<s3>
<disk>s3</disk>
<perform_ttl_move_on_insert>False</perform_ttl_move_on_insert>
</s3>
</volumes>
<move_factor>0.0</move_factor>
</s3>
</policies>
</storage_configuration>
<merge_tree>
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
<storage_policy>s3</storage_policy>
</merge_tree>
<allow_remove_stale_moving_parts>true</allow_remove_stale_moving_parts>
</clickhouse>

View File

@ -0,0 +1,117 @@
from pathlib import Path
import time
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
ch1 = cluster.add_instance(
"ch1",
main_configs=[
"config.xml",
],
macros={"replica": "node1"},
with_zookeeper=True,
with_minio=True,
)
DATABASE_NAME = "stale_moving_parts"
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def q(node, query):
return node.query(database=DATABASE_NAME, sql=query)
# .../disks/s3/store/
def get_table_path(node, table):
return (
node.query(
sql=f"SELECT data_paths FROM system.tables WHERE table = '{table}' and database = '{DATABASE_NAME}' LIMIT 1"
)
.strip('"\n[]')
.split(",")[1]
.strip("'")
)
def exec(node, cmd, path):
return node.exec_in_container(
[
"bash",
"-c",
f"{cmd} {path}",
]
)
def wait_part_is_stuck(node, table_moving_path, moving_part):
num_tries = 5
while q(node, "SELECT part_name FROM system.moves").strip() != moving_part:
if num_tries == 0:
raise Exception("Part has not started to move")
num_tries -= 1
time.sleep(1)
num_tries = 5
while exec(node, "ls", table_moving_path).strip() != moving_part:
if num_tries == 0:
raise Exception("Part is not stuck in the moving directory")
num_tries -= 1
time.sleep(1)
def wait_zookeeper_node_to_start(zk_nodes, timeout=60):
start = time.time()
while time.time() - start < timeout:
try:
for instance in zk_nodes:
conn = cluster.get_kazoo_client(instance)
conn.get_children("/")
print("All instances of ZooKeeper started")
return
except Exception as ex:
print(("Can't connect to ZooKeeper " + str(ex)))
time.sleep(0.5)
def test_remove_stale_moving_parts_without_zookeeper(started_cluster):
ch1.query(f"CREATE DATABASE IF NOT EXISTS {DATABASE_NAME}")
q(
ch1,
"CREATE TABLE test_remove ON CLUSTER cluster ( id UInt32 ) ENGINE ReplicatedMergeTree() ORDER BY id;",
)
table_moving_path = Path(get_table_path(ch1, "test_remove")) / "moving"
q(ch1, "SYSTEM ENABLE FAILPOINT stop_moving_part_before_swap_with_active")
q(ch1, "INSERT INTO test_remove SELECT number FROM numbers(100);")
moving_part = "all_0_0_0"
move_response = ch1.get_query_request(
sql=f"ALTER TABLE test_remove MOVE PART '{moving_part}' TO DISK 's3'",
database=DATABASE_NAME,
)
wait_part_is_stuck(ch1, table_moving_path, moving_part)
cluster.stop_zookeeper_nodes(["zoo1", "zoo2", "zoo3"])
# Stop moves in case table is not read-only yet
q(ch1, "SYSTEM STOP MOVES")
q(ch1, "SYSTEM DISABLE FAILPOINT stop_moving_part_before_swap_with_active")
assert "Cancelled moving parts" in move_response.get_error()
assert exec(ch1, "ls", table_moving_path).strip() == ""
cluster.start_zookeeper_nodes(["zoo1", "zoo2", "zoo3"])
wait_zookeeper_node_to_start(["zoo1", "zoo2", "zoo3"])
q(ch1, "SYSTEM START MOVES")
q(ch1, f"DROP TABLE test_remove")

View File

@ -560,7 +560,6 @@ positionCaseInsensitive
positionCaseInsensitiveUTF8 positionCaseInsensitiveUTF8
positionUTF8 positionUTF8
pow pow
printf
proportionsZTest proportionsZTest
protocol protocol
queryID queryID

View File

@ -1,2 +1,2 @@
default 127.0.0.1 9181 0 0 0 1 1 ['FILTERED_LIST','MULTI_READ','CHECK_NOT_EXISTS','CREATE_IF_NOT_EXISTS'] default 127.0.0.1 9181 0 0 0 1 1 ['FILTERED_LIST','MULTI_READ','CHECK_NOT_EXISTS','CREATE_IF_NOT_EXISTS','REMOVE_RECURSIVE']
zookeeper2 localhost 9181 0 0 0 1 zookeeper2 localhost 9181 0 0 0 1