mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Compare commits
38 Commits
676b8e7048
...
90f3f0de05
Author | SHA1 | Date | |
---|---|---|---|
|
90f3f0de05 | ||
|
19e2197582 | ||
|
d793e06860 | ||
|
d223c4547f | ||
|
1986fb1418 | ||
|
58993d3f3b | ||
|
f36408a666 | ||
|
de85f5f251 | ||
|
85af661b9c | ||
|
b42c6491e4 | ||
|
1a4c7b7c61 | ||
|
14feba8443 | ||
|
4c4a051d5e | ||
|
a55cc03973 | ||
|
37411bf240 | ||
|
6f63a7b213 | ||
|
56cfa74a14 | ||
|
dbb1d043fe | ||
|
a461d20af9 | ||
|
b55d0b54ea | ||
|
418ef3f8bc | ||
|
b420bbf855 | ||
|
6a7cfd13f7 | ||
|
baf6aaef1d | ||
|
9ca149a487 | ||
|
042194e3f6 | ||
|
120e38c72a | ||
|
38b5ea9066 | ||
|
fe5e061fff | ||
|
f6b965872f | ||
|
22c3b71196 | ||
|
7425d4aa1a | ||
|
cf12e3924f | ||
|
cfc931160d | ||
|
b2c4b771d8 | ||
|
edf4e09fb2 | ||
|
07f44fdb89 | ||
|
2fcbe2465a |
34
.github/actions/debug/action.yml
vendored
34
.github/actions/debug/action.yml
vendored
@ -4,15 +4,31 @@ description: Prints workflow debug info
|
|||||||
runs:
|
runs:
|
||||||
using: "composite"
|
using: "composite"
|
||||||
steps:
|
steps:
|
||||||
- name: Print envs
|
- name: Envs, event.json and contexts
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
echo "::group::Envs"
|
echo '::group::Environment variables'
|
||||||
env
|
env | sort
|
||||||
echo "::endgroup::"
|
echo '::endgroup::'
|
||||||
- name: Print Event.json
|
|
||||||
shell: bash
|
echo '::group::event.json'
|
||||||
run: |
|
|
||||||
echo "::group::Event.json"
|
|
||||||
python3 -m json.tool "$GITHUB_EVENT_PATH"
|
python3 -m json.tool "$GITHUB_EVENT_PATH"
|
||||||
echo "::endgroup::"
|
echo '::endgroup::'
|
||||||
|
|
||||||
|
cat << 'EOF'
|
||||||
|
::group::github context
|
||||||
|
${{ toJSON(github) }}
|
||||||
|
::endgroup::
|
||||||
|
|
||||||
|
::group::env context
|
||||||
|
${{ toJSON(env) }}
|
||||||
|
::endgroup::
|
||||||
|
|
||||||
|
::group::runner context
|
||||||
|
${{ toJSON(runner) }}
|
||||||
|
::endgroup::
|
||||||
|
|
||||||
|
::group::job context
|
||||||
|
${{ toJSON(job) }}
|
||||||
|
::endgroup::
|
||||||
|
EOF
|
||||||
|
2
.github/workflows/backport_branches.yml
vendored
2
.github/workflows/backport_branches.yml
vendored
@ -27,6 +27,8 @@ jobs:
|
|||||||
clear-repository: true # to ensure correct digests
|
clear-repository: true # to ensure correct digests
|
||||||
fetch-depth: 0 # to get version
|
fetch-depth: 0 # to get version
|
||||||
filter: tree:0
|
filter: tree:0
|
||||||
|
- name: Debug Info
|
||||||
|
uses: ./.github/actions/debug
|
||||||
- name: Labels check
|
- name: Labels check
|
||||||
run: |
|
run: |
|
||||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||||
|
2
.github/workflows/cherry_pick.yml
vendored
2
.github/workflows/cherry_pick.yml
vendored
@ -33,6 +33,8 @@ jobs:
|
|||||||
clear-repository: true
|
clear-repository: true
|
||||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
- name: Debug Info
|
||||||
|
uses: ./.github/actions/debug
|
||||||
- name: Cherry pick
|
- name: Cherry pick
|
||||||
run: |
|
run: |
|
||||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||||
|
4
.github/workflows/create_release.yml
vendored
4
.github/workflows/create_release.yml
vendored
@ -56,13 +56,13 @@ jobs:
|
|||||||
GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
|
GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
|
||||||
runs-on: [self-hosted, release-maker]
|
runs-on: [self-hosted, release-maker]
|
||||||
steps:
|
steps:
|
||||||
- name: DebugInfo
|
|
||||||
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
|
|
||||||
- name: Check out repository code
|
- name: Check out repository code
|
||||||
uses: ClickHouse/checkout@v1
|
uses: ClickHouse/checkout@v1
|
||||||
with:
|
with:
|
||||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
- name: Debug Info
|
||||||
|
uses: ./.github/actions/debug
|
||||||
- name: Prepare Release Info
|
- name: Prepare Release Info
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
1
.github/workflows/docker_test_images.yml
vendored
1
.github/workflows/docker_test_images.yml
vendored
@ -11,6 +11,7 @@ name: Build docker images
|
|||||||
required: false
|
required: false
|
||||||
type: boolean
|
type: boolean
|
||||||
default: false
|
default: false
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
DockerBuildAarch64:
|
DockerBuildAarch64:
|
||||||
runs-on: [self-hosted, style-checker-aarch64]
|
runs-on: [self-hosted, style-checker-aarch64]
|
||||||
|
5
.github/workflows/jepsen.yml
vendored
5
.github/workflows/jepsen.yml
vendored
@ -8,20 +8,21 @@ on: # yamllint disable-line rule:truthy
|
|||||||
schedule:
|
schedule:
|
||||||
- cron: '0 */6 * * *'
|
- cron: '0 */6 * * *'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
RunConfig:
|
RunConfig:
|
||||||
runs-on: [self-hosted, style-checker-aarch64]
|
runs-on: [self-hosted, style-checker-aarch64]
|
||||||
outputs:
|
outputs:
|
||||||
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
||||||
steps:
|
steps:
|
||||||
- name: DebugInfo
|
|
||||||
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
|
|
||||||
- name: Check out repository code
|
- name: Check out repository code
|
||||||
uses: ClickHouse/checkout@v1
|
uses: ClickHouse/checkout@v1
|
||||||
with:
|
with:
|
||||||
clear-repository: true # to ensure correct digests
|
clear-repository: true # to ensure correct digests
|
||||||
fetch-depth: 0 # to get version
|
fetch-depth: 0 # to get version
|
||||||
filter: tree:0
|
filter: tree:0
|
||||||
|
- name: Debug Info
|
||||||
|
uses: ./.github/actions/debug
|
||||||
- name: PrepareRunConfig
|
- name: PrepareRunConfig
|
||||||
id: runconfig
|
id: runconfig
|
||||||
run: |
|
run: |
|
||||||
|
4
.github/workflows/master.yml
vendored
4
.github/workflows/master.yml
vendored
@ -15,14 +15,14 @@ jobs:
|
|||||||
outputs:
|
outputs:
|
||||||
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
||||||
steps:
|
steps:
|
||||||
- name: DebugInfo
|
|
||||||
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
|
|
||||||
- name: Check out repository code
|
- name: Check out repository code
|
||||||
uses: ClickHouse/checkout@v1
|
uses: ClickHouse/checkout@v1
|
||||||
with:
|
with:
|
||||||
clear-repository: true # to ensure correct digests
|
clear-repository: true # to ensure correct digests
|
||||||
fetch-depth: 0 # to get version
|
fetch-depth: 0 # to get version
|
||||||
filter: tree:0
|
filter: tree:0
|
||||||
|
- name: Debug Info
|
||||||
|
uses: ./.github/actions/debug
|
||||||
- name: Merge sync PR
|
- name: Merge sync PR
|
||||||
run: |
|
run: |
|
||||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||||
|
4
.github/workflows/merge_queue.yml
vendored
4
.github/workflows/merge_queue.yml
vendored
@ -14,14 +14,14 @@ jobs:
|
|||||||
outputs:
|
outputs:
|
||||||
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
||||||
steps:
|
steps:
|
||||||
- name: DebugInfo
|
|
||||||
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
|
|
||||||
- name: Check out repository code
|
- name: Check out repository code
|
||||||
uses: ClickHouse/checkout@v1
|
uses: ClickHouse/checkout@v1
|
||||||
with:
|
with:
|
||||||
clear-repository: true # to ensure correct digests
|
clear-repository: true # to ensure correct digests
|
||||||
fetch-depth: 0 # to get a version
|
fetch-depth: 0 # to get a version
|
||||||
filter: tree:0
|
filter: tree:0
|
||||||
|
- name: Debug Info
|
||||||
|
uses: ./.github/actions/debug
|
||||||
- name: Cancel PR workflow
|
- name: Cancel PR workflow
|
||||||
run: |
|
run: |
|
||||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run
|
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run
|
||||||
|
4
.github/workflows/nightly.yml
vendored
4
.github/workflows/nightly.yml
vendored
@ -15,14 +15,14 @@ jobs:
|
|||||||
outputs:
|
outputs:
|
||||||
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
||||||
steps:
|
steps:
|
||||||
- name: DebugInfo
|
|
||||||
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
|
|
||||||
- name: Check out repository code
|
- name: Check out repository code
|
||||||
uses: ClickHouse/checkout@v1
|
uses: ClickHouse/checkout@v1
|
||||||
with:
|
with:
|
||||||
clear-repository: true # to ensure correct digests
|
clear-repository: true # to ensure correct digests
|
||||||
fetch-depth: 0 # to get version
|
fetch-depth: 0 # to get version
|
||||||
filter: tree:0
|
filter: tree:0
|
||||||
|
- name: Debug Info
|
||||||
|
uses: ./.github/actions/debug
|
||||||
- name: PrepareRunConfig
|
- name: PrepareRunConfig
|
||||||
id: runconfig
|
id: runconfig
|
||||||
run: |
|
run: |
|
||||||
|
4
.github/workflows/pull_request.yml
vendored
4
.github/workflows/pull_request.yml
vendored
@ -25,14 +25,14 @@ jobs:
|
|||||||
outputs:
|
outputs:
|
||||||
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
||||||
steps:
|
steps:
|
||||||
- name: DebugInfo
|
|
||||||
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
|
|
||||||
- name: Check out repository code
|
- name: Check out repository code
|
||||||
uses: ClickHouse/checkout@v1
|
uses: ClickHouse/checkout@v1
|
||||||
with:
|
with:
|
||||||
clear-repository: true # to ensure correct digests
|
clear-repository: true # to ensure correct digests
|
||||||
fetch-depth: 0 # to get a version
|
fetch-depth: 0 # to get a version
|
||||||
filter: tree:0
|
filter: tree:0
|
||||||
|
- name: Debug Info
|
||||||
|
uses: ./.github/actions/debug
|
||||||
- name: Cancel previous Sync PR workflow
|
- name: Cancel previous Sync PR workflow
|
||||||
run: |
|
run: |
|
||||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run
|
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run
|
||||||
|
2
.github/workflows/release_branches.yml
vendored
2
.github/workflows/release_branches.yml
vendored
@ -24,6 +24,8 @@ jobs:
|
|||||||
clear-repository: true # to ensure correct digests
|
clear-repository: true # to ensure correct digests
|
||||||
fetch-depth: 0 # to get version
|
fetch-depth: 0 # to get version
|
||||||
filter: tree:0
|
filter: tree:0
|
||||||
|
- name: Debug Info
|
||||||
|
uses: ./.github/actions/debug
|
||||||
- name: Labels check
|
- name: Labels check
|
||||||
run: |
|
run: |
|
||||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||||
|
4
.github/workflows/reusable_simple_job.yml
vendored
4
.github/workflows/reusable_simple_job.yml
vendored
@ -62,8 +62,6 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}
|
GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}
|
||||||
steps:
|
steps:
|
||||||
- name: DebugInfo
|
|
||||||
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
|
|
||||||
- name: Check out repository code
|
- name: Check out repository code
|
||||||
uses: ClickHouse/checkout@v1
|
uses: ClickHouse/checkout@v1
|
||||||
with:
|
with:
|
||||||
@ -72,6 +70,8 @@ jobs:
|
|||||||
submodules: ${{inputs.submodules}}
|
submodules: ${{inputs.submodules}}
|
||||||
fetch-depth: ${{inputs.checkout_depth}}
|
fetch-depth: ${{inputs.checkout_depth}}
|
||||||
filter: tree:0
|
filter: tree:0
|
||||||
|
- name: Debug Info
|
||||||
|
uses: ./.github/actions/debug
|
||||||
- name: Set build envs
|
- name: Set build envs
|
||||||
run: |
|
run: |
|
||||||
cat >> "$GITHUB_ENV" << 'EOF'
|
cat >> "$GITHUB_ENV" << 'EOF'
|
||||||
|
@ -13,16 +13,17 @@ Here is a complete list of available database engines. Follow the links for more
|
|||||||
|
|
||||||
- [Atomic](../../engines/database-engines/atomic.md)
|
- [Atomic](../../engines/database-engines/atomic.md)
|
||||||
|
|
||||||
- [MySQL](../../engines/database-engines/mysql.md)
|
- [Lazy](../../engines/database-engines/lazy.md)
|
||||||
|
|
||||||
|
- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md)
|
||||||
|
|
||||||
- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md)
|
- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md)
|
||||||
|
|
||||||
- [Lazy](../../engines/database-engines/lazy.md)
|
- [MySQL](../../engines/database-engines/mysql.md)
|
||||||
|
|
||||||
- [PostgreSQL](../../engines/database-engines/postgresql.md)
|
- [PostgreSQL](../../engines/database-engines/postgresql.md)
|
||||||
|
|
||||||
- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md)
|
|
||||||
|
|
||||||
- [Replicated](../../engines/database-engines/replicated.md)
|
- [Replicated](../../engines/database-engines/replicated.md)
|
||||||
|
|
||||||
- [SQLite](../../engines/database-engines/sqlite.md)
|
- [SQLite](../../engines/database-engines/sqlite.md)
|
||||||
|
|
||||||
|
@ -107,6 +107,10 @@ The vector similarity index currently does not work with per-table, non-default
|
|||||||
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
|
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
|
Vector index creation is known to be slow. To speed the process up, index creation can be parallelized. The maximum number of threads can be
|
||||||
|
configured using server configuration
|
||||||
|
setting [max_build_vector_similarity_index_thread_pool_size](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters_max_build_vector_similarity_index_thread_pool_size).
|
||||||
|
|
||||||
ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
|
ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
|
||||||
tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write
|
tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write
|
||||||
requests.
|
requests.
|
||||||
|
@ -491,6 +491,14 @@ Type: Double
|
|||||||
|
|
||||||
Default: 0.9
|
Default: 0.9
|
||||||
|
|
||||||
|
## max_build_vector_similarity_index_thread_pool_size {#server_configuration_parameters_max_build_vector_similarity_index_thread_pool_size}
|
||||||
|
|
||||||
|
The maximum number of threads to use for building vector indexes. 0 means all cores.
|
||||||
|
|
||||||
|
Type: UInt64
|
||||||
|
|
||||||
|
Default: 16
|
||||||
|
|
||||||
## cgroups_memory_usage_observer_wait_time
|
## cgroups_memory_usage_observer_wait_time
|
||||||
|
|
||||||
Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. (see
|
Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. (see
|
||||||
|
@ -3,370 +3,89 @@
|
|||||||
#include <Parsers/FunctionSecretArgumentsFinder.h>
|
#include <Parsers/FunctionSecretArgumentsFinder.h>
|
||||||
#include <Analyzer/ConstantNode.h>
|
#include <Analyzer/ConstantNode.h>
|
||||||
#include <Analyzer/FunctionNode.h>
|
#include <Analyzer/FunctionNode.h>
|
||||||
#include <Analyzer/IQueryTreeNode.h>
|
|
||||||
#include <Analyzer/IdentifierNode.h>
|
#include <Analyzer/IdentifierNode.h>
|
||||||
#include <Analyzer/ListNode.h>
|
|
||||||
#include <Common/KnownObjectNames.h>
|
|
||||||
#include <Core/QualifiedTableName.h>
|
|
||||||
|
|
||||||
#include <boost/algorithm/string/predicate.hpp>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
class FunctionTreeNode : public AbstractFunction
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
class ArgumentTreeNode : public Argument
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit ArgumentTreeNode(const IQueryTreeNode * argument_) : argument(argument_) {}
|
||||||
|
std::unique_ptr<AbstractFunction> getFunction() const override
|
||||||
|
{
|
||||||
|
if (const auto * f = argument->as<FunctionNode>())
|
||||||
|
return std::make_unique<FunctionTreeNode>(*f);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
bool isIdentifier() const override { return argument->as<IdentifierNode>(); }
|
||||||
|
bool tryGetString(String * res, bool allow_identifier) const override
|
||||||
|
{
|
||||||
|
if (const auto * literal = argument->as<ConstantNode>())
|
||||||
|
{
|
||||||
|
if (literal->getValue().getType() != Field::Types::String)
|
||||||
|
return false;
|
||||||
|
if (res)
|
||||||
|
*res = literal->getValue().safeGet<String>();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (allow_identifier)
|
||||||
|
{
|
||||||
|
if (const auto * id = argument->as<IdentifierNode>())
|
||||||
|
{
|
||||||
|
if (res)
|
||||||
|
*res = id->getIdentifier().getFullName();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
const IQueryTreeNode * argument = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ArgumentsTreeNode : public Arguments
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit ArgumentsTreeNode(const QueryTreeNodes * arguments_) : arguments(arguments_) {}
|
||||||
|
size_t size() const override { return arguments ? arguments->size() : 0; }
|
||||||
|
std::unique_ptr<Argument> at(size_t n) const override { return std::make_unique<ArgumentTreeNode>(arguments->at(n).get()); }
|
||||||
|
private:
|
||||||
|
const QueryTreeNodes * arguments = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
explicit FunctionTreeNode(const FunctionNode & function_) : function(&function_)
|
||||||
|
{
|
||||||
|
if (const auto & nodes = function->getArguments().getNodes(); !nodes.empty())
|
||||||
|
arguments = std::make_unique<ArgumentsTreeNode>(&nodes);
|
||||||
|
}
|
||||||
|
String name() const override { return function->getFunctionName(); }
|
||||||
|
private:
|
||||||
|
const FunctionNode * function = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
|
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
|
||||||
/// That involves passwords and secret keys.
|
/// That involves passwords and secret keys.
|
||||||
class FunctionSecretArgumentsFinderTreeNode
|
class FunctionSecretArgumentsFinderTreeNode : public FunctionSecretArgumentsFinder
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_) : function(function_), arguments(function.getArguments())
|
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_)
|
||||||
|
: FunctionSecretArgumentsFinder(std::make_unique<FunctionTreeNode>(function_))
|
||||||
{
|
{
|
||||||
if (arguments.getNodes().empty())
|
if (!function->hasArguments())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
findFunctionSecretArguments();
|
findOrdinaryFunctionSecretArguments();
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Result
|
|
||||||
{
|
|
||||||
/// Result constructed by default means no arguments will be hidden.
|
|
||||||
size_t start = static_cast<size_t>(-1);
|
|
||||||
size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`).
|
|
||||||
/// In all known cases secret arguments are consecutive
|
|
||||||
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
|
|
||||||
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
|
|
||||||
std::vector<std::string> nested_maps;
|
|
||||||
|
|
||||||
bool hasSecrets() const
|
|
||||||
{
|
|
||||||
return count != 0 || !nested_maps.empty();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
|
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
|
||||||
|
|
||||||
private:
|
|
||||||
const FunctionNode & function;
|
|
||||||
const ListNode & arguments;
|
|
||||||
FunctionSecretArgumentsFinder::Result result;
|
|
||||||
|
|
||||||
void markSecretArgument(size_t index, bool argument_is_named = false)
|
|
||||||
{
|
|
||||||
if (index >= arguments.getNodes().size())
|
|
||||||
return;
|
|
||||||
if (!result.count)
|
|
||||||
{
|
|
||||||
result.start = index;
|
|
||||||
result.are_named = argument_is_named;
|
|
||||||
}
|
|
||||||
chassert(index >= result.start); /// We always check arguments consecutively
|
|
||||||
result.count = index + 1 - result.start;
|
|
||||||
if (!argument_is_named)
|
|
||||||
result.are_named = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void findFunctionSecretArguments()
|
|
||||||
{
|
|
||||||
const auto & name = function.getFunctionName();
|
|
||||||
|
|
||||||
if ((name == "mysql") || (name == "postgresql") || (name == "mongodb"))
|
|
||||||
{
|
|
||||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
|
||||||
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
|
|
||||||
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
|
|
||||||
findMySQLFunctionSecretArguments();
|
|
||||||
}
|
|
||||||
else if ((name == "s3") || (name == "cosn") || (name == "oss") ||
|
|
||||||
(name == "deltaLake") || (name == "hudi") || (name == "iceberg"))
|
|
||||||
{
|
|
||||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
|
||||||
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
|
|
||||||
}
|
|
||||||
else if (name == "s3Cluster")
|
|
||||||
{
|
|
||||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
|
||||||
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
|
|
||||||
}
|
|
||||||
else if ((name == "remote") || (name == "remoteSecure"))
|
|
||||||
{
|
|
||||||
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
|
|
||||||
findRemoteFunctionSecretArguments();
|
|
||||||
}
|
|
||||||
else if ((name == "encrypt") || (name == "decrypt") ||
|
|
||||||
(name == "aes_encrypt_mysql") || (name == "aes_decrypt_mysql") ||
|
|
||||||
(name == "tryDecrypt"))
|
|
||||||
{
|
|
||||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
|
||||||
findEncryptionFunctionSecretArguments();
|
|
||||||
}
|
|
||||||
else if (name == "url")
|
|
||||||
{
|
|
||||||
findURLSecretArguments();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void findMySQLFunctionSecretArguments()
|
|
||||||
{
|
|
||||||
if (isNamedCollectionName(0))
|
|
||||||
{
|
|
||||||
/// mysql(named_collection, ..., password = 'password', ...)
|
|
||||||
findSecretNamedArgument("password", 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
|
||||||
markSecretArgument(4);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
|
|
||||||
/// always be at the end). Marks "headers" as secret, if found.
|
|
||||||
size_t excludeS3OrURLNestedMaps()
|
|
||||||
{
|
|
||||||
const auto & nodes = arguments.getNodes();
|
|
||||||
size_t count = nodes.size();
|
|
||||||
while (count > 0)
|
|
||||||
{
|
|
||||||
const FunctionNode * f = nodes.at(count - 1)->as<FunctionNode>();
|
|
||||||
if (!f)
|
|
||||||
break;
|
|
||||||
if (f->getFunctionName() == "headers")
|
|
||||||
result.nested_maps.push_back(f->getFunctionName());
|
|
||||||
else if (f->getFunctionName() != "extra_credentials")
|
|
||||||
break;
|
|
||||||
count -= 1;
|
|
||||||
}
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
void findS3FunctionSecretArguments(bool is_cluster_function)
|
|
||||||
{
|
|
||||||
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
|
|
||||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
|
||||||
|
|
||||||
if (!is_cluster_function && isNamedCollectionName(0))
|
|
||||||
{
|
|
||||||
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
|
|
||||||
findSecretNamedArgument("secret_access_key", 1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
|
||||||
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
|
||||||
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
|
||||||
size_t count = excludeS3OrURLNestedMaps();
|
|
||||||
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
|
|
||||||
{
|
|
||||||
String second_arg;
|
|
||||||
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
|
|
||||||
{
|
|
||||||
if (boost::iequals(second_arg, "NOSIGN"))
|
|
||||||
return; /// The argument after 'url' is "NOSIGN".
|
|
||||||
|
|
||||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
|
||||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
|
||||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
|
||||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
|
||||||
if (url_arg_idx + 2 < count)
|
|
||||||
markSecretArgument(url_arg_idx + 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
void findURLSecretArguments()
|
|
||||||
{
|
|
||||||
if (!isNamedCollectionName(0))
|
|
||||||
excludeS3OrURLNestedMaps();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
|
|
||||||
{
|
|
||||||
if (arg_idx >= arguments.getNodes().size())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return tryGetStringFromArgument(arguments.getNodes()[arg_idx], res, allow_identifier);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool tryGetStringFromArgument(const QueryTreeNodePtr argument, String * res, bool allow_identifier = true)
|
|
||||||
{
|
|
||||||
if (const auto * literal = argument->as<ConstantNode>())
|
|
||||||
{
|
|
||||||
if (literal->getValue().getType() != Field::Types::String)
|
|
||||||
return false;
|
|
||||||
if (res)
|
|
||||||
*res = literal->getValue().safeGet<String>();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (allow_identifier)
|
|
||||||
{
|
|
||||||
if (const auto * id = argument->as<IdentifierNode>())
|
|
||||||
{
|
|
||||||
if (res)
|
|
||||||
*res = id->getIdentifier().getFullName();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void findRemoteFunctionSecretArguments()
|
|
||||||
{
|
|
||||||
if (isNamedCollectionName(0))
|
|
||||||
{
|
|
||||||
/// remote(named_collection, ..., password = 'password', ...)
|
|
||||||
findSecretNamedArgument("password", 1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
|
|
||||||
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
|
|
||||||
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
|
|
||||||
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
|
|
||||||
|
|
||||||
/// But we should check the number of arguments first because we don't need to do any replacements in case of
|
|
||||||
/// remote('addresses_expr', db.table)
|
|
||||||
if (arguments.getNodes().size() < 3)
|
|
||||||
return;
|
|
||||||
|
|
||||||
size_t arg_num = 1;
|
|
||||||
|
|
||||||
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
|
|
||||||
const auto * table_function = arguments.getNodes()[arg_num]->as<FunctionNode>();
|
|
||||||
if (table_function && KnownTableFunctionNames::instance().exists(table_function->getFunctionName()))
|
|
||||||
{
|
|
||||||
++arg_num;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::optional<String> database;
|
|
||||||
std::optional<QualifiedTableName> qualified_table_name;
|
|
||||||
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
|
|
||||||
{
|
|
||||||
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
|
|
||||||
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
|
|
||||||
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
|
|
||||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
|
||||||
/// before wiping it (because the `password` argument is always a literal string).
|
|
||||||
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
|
|
||||||
{
|
|
||||||
/// Wipe either `password` or `user`.
|
|
||||||
markSecretArgument(arg_num + 2);
|
|
||||||
}
|
|
||||||
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
|
|
||||||
{
|
|
||||||
/// Wipe either `password` or `sharding_key`.
|
|
||||||
markSecretArgument(arg_num + 3);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Skip the current argument (which is either a database name or a qualified table name).
|
|
||||||
++arg_num;
|
|
||||||
if (database)
|
|
||||||
{
|
|
||||||
/// Skip the 'table' argument if the previous argument was a database name.
|
|
||||||
++arg_num;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Skip username.
|
|
||||||
++arg_num;
|
|
||||||
|
|
||||||
/// Do our replacement:
|
|
||||||
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
|
|
||||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
|
||||||
/// before wiping it (because the `password` argument is always a literal string).
|
|
||||||
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
|
|
||||||
if (can_be_password)
|
|
||||||
markSecretArgument(arg_num);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Tries to get either a database name or a qualified table name from an argument.
|
|
||||||
/// Empty string is also allowed (it means the default database).
|
|
||||||
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
|
|
||||||
bool tryGetDatabaseNameOrQualifiedTableName(
|
|
||||||
size_t arg_idx,
|
|
||||||
std::optional<String> & res_database,
|
|
||||||
std::optional<QualifiedTableName> & res_qualified_table_name) const
|
|
||||||
{
|
|
||||||
res_database.reset();
|
|
||||||
res_qualified_table_name.reset();
|
|
||||||
|
|
||||||
String str;
|
|
||||||
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (str.empty())
|
|
||||||
{
|
|
||||||
res_database = "";
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
|
|
||||||
if (!qualified_table_name)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (qualified_table_name->database.empty())
|
|
||||||
res_database = std::move(qualified_table_name->table);
|
|
||||||
else
|
|
||||||
res_qualified_table_name = std::move(qualified_table_name);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void findEncryptionFunctionSecretArguments()
|
|
||||||
{
|
|
||||||
if (arguments.getNodes().empty())
|
|
||||||
return;
|
|
||||||
|
|
||||||
/// We replace all arguments after 'mode' with '[HIDDEN]':
|
|
||||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
|
|
||||||
result.start = 1;
|
|
||||||
result.count = arguments.getNodes().size() - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Whether a specified argument can be the name of a named collection?
|
|
||||||
bool isNamedCollectionName(size_t arg_idx) const
|
|
||||||
{
|
|
||||||
if (arguments.getNodes().size() <= arg_idx)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
const auto * identifier = arguments.getNodes()[arg_idx]->as<IdentifierNode>();
|
|
||||||
return identifier != nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
|
|
||||||
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
|
|
||||||
{
|
|
||||||
for (size_t i = start; i < arguments.getNodes().size(); ++i)
|
|
||||||
{
|
|
||||||
const auto & argument = arguments.getNodes()[i];
|
|
||||||
const auto * equals_func = argument->as<FunctionNode>();
|
|
||||||
if (!equals_func || (equals_func->getFunctionName() != "equals"))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const auto * expr_list = equals_func->getArguments().as<ListNode>();
|
|
||||||
if (!expr_list)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const auto & equal_args = expr_list->getNodes();
|
|
||||||
if (equal_args.size() != 2)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
String found_key;
|
|
||||||
if (!tryGetStringFromArgument(equal_args[0], &found_key))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (found_key == key)
|
|
||||||
markSecretArgument(i, /* argument_is_named= */ true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -178,6 +178,9 @@
|
|||||||
M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \
|
M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \
|
||||||
M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \
|
M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \
|
||||||
M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \
|
M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \
|
||||||
|
M(BuildVectorSimilarityIndexThreads, "Number of threads in the build vector similarity index thread pool.") \
|
||||||
|
M(BuildVectorSimilarityIndexThreadsActive, "Number of threads in the build vector similarity index thread pool running a task.") \
|
||||||
|
M(BuildVectorSimilarityIndexThreadsScheduled, "Number of queued or active jobs in the build vector similarity index thread pool.") \
|
||||||
\
|
\
|
||||||
M(DiskPlainRewritableAzureDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for AzureObjectStorage.") \
|
M(DiskPlainRewritableAzureDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for AzureObjectStorage.") \
|
||||||
M(DiskPlainRewritableLocalDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for LocalObjectStorage.") \
|
M(DiskPlainRewritableLocalDirectoryMapSize, "Number of local-to-remote path entries in the 'plain_rewritable' in-memory map for LocalObjectStorage.") \
|
||||||
|
@ -63,6 +63,7 @@ static struct InitFiu
|
|||||||
REGULAR(keepermap_fail_drop_data) \
|
REGULAR(keepermap_fail_drop_data) \
|
||||||
REGULAR(lazy_pipe_fds_fail_close) \
|
REGULAR(lazy_pipe_fds_fail_close) \
|
||||||
PAUSEABLE(infinite_sleep) \
|
PAUSEABLE(infinite_sleep) \
|
||||||
|
PAUSEABLE(stop_moving_part_before_swap_with_active) \
|
||||||
|
|
||||||
|
|
||||||
namespace FailPoints
|
namespace FailPoints
|
||||||
|
@ -50,7 +50,7 @@ namespace DB
|
|||||||
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
|
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
|
||||||
M(String, default_database, "default", "Default database name.", 0) \
|
M(String, default_database, "default", "Default database name.", 0) \
|
||||||
M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
|
M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
|
||||||
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \
|
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \
|
||||||
M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
|
M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
|
||||||
M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
|
M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
|
||||||
M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \
|
M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \
|
||||||
@ -65,6 +65,7 @@ namespace DB
|
|||||||
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
|
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
|
||||||
M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
|
M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
|
||||||
M(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \
|
M(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \
|
||||||
|
M(UInt64, max_build_vector_similarity_index_thread_pool_size, 16, "The maximum number of threads to use to build vector similarity indexes. 0 means all cores.", 0) \
|
||||||
\
|
\
|
||||||
/* Database Catalog */ \
|
/* Database Catalog */ \
|
||||||
M(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \
|
M(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \
|
||||||
|
@ -50,13 +50,6 @@ private:
|
|||||||
return executeNonconstant(input);
|
return executeNonconstant(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
[[maybe_unused]] String toString() const
|
|
||||||
{
|
|
||||||
WriteBufferFromOwnString buf;
|
|
||||||
buf << "format:" << format << ", rows:" << rows << ", is_literal:" << is_literal << ", input:" << input.dumpStructure() << "\n";
|
|
||||||
return buf.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ColumnWithTypeAndName executeLiteral(std::string_view literal) const
|
ColumnWithTypeAndName executeLiteral(std::string_view literal) const
|
||||||
{
|
{
|
||||||
@ -231,9 +224,7 @@ public:
|
|||||||
const auto & instruction = instructions[i];
|
const auto & instruction = instructions[i];
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
// std::cout << "instruction[" << i << "]:" << instructions[i].toString() << std::endl;
|
|
||||||
concat_args[i] = instruction.execute();
|
concat_args[i] = instruction.execute();
|
||||||
// std::cout << "concat_args[" << i << "]:" << concat_args[i].dumpStructure() << std::endl;
|
|
||||||
}
|
}
|
||||||
catch (const fmt::v9::format_error & e)
|
catch (const fmt::v9::format_error & e)
|
||||||
{
|
{
|
||||||
@ -358,7 +349,14 @@ private:
|
|||||||
|
|
||||||
REGISTER_FUNCTION(Printf)
|
REGISTER_FUNCTION(Printf)
|
||||||
{
|
{
|
||||||
factory.registerFunction<FunctionPrintf>();
|
factory.registerFunction<FunctionPrintf>(
|
||||||
|
FunctionDocumentation{.description=R"(
|
||||||
|
The `printf` function formats the given string with the values (strings, integers, floating-points etc.) listed in the arguments, similar to printf function in C++.
|
||||||
|
The format string can contain format specifiers starting with `%` character.
|
||||||
|
Anything not contained in `%` and the following format specifier is considered literal text and copied verbatim into the output.
|
||||||
|
Literal `%` character can be escaped by `%%`.)", .examples{{"sum", "select printf('%%%s %s %d', 'Hello', 'World', 2024);", "%Hello World 2024"}}, .categories{"String"}
|
||||||
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include <Common/SensitiveDataMasker.h>
|
#include <Common/SensitiveDataMasker.h>
|
||||||
#include <Common/Macros.h>
|
#include <Common/Macros.h>
|
||||||
#include <Common/EventNotifier.h>
|
#include <Common/EventNotifier.h>
|
||||||
|
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||||
#include <Common/Stopwatch.h>
|
#include <Common/Stopwatch.h>
|
||||||
#include <Common/formatReadable.h>
|
#include <Common/formatReadable.h>
|
||||||
#include <Common/Throttler.h>
|
#include <Common/Throttler.h>
|
||||||
@ -121,7 +122,6 @@
|
|||||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||||
#include <base/defines.h>
|
#include <base/defines.h>
|
||||||
|
|
||||||
|
|
||||||
namespace fs = std::filesystem;
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
namespace ProfileEvents
|
namespace ProfileEvents
|
||||||
@ -164,6 +164,9 @@ namespace CurrentMetrics
|
|||||||
extern const Metric TablesLoaderForegroundThreadsActive;
|
extern const Metric TablesLoaderForegroundThreadsActive;
|
||||||
extern const Metric TablesLoaderForegroundThreadsScheduled;
|
extern const Metric TablesLoaderForegroundThreadsScheduled;
|
||||||
extern const Metric IOWriterThreadsScheduled;
|
extern const Metric IOWriterThreadsScheduled;
|
||||||
|
extern const Metric BuildVectorSimilarityIndexThreads;
|
||||||
|
extern const Metric BuildVectorSimilarityIndexThreadsActive;
|
||||||
|
extern const Metric BuildVectorSimilarityIndexThreadsScheduled;
|
||||||
extern const Metric AttachedTable;
|
extern const Metric AttachedTable;
|
||||||
extern const Metric AttachedView;
|
extern const Metric AttachedView;
|
||||||
extern const Metric AttachedDictionary;
|
extern const Metric AttachedDictionary;
|
||||||
@ -297,6 +300,8 @@ struct ContextSharedPart : boost::noncopyable
|
|||||||
mutable std::unique_ptr<ThreadPool> load_marks_threadpool; /// Threadpool for loading marks cache.
|
mutable std::unique_ptr<ThreadPool> load_marks_threadpool; /// Threadpool for loading marks cache.
|
||||||
mutable OnceFlag prefetch_threadpool_initialized;
|
mutable OnceFlag prefetch_threadpool_initialized;
|
||||||
mutable std::unique_ptr<ThreadPool> prefetch_threadpool; /// Threadpool for loading marks cache.
|
mutable std::unique_ptr<ThreadPool> prefetch_threadpool; /// Threadpool for loading marks cache.
|
||||||
|
mutable OnceFlag build_vector_similarity_index_threadpool_initialized;
|
||||||
|
mutable std::unique_ptr<ThreadPool> build_vector_similarity_index_threadpool; /// Threadpool for vector-similarity index creation.
|
||||||
mutable UncompressedCachePtr index_uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks for MergeTree indices.
|
mutable UncompressedCachePtr index_uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks for MergeTree indices.
|
||||||
mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex); /// Cache of query results.
|
mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex); /// Cache of query results.
|
||||||
mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices.
|
mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices.
|
||||||
@ -3297,6 +3302,21 @@ size_t Context::getPrefetchThreadpoolSize() const
|
|||||||
return config.getUInt(".prefetch_threadpool_pool_size", 100);
|
return config.getUInt(".prefetch_threadpool_pool_size", 100);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ThreadPool & Context::getBuildVectorSimilarityIndexThreadPool() const
|
||||||
|
{
|
||||||
|
callOnce(shared->build_vector_similarity_index_threadpool_initialized, [&] {
|
||||||
|
size_t pool_size = shared->server_settings.max_build_vector_similarity_index_thread_pool_size > 0
|
||||||
|
? shared->server_settings.max_build_vector_similarity_index_thread_pool_size
|
||||||
|
: getNumberOfPhysicalCPUCores();
|
||||||
|
shared->build_vector_similarity_index_threadpool = std::make_unique<ThreadPool>(
|
||||||
|
CurrentMetrics::BuildVectorSimilarityIndexThreads,
|
||||||
|
CurrentMetrics::BuildVectorSimilarityIndexThreadsActive,
|
||||||
|
CurrentMetrics::BuildVectorSimilarityIndexThreadsScheduled,
|
||||||
|
pool_size);
|
||||||
|
});
|
||||||
|
return *shared->build_vector_similarity_index_threadpool;
|
||||||
|
}
|
||||||
|
|
||||||
BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const
|
BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const
|
||||||
{
|
{
|
||||||
callOnce(shared->buffer_flush_schedule_pool_initialized, [&] {
|
callOnce(shared->buffer_flush_schedule_pool_initialized, [&] {
|
||||||
|
@ -1097,6 +1097,8 @@ public:
|
|||||||
/// and make a prefetch by putting a read task to threadpoolReader.
|
/// and make a prefetch by putting a read task to threadpoolReader.
|
||||||
size_t getPrefetchThreadpoolSize() const;
|
size_t getPrefetchThreadpoolSize() const;
|
||||||
|
|
||||||
|
ThreadPool & getBuildVectorSimilarityIndexThreadPool() const;
|
||||||
|
|
||||||
/// Settings for MergeTree background tasks stored in config.xml
|
/// Settings for MergeTree background tasks stored in config.xml
|
||||||
BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const;
|
BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const;
|
||||||
BackgroundTaskSchedulingSettings getBackgroundMoveTaskSchedulingSettings() const;
|
BackgroundTaskSchedulingSettings getBackgroundMoveTaskSchedulingSettings() const;
|
||||||
|
@ -1,10 +1,42 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <vector>
|
#include <Common/KnownObjectNames.h>
|
||||||
|
#include <Core/QualifiedTableName.h>
|
||||||
|
#include <base/defines.h>
|
||||||
|
#include <boost/algorithm/string/predicate.hpp>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
class AbstractFunction
|
||||||
|
{
|
||||||
|
friend class FunctionSecretArgumentsFinder;
|
||||||
|
public:
|
||||||
|
class Argument
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~Argument() = default;
|
||||||
|
virtual std::unique_ptr<AbstractFunction> getFunction() const = 0;
|
||||||
|
virtual bool isIdentifier() const = 0;
|
||||||
|
virtual bool tryGetString(String * res, bool allow_identifier) const = 0;
|
||||||
|
};
|
||||||
|
class Arguments
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~Arguments() = default;
|
||||||
|
virtual size_t size() const = 0;
|
||||||
|
virtual std::unique_ptr<Argument> at(size_t n) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
virtual ~AbstractFunction() = default;
|
||||||
|
virtual String name() const = 0;
|
||||||
|
bool hasArguments() const { return !!arguments; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
std::unique_ptr<Arguments> arguments;
|
||||||
|
};
|
||||||
|
|
||||||
class FunctionSecretArgumentsFinder
|
class FunctionSecretArgumentsFinder
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -23,6 +55,485 @@ public:
|
|||||||
return count != 0 || !nested_maps.empty();
|
return count != 0 || !nested_maps.empty();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
explicit FunctionSecretArgumentsFinder(std::unique_ptr<AbstractFunction> && function_) : function(std::move(function_)) {}
|
||||||
|
|
||||||
|
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
const std::unique_ptr<AbstractFunction> function;
|
||||||
|
Result result;
|
||||||
|
|
||||||
|
void markSecretArgument(size_t index, bool argument_is_named = false)
|
||||||
|
{
|
||||||
|
if (index >= function->arguments->size())
|
||||||
|
return;
|
||||||
|
if (!result.count)
|
||||||
|
{
|
||||||
|
result.start = index;
|
||||||
|
result.are_named = argument_is_named;
|
||||||
|
}
|
||||||
|
chassert(index >= result.start); /// We always check arguments consecutively
|
||||||
|
result.count = index + 1 - result.start;
|
||||||
|
if (!argument_is_named)
|
||||||
|
result.are_named = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void findOrdinaryFunctionSecretArguments()
|
||||||
|
{
|
||||||
|
if ((function->name() == "mysql") || (function->name() == "postgresql") || (function->name() == "mongodb"))
|
||||||
|
{
|
||||||
|
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||||
|
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||||
|
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||||
|
findMySQLFunctionSecretArguments();
|
||||||
|
}
|
||||||
|
else if ((function->name() == "s3") || (function->name() == "cosn") || (function->name() == "oss") ||
|
||||||
|
(function->name() == "deltaLake") || (function->name() == "hudi") || (function->name() == "iceberg") ||
|
||||||
|
(function->name() == "gcs"))
|
||||||
|
{
|
||||||
|
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||||
|
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
|
||||||
|
}
|
||||||
|
else if (function->name() == "s3Cluster")
|
||||||
|
{
|
||||||
|
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||||
|
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
|
||||||
|
}
|
||||||
|
else if (function->name() == "azureBlobStorage")
|
||||||
|
{
|
||||||
|
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
||||||
|
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
|
||||||
|
}
|
||||||
|
else if (function->name() == "azureBlobStorageCluster")
|
||||||
|
{
|
||||||
|
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||||
|
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
|
||||||
|
}
|
||||||
|
else if ((function->name() == "remote") || (function->name() == "remoteSecure"))
|
||||||
|
{
|
||||||
|
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
|
||||||
|
findRemoteFunctionSecretArguments();
|
||||||
|
}
|
||||||
|
else if ((function->name() == "encrypt") || (function->name() == "decrypt") ||
|
||||||
|
(function->name() == "aes_encrypt_mysql") || (function->name() == "aes_decrypt_mysql") ||
|
||||||
|
(function->name() == "tryDecrypt"))
|
||||||
|
{
|
||||||
|
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||||
|
findEncryptionFunctionSecretArguments();
|
||||||
|
}
|
||||||
|
else if (function->name() == "url")
|
||||||
|
{
|
||||||
|
findURLSecretArguments();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void findMySQLFunctionSecretArguments()
|
||||||
|
{
|
||||||
|
if (isNamedCollectionName(0))
|
||||||
|
{
|
||||||
|
/// mysql(named_collection, ..., password = 'password', ...)
|
||||||
|
findSecretNamedArgument("password", 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||||
|
markSecretArgument(4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
|
||||||
|
/// always be at the end). Marks "headers" as secret, if found.
|
||||||
|
size_t excludeS3OrURLNestedMaps()
|
||||||
|
{
|
||||||
|
size_t count = function->arguments->size();
|
||||||
|
while (count > 0)
|
||||||
|
{
|
||||||
|
const auto f = function->arguments->at(count - 1)->getFunction();
|
||||||
|
if (!f)
|
||||||
|
break;
|
||||||
|
if (f->name() == "headers")
|
||||||
|
result.nested_maps.push_back(f->name());
|
||||||
|
else if (f->name() != "extra_credentials")
|
||||||
|
break;
|
||||||
|
count -= 1;
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
void findS3FunctionSecretArguments(bool is_cluster_function)
|
||||||
|
{
|
||||||
|
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
|
||||||
|
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||||
|
|
||||||
|
if (!is_cluster_function && isNamedCollectionName(0))
|
||||||
|
{
|
||||||
|
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
|
||||||
|
findSecretNamedArgument("secret_access_key", 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||||
|
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||||
|
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||||
|
size_t count = excludeS3OrURLNestedMaps();
|
||||||
|
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
|
||||||
|
{
|
||||||
|
String second_arg;
|
||||||
|
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
|
||||||
|
{
|
||||||
|
if (boost::iequals(second_arg, "NOSIGN"))
|
||||||
|
return; /// The argument after 'url' is "NOSIGN".
|
||||||
|
|
||||||
|
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||||
|
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||||
|
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||||
|
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||||
|
if (url_arg_idx + 2 < count)
|
||||||
|
markSecretArgument(url_arg_idx + 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
|
||||||
|
{
|
||||||
|
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
|
||||||
|
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||||
|
|
||||||
|
if (!is_cluster_function && isNamedCollectionName(0))
|
||||||
|
{
|
||||||
|
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
|
||||||
|
findSecretNamedArgument("account_key", 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if (is_cluster_function && isNamedCollectionName(1))
|
||||||
|
{
|
||||||
|
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
|
||||||
|
findSecretNamedArgument("account_key", 2);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
|
||||||
|
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
||||||
|
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||||
|
size_t count = function->arguments->size();
|
||||||
|
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
|
||||||
|
{
|
||||||
|
String second_arg;
|
||||||
|
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
|
||||||
|
{
|
||||||
|
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||||
|
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
|
||||||
|
if (url_arg_idx + 4 < count)
|
||||||
|
markSecretArgument(url_arg_idx + 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
void findURLSecretArguments()
|
||||||
|
{
|
||||||
|
if (!isNamedCollectionName(0))
|
||||||
|
excludeS3OrURLNestedMaps();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
|
||||||
|
{
|
||||||
|
if (arg_idx >= function->arguments->size())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return tryGetStringFromArgument(*function->arguments->at(arg_idx), res, allow_identifier);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool tryGetStringFromArgument(const AbstractFunction::Argument & argument, String * res, bool allow_identifier = true)
|
||||||
|
{
|
||||||
|
return argument.tryGetString(res, allow_identifier);
|
||||||
|
}
|
||||||
|
|
||||||
|
void findRemoteFunctionSecretArguments()
|
||||||
|
{
|
||||||
|
if (isNamedCollectionName(0))
|
||||||
|
{
|
||||||
|
/// remote(named_collection, ..., password = 'password', ...)
|
||||||
|
findSecretNamedArgument("password", 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
|
||||||
|
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
|
||||||
|
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
|
||||||
|
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
|
||||||
|
|
||||||
|
/// But we should check the number of arguments first because we don't need to do any replacements in case of
|
||||||
|
/// remote('addresses_expr', db.table)
|
||||||
|
if (function->arguments->size() < 3)
|
||||||
|
return;
|
||||||
|
|
||||||
|
size_t arg_num = 1;
|
||||||
|
|
||||||
|
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
|
||||||
|
auto table_function = function->arguments->at(arg_num)->getFunction();
|
||||||
|
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name()))
|
||||||
|
{
|
||||||
|
++arg_num;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::optional<String> database;
|
||||||
|
std::optional<QualifiedTableName> qualified_table_name;
|
||||||
|
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
|
||||||
|
{
|
||||||
|
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
|
||||||
|
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
|
||||||
|
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
|
||||||
|
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
||||||
|
/// before wiping it (because the `password` argument is always a literal string).
|
||||||
|
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
|
||||||
|
{
|
||||||
|
/// Wipe either `password` or `user`.
|
||||||
|
markSecretArgument(arg_num + 2);
|
||||||
|
}
|
||||||
|
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
|
||||||
|
{
|
||||||
|
/// Wipe either `password` or `sharding_key`.
|
||||||
|
markSecretArgument(arg_num + 3);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Skip the current argument (which is either a database name or a qualified table name).
|
||||||
|
++arg_num;
|
||||||
|
if (database)
|
||||||
|
{
|
||||||
|
/// Skip the 'table' argument if the previous argument was a database name.
|
||||||
|
++arg_num;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Skip username.
|
||||||
|
++arg_num;
|
||||||
|
|
||||||
|
/// Do our replacement:
|
||||||
|
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
|
||||||
|
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
||||||
|
/// before wiping it (because the `password` argument is always a literal string).
|
||||||
|
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
|
||||||
|
if (can_be_password)
|
||||||
|
markSecretArgument(arg_num);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tries to get either a database name or a qualified table name from an argument.
|
||||||
|
/// Empty string is also allowed (it means the default database).
|
||||||
|
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
|
||||||
|
bool tryGetDatabaseNameOrQualifiedTableName(
|
||||||
|
size_t arg_idx,
|
||||||
|
std::optional<String> & res_database,
|
||||||
|
std::optional<QualifiedTableName> & res_qualified_table_name) const
|
||||||
|
{
|
||||||
|
res_database.reset();
|
||||||
|
res_qualified_table_name.reset();
|
||||||
|
|
||||||
|
String str;
|
||||||
|
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (str.empty())
|
||||||
|
{
|
||||||
|
res_database = "";
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
|
||||||
|
if (!qualified_table_name)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (qualified_table_name->database.empty())
|
||||||
|
res_database = std::move(qualified_table_name->table);
|
||||||
|
else
|
||||||
|
res_qualified_table_name = std::move(qualified_table_name);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void findEncryptionFunctionSecretArguments()
|
||||||
|
{
|
||||||
|
if (function->arguments->size() == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/// We replace all arguments after 'mode' with '[HIDDEN]':
|
||||||
|
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
|
||||||
|
result.start = 1;
|
||||||
|
result.count = function->arguments->size() - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void findTableEngineSecretArguments()
|
||||||
|
{
|
||||||
|
const String & engine_name = function->name();
|
||||||
|
if (engine_name == "ExternalDistributed")
|
||||||
|
{
|
||||||
|
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||||
|
findExternalDistributedTableEngineSecretArguments();
|
||||||
|
}
|
||||||
|
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
|
||||||
|
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
|
||||||
|
{
|
||||||
|
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||||
|
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||||
|
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||||
|
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||||
|
findMySQLFunctionSecretArguments();
|
||||||
|
}
|
||||||
|
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
|
||||||
|
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
|
||||||
|
{
|
||||||
|
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
|
||||||
|
findS3TableEngineSecretArguments();
|
||||||
|
}
|
||||||
|
else if (engine_name == "URL")
|
||||||
|
{
|
||||||
|
findURLSecretArguments();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void findExternalDistributedTableEngineSecretArguments()
|
||||||
|
{
|
||||||
|
if (isNamedCollectionName(1))
|
||||||
|
{
|
||||||
|
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
|
||||||
|
findSecretNamedArgument("password", 2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||||
|
markSecretArgument(5);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void findS3TableEngineSecretArguments()
|
||||||
|
{
|
||||||
|
if (isNamedCollectionName(0))
|
||||||
|
{
|
||||||
|
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
|
||||||
|
findSecretNamedArgument("secret_access_key", 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||||
|
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||||
|
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
|
||||||
|
size_t count = excludeS3OrURLNestedMaps();
|
||||||
|
if ((3 <= count) && (count <= 4))
|
||||||
|
{
|
||||||
|
String second_arg;
|
||||||
|
if (tryGetStringFromArgument(1, &second_arg))
|
||||||
|
{
|
||||||
|
if (boost::iequals(second_arg, "NOSIGN"))
|
||||||
|
return; /// The argument after 'url' is "NOSIGN".
|
||||||
|
|
||||||
|
if (count == 3)
|
||||||
|
{
|
||||||
|
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||||
|
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||||
|
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
|
||||||
|
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
|
||||||
|
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||||
|
if (2 < count)
|
||||||
|
markSecretArgument(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
void findDatabaseEngineSecretArguments()
|
||||||
|
{
|
||||||
|
const String & engine_name = function->name();
|
||||||
|
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
|
||||||
|
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
|
||||||
|
(engine_name == "MaterializedPostgreSQL"))
|
||||||
|
{
|
||||||
|
/// MySQL('host:port', 'database', 'user', 'password')
|
||||||
|
/// PostgreSQL('host:port', 'database', 'user', 'password')
|
||||||
|
findMySQLDatabaseSecretArguments();
|
||||||
|
}
|
||||||
|
else if (engine_name == "S3")
|
||||||
|
{
|
||||||
|
/// S3('url', 'access_key_id', 'secret_access_key')
|
||||||
|
findS3DatabaseSecretArguments();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void findMySQLDatabaseSecretArguments()
|
||||||
|
{
|
||||||
|
if (isNamedCollectionName(0))
|
||||||
|
{
|
||||||
|
/// MySQL(named_collection, ..., password = 'password', ...)
|
||||||
|
findSecretNamedArgument("password", 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// MySQL('host:port', 'database', 'user', 'password')
|
||||||
|
markSecretArgument(3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void findS3DatabaseSecretArguments()
|
||||||
|
{
|
||||||
|
if (isNamedCollectionName(0))
|
||||||
|
{
|
||||||
|
/// S3(named_collection, ..., secret_access_key = 'password', ...)
|
||||||
|
findSecretNamedArgument("secret_access_key", 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// S3('url', 'access_key_id', 'secret_access_key')
|
||||||
|
markSecretArgument(2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void findBackupNameSecretArguments()
|
||||||
|
{
|
||||||
|
const String & engine_name = function->name();
|
||||||
|
if (engine_name == "S3")
|
||||||
|
{
|
||||||
|
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
|
||||||
|
markSecretArgument(2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether a specified argument can be the name of a named collection?
|
||||||
|
bool isNamedCollectionName(size_t arg_idx) const
|
||||||
|
{
|
||||||
|
if (function->arguments->size() <= arg_idx)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return function->arguments->at(arg_idx)->isIdentifier();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
|
||||||
|
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
|
||||||
|
{
|
||||||
|
for (size_t i = start; i < function->arguments->size(); ++i)
|
||||||
|
{
|
||||||
|
const auto & argument = function->arguments->at(i);
|
||||||
|
const auto equals_func = argument->getFunction();
|
||||||
|
if (!equals_func || (equals_func->name() != "equals"))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!equals_func->arguments || equals_func->arguments->size() != 2)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
String found_key;
|
||||||
|
if (!tryGetStringFromArgument(*equals_func->arguments->at(0), &found_key))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (found_key == key)
|
||||||
|
markSecretArgument(i, /* argument_is_named= */ true);
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,35 +1,97 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <Parsers/FunctionSecretArgumentsFinder.h>
|
#include <Parsers/FunctionSecretArgumentsFinder.h>
|
||||||
#include <Core/QualifiedTableName.h>
|
|
||||||
#include <Parsers/ASTFunction.h>
|
#include <Parsers/ASTFunction.h>
|
||||||
#include <Parsers/ASTLiteral.h>
|
#include <Parsers/ASTLiteral.h>
|
||||||
#include <Parsers/ASTIdentifier.h>
|
#include <Parsers/ASTIdentifier.h>
|
||||||
#include <Common/KnownObjectNames.h>
|
|
||||||
|
|
||||||
#include <boost/algorithm/string/predicate.hpp>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
class FunctionAST : public AbstractFunction
|
||||||
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
|
|
||||||
/// That involves passwords and secret keys.
|
|
||||||
class FunctionSecretArgumentsFinderAST
|
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_) : function(function_)
|
class ArgumentAST : public Argument
|
||||||
{
|
{
|
||||||
if (!function.arguments)
|
public:
|
||||||
|
explicit ArgumentAST(const IAST * argument_) : argument(argument_) {}
|
||||||
|
std::unique_ptr<AbstractFunction> getFunction() const override
|
||||||
|
{
|
||||||
|
if (const auto * f = argument->as<ASTFunction>())
|
||||||
|
return std::make_unique<FunctionAST>(*f);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
bool isIdentifier() const override { return argument->as<ASTIdentifier>(); }
|
||||||
|
bool tryGetString(String * res, bool allow_identifier) const override
|
||||||
|
{
|
||||||
|
if (const auto * literal = argument->as<ASTLiteral>())
|
||||||
|
{
|
||||||
|
if (literal->value.getType() != Field::Types::String)
|
||||||
|
return false;
|
||||||
|
if (res)
|
||||||
|
*res = literal->value.safeGet<String>();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (allow_identifier)
|
||||||
|
{
|
||||||
|
if (const auto * id = argument->as<ASTIdentifier>())
|
||||||
|
{
|
||||||
|
if (res)
|
||||||
|
*res = id->name();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
const IAST * argument = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ArgumentsAST : public Arguments
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit ArgumentsAST(const ASTs * arguments_) : arguments(arguments_) {}
|
||||||
|
size_t size() const override { return arguments ? arguments->size() : 0; }
|
||||||
|
std::unique_ptr<Argument> at(size_t n) const override
|
||||||
|
{
|
||||||
|
return std::make_unique<ArgumentAST>(arguments->at(n).get());
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
const ASTs * arguments = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
explicit FunctionAST(const ASTFunction & function_) : function(&function_)
|
||||||
|
{
|
||||||
|
if (!function->arguments)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const auto * expr_list = function.arguments->as<ASTExpressionList>();
|
const auto * expr_list = function->arguments->as<ASTExpressionList>();
|
||||||
if (!expr_list)
|
if (!expr_list)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
arguments = &expr_list->children;
|
arguments = std::make_unique<ArgumentsAST>(&expr_list->children);
|
||||||
switch (function.kind)
|
}
|
||||||
|
|
||||||
|
String name() const override { return function->name; }
|
||||||
|
private:
|
||||||
|
const ASTFunction * function = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
|
||||||
|
/// That involves passwords and secret keys.
|
||||||
|
class FunctionSecretArgumentsFinderAST : public FunctionSecretArgumentsFinder
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_)
|
||||||
|
: FunctionSecretArgumentsFinder(std::make_unique<FunctionAST>(function_))
|
||||||
|
{
|
||||||
|
if (!function->hasArguments())
|
||||||
|
return;
|
||||||
|
|
||||||
|
switch (function_.kind)
|
||||||
{
|
{
|
||||||
case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break;
|
case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break;
|
||||||
case ASTFunction::Kind::WINDOW_FUNCTION: break;
|
case ASTFunction::Kind::WINDOW_FUNCTION: break;
|
||||||
@ -43,506 +105,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
|
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
|
||||||
|
|
||||||
private:
|
|
||||||
const ASTFunction & function;
|
|
||||||
const ASTs * arguments = nullptr;
|
|
||||||
FunctionSecretArgumentsFinder::Result result;
|
|
||||||
|
|
||||||
void markSecretArgument(size_t index, bool argument_is_named = false)
|
|
||||||
{
|
|
||||||
if (index >= arguments->size())
|
|
||||||
return;
|
|
||||||
if (!result.count)
|
|
||||||
{
|
|
||||||
result.start = index;
|
|
||||||
result.are_named = argument_is_named;
|
|
||||||
}
|
|
||||||
chassert(index >= result.start); /// We always check arguments consecutively
|
|
||||||
result.count = index + 1 - result.start;
|
|
||||||
if (!argument_is_named)
|
|
||||||
result.are_named = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void findOrdinaryFunctionSecretArguments()
|
|
||||||
{
|
|
||||||
if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb"))
|
|
||||||
{
|
|
||||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
|
||||||
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
|
|
||||||
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
|
|
||||||
findMySQLFunctionSecretArguments();
|
|
||||||
}
|
|
||||||
else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") ||
|
|
||||||
(function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg"))
|
|
||||||
{
|
|
||||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
|
||||||
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
|
|
||||||
}
|
|
||||||
else if (function.name == "s3Cluster")
|
|
||||||
{
|
|
||||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
|
||||||
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
|
|
||||||
}
|
|
||||||
else if (function.name == "azureBlobStorage")
|
|
||||||
{
|
|
||||||
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
|
||||||
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
|
|
||||||
}
|
|
||||||
else if (function.name == "azureBlobStorageCluster")
|
|
||||||
{
|
|
||||||
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
|
||||||
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
|
|
||||||
}
|
|
||||||
else if ((function.name == "remote") || (function.name == "remoteSecure"))
|
|
||||||
{
|
|
||||||
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
|
|
||||||
findRemoteFunctionSecretArguments();
|
|
||||||
}
|
|
||||||
else if ((function.name == "encrypt") || (function.name == "decrypt") ||
|
|
||||||
(function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") ||
|
|
||||||
(function.name == "tryDecrypt"))
|
|
||||||
{
|
|
||||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
|
||||||
findEncryptionFunctionSecretArguments();
|
|
||||||
}
|
|
||||||
else if (function.name == "url")
|
|
||||||
{
|
|
||||||
findURLSecretArguments();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void findMySQLFunctionSecretArguments()
|
|
||||||
{
|
|
||||||
if (isNamedCollectionName(0))
|
|
||||||
{
|
|
||||||
/// mysql(named_collection, ..., password = 'password', ...)
|
|
||||||
findSecretNamedArgument("password", 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
|
||||||
markSecretArgument(4);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
|
|
||||||
/// always be at the end). Marks "headers" as secret, if found.
|
|
||||||
size_t excludeS3OrURLNestedMaps()
|
|
||||||
{
|
|
||||||
size_t count = arguments->size();
|
|
||||||
while (count > 0)
|
|
||||||
{
|
|
||||||
const ASTFunction * f = arguments->at(count - 1)->as<ASTFunction>();
|
|
||||||
if (!f)
|
|
||||||
break;
|
|
||||||
if (f->name == "headers")
|
|
||||||
result.nested_maps.push_back(f->name);
|
|
||||||
else if (f->name != "extra_credentials")
|
|
||||||
break;
|
|
||||||
count -= 1;
|
|
||||||
}
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
void findS3FunctionSecretArguments(bool is_cluster_function)
|
|
||||||
{
|
|
||||||
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
|
|
||||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
|
||||||
|
|
||||||
if (!is_cluster_function && isNamedCollectionName(0))
|
|
||||||
{
|
|
||||||
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
|
|
||||||
findSecretNamedArgument("secret_access_key", 1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
|
||||||
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
|
||||||
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
|
||||||
size_t count = excludeS3OrURLNestedMaps();
|
|
||||||
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
|
|
||||||
{
|
|
||||||
String second_arg;
|
|
||||||
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
|
|
||||||
{
|
|
||||||
if (boost::iequals(second_arg, "NOSIGN"))
|
|
||||||
return; /// The argument after 'url' is "NOSIGN".
|
|
||||||
|
|
||||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
|
||||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
|
||||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
|
||||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
|
||||||
if (url_arg_idx + 2 < count)
|
|
||||||
markSecretArgument(url_arg_idx + 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
|
|
||||||
{
|
|
||||||
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
|
|
||||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
|
||||||
|
|
||||||
if (!is_cluster_function && isNamedCollectionName(0))
|
|
||||||
{
|
|
||||||
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
|
|
||||||
findSecretNamedArgument("account_key", 1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else if (is_cluster_function && isNamedCollectionName(1))
|
|
||||||
{
|
|
||||||
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
|
|
||||||
findSecretNamedArgument("account_key", 2);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
|
|
||||||
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
|
||||||
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
|
||||||
size_t count = arguments->size();
|
|
||||||
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
|
|
||||||
{
|
|
||||||
String second_arg;
|
|
||||||
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
|
|
||||||
{
|
|
||||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
|
||||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
|
|
||||||
if (url_arg_idx + 4 < count)
|
|
||||||
markSecretArgument(url_arg_idx + 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
void findURLSecretArguments()
|
|
||||||
{
|
|
||||||
if (!isNamedCollectionName(0))
|
|
||||||
excludeS3OrURLNestedMaps();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
|
|
||||||
{
|
|
||||||
if (arg_idx >= arguments->size())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true)
|
|
||||||
{
|
|
||||||
if (const auto * literal = argument.as<ASTLiteral>())
|
|
||||||
{
|
|
||||||
if (literal->value.getType() != Field::Types::String)
|
|
||||||
return false;
|
|
||||||
if (res)
|
|
||||||
*res = literal->value.safeGet<String>();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (allow_identifier)
|
|
||||||
{
|
|
||||||
if (const auto * id = argument.as<ASTIdentifier>())
|
|
||||||
{
|
|
||||||
if (res)
|
|
||||||
*res = id->name();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void findRemoteFunctionSecretArguments()
|
|
||||||
{
|
|
||||||
if (isNamedCollectionName(0))
|
|
||||||
{
|
|
||||||
/// remote(named_collection, ..., password = 'password', ...)
|
|
||||||
findSecretNamedArgument("password", 1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
|
|
||||||
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
|
|
||||||
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
|
|
||||||
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
|
|
||||||
|
|
||||||
/// But we should check the number of arguments first because we don't need to do any replacements in case of
|
|
||||||
/// remote('addresses_expr', db.table)
|
|
||||||
if (arguments->size() < 3)
|
|
||||||
return;
|
|
||||||
|
|
||||||
size_t arg_num = 1;
|
|
||||||
|
|
||||||
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
|
|
||||||
const auto * table_function = (*arguments)[arg_num]->as<ASTFunction>();
|
|
||||||
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name))
|
|
||||||
{
|
|
||||||
++arg_num;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::optional<String> database;
|
|
||||||
std::optional<QualifiedTableName> qualified_table_name;
|
|
||||||
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
|
|
||||||
{
|
|
||||||
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
|
|
||||||
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
|
|
||||||
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
|
|
||||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
|
||||||
/// before wiping it (because the `password` argument is always a literal string).
|
|
||||||
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
|
|
||||||
{
|
|
||||||
/// Wipe either `password` or `user`.
|
|
||||||
markSecretArgument(arg_num + 2);
|
|
||||||
}
|
|
||||||
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
|
|
||||||
{
|
|
||||||
/// Wipe either `password` or `sharding_key`.
|
|
||||||
markSecretArgument(arg_num + 3);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Skip the current argument (which is either a database name or a qualified table name).
|
|
||||||
++arg_num;
|
|
||||||
if (database)
|
|
||||||
{
|
|
||||||
/// Skip the 'table' argument if the previous argument was a database name.
|
|
||||||
++arg_num;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Skip username.
|
|
||||||
++arg_num;
|
|
||||||
|
|
||||||
/// Do our replacement:
|
|
||||||
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
|
|
||||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
|
||||||
/// before wiping it (because the `password` argument is always a literal string).
|
|
||||||
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
|
|
||||||
if (can_be_password)
|
|
||||||
markSecretArgument(arg_num);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Tries to get either a database name or a qualified table name from an argument.
|
|
||||||
/// Empty string is also allowed (it means the default database).
|
|
||||||
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
|
|
||||||
bool tryGetDatabaseNameOrQualifiedTableName(
|
|
||||||
size_t arg_idx,
|
|
||||||
std::optional<String> & res_database,
|
|
||||||
std::optional<QualifiedTableName> & res_qualified_table_name) const
|
|
||||||
{
|
|
||||||
res_database.reset();
|
|
||||||
res_qualified_table_name.reset();
|
|
||||||
|
|
||||||
String str;
|
|
||||||
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (str.empty())
|
|
||||||
{
|
|
||||||
res_database = "";
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
|
|
||||||
if (!qualified_table_name)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (qualified_table_name->database.empty())
|
|
||||||
res_database = std::move(qualified_table_name->table);
|
|
||||||
else
|
|
||||||
res_qualified_table_name = std::move(qualified_table_name);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void findEncryptionFunctionSecretArguments()
|
|
||||||
{
|
|
||||||
if (arguments->empty())
|
|
||||||
return;
|
|
||||||
|
|
||||||
/// We replace all arguments after 'mode' with '[HIDDEN]':
|
|
||||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
|
|
||||||
result.start = 1;
|
|
||||||
result.count = arguments->size() - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
void findTableEngineSecretArguments()
|
|
||||||
{
|
|
||||||
const String & engine_name = function.name;
|
|
||||||
if (engine_name == "ExternalDistributed")
|
|
||||||
{
|
|
||||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
|
||||||
findExternalDistributedTableEngineSecretArguments();
|
|
||||||
}
|
|
||||||
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
|
|
||||||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
|
|
||||||
{
|
|
||||||
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
|
|
||||||
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
|
||||||
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
|
||||||
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
|
|
||||||
findMySQLFunctionSecretArguments();
|
|
||||||
}
|
|
||||||
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
|
|
||||||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
|
|
||||||
{
|
|
||||||
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
|
|
||||||
findS3TableEngineSecretArguments();
|
|
||||||
}
|
|
||||||
else if (engine_name == "URL")
|
|
||||||
{
|
|
||||||
findURLSecretArguments();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void findExternalDistributedTableEngineSecretArguments()
|
|
||||||
{
|
|
||||||
if (isNamedCollectionName(1))
|
|
||||||
{
|
|
||||||
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
|
|
||||||
findSecretNamedArgument("password", 2);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
|
||||||
markSecretArgument(5);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void findS3TableEngineSecretArguments()
|
|
||||||
{
|
|
||||||
if (isNamedCollectionName(0))
|
|
||||||
{
|
|
||||||
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
|
|
||||||
findSecretNamedArgument("secret_access_key", 1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
|
||||||
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
|
||||||
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
|
|
||||||
size_t count = excludeS3OrURLNestedMaps();
|
|
||||||
if ((3 <= count) && (count <= 4))
|
|
||||||
{
|
|
||||||
String second_arg;
|
|
||||||
if (tryGetStringFromArgument(1, &second_arg))
|
|
||||||
{
|
|
||||||
if (boost::iequals(second_arg, "NOSIGN"))
|
|
||||||
return; /// The argument after 'url' is "NOSIGN".
|
|
||||||
|
|
||||||
if (count == 3)
|
|
||||||
{
|
|
||||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
|
||||||
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
|
||||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
|
|
||||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
|
|
||||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
|
||||||
if (2 < count)
|
|
||||||
markSecretArgument(2);
|
|
||||||
}
|
|
||||||
|
|
||||||
void findDatabaseEngineSecretArguments()
|
|
||||||
{
|
|
||||||
const String & engine_name = function.name;
|
|
||||||
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
|
|
||||||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
|
|
||||||
(engine_name == "MaterializedPostgreSQL"))
|
|
||||||
{
|
|
||||||
/// MySQL('host:port', 'database', 'user', 'password')
|
|
||||||
/// PostgreSQL('host:port', 'database', 'user', 'password')
|
|
||||||
findMySQLDatabaseSecretArguments();
|
|
||||||
}
|
|
||||||
else if (engine_name == "S3")
|
|
||||||
{
|
|
||||||
/// S3('url', 'access_key_id', 'secret_access_key')
|
|
||||||
findS3DatabaseSecretArguments();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void findMySQLDatabaseSecretArguments()
|
|
||||||
{
|
|
||||||
if (isNamedCollectionName(0))
|
|
||||||
{
|
|
||||||
/// MySQL(named_collection, ..., password = 'password', ...)
|
|
||||||
findSecretNamedArgument("password", 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/// MySQL('host:port', 'database', 'user', 'password')
|
|
||||||
markSecretArgument(3);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void findS3DatabaseSecretArguments()
|
|
||||||
{
|
|
||||||
if (isNamedCollectionName(0))
|
|
||||||
{
|
|
||||||
/// S3(named_collection, ..., secret_access_key = 'password', ...)
|
|
||||||
findSecretNamedArgument("secret_access_key", 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/// S3('url', 'access_key_id', 'secret_access_key')
|
|
||||||
markSecretArgument(2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void findBackupNameSecretArguments()
|
|
||||||
{
|
|
||||||
const String & engine_name = function.name;
|
|
||||||
if (engine_name == "S3")
|
|
||||||
{
|
|
||||||
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
|
|
||||||
markSecretArgument(2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Whether a specified argument can be the name of a named collection?
|
|
||||||
bool isNamedCollectionName(size_t arg_idx) const
|
|
||||||
{
|
|
||||||
if (arguments->size() <= arg_idx)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
const auto * identifier = (*arguments)[arg_idx]->as<ASTIdentifier>();
|
|
||||||
return identifier != nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
|
|
||||||
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
|
|
||||||
{
|
|
||||||
for (size_t i = start; i < arguments->size(); ++i)
|
|
||||||
{
|
|
||||||
const auto & argument = (*arguments)[i];
|
|
||||||
const auto * equals_func = argument->as<ASTFunction>();
|
|
||||||
if (!equals_func || (equals_func->name != "equals"))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const auto * expr_list = equals_func->arguments->as<ASTExpressionList>();
|
|
||||||
if (!expr_list)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const auto & equal_args = expr_list->children;
|
|
||||||
if (equal_args.size() != 2)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
String found_key;
|
|
||||||
if (!tryGetStringFromArgument(*equal_args[0], &found_key))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (found_key == key)
|
|
||||||
markSecretArgument(i, /* argument_is_named= */ true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -5,9 +5,11 @@
|
|||||||
#include <Columns/ColumnArray.h>
|
#include <Columns/ColumnArray.h>
|
||||||
#include <Common/BitHelpers.h>
|
#include <Common/BitHelpers.h>
|
||||||
#include <Common/formatReadable.h>
|
#include <Common/formatReadable.h>
|
||||||
|
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||||
#include <Common/logger_useful.h>
|
#include <Common/logger_useful.h>
|
||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
#include <Core/Field.h>
|
#include <Core/Field.h>
|
||||||
|
#include <Core/ServerSettings.h>
|
||||||
#include <DataTypes/DataTypeArray.h>
|
#include <DataTypes/DataTypeArray.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
@ -29,7 +31,6 @@ namespace DB
|
|||||||
|
|
||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
|
||||||
extern const int FORMAT_VERSION_TOO_OLD;
|
extern const int FORMAT_VERSION_TOO_OLD;
|
||||||
extern const int ILLEGAL_COLUMN;
|
extern const int ILLEGAL_COLUMN;
|
||||||
extern const int INCORRECT_DATA;
|
extern const int INCORRECT_DATA;
|
||||||
@ -131,8 +132,7 @@ void USearchIndexWithSerialization::deserialize(ReadBuffer & istr)
|
|||||||
/// See the comment in MergeTreeIndexGranuleVectorSimilarity::deserializeBinary why we throw here
|
/// See the comment in MergeTreeIndexGranuleVectorSimilarity::deserializeBinary why we throw here
|
||||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Could not load vector similarity index. Please drop the index and create it again. Error: {}", String(result.error.release()));
|
throw Exception(ErrorCodes::INCORRECT_DATA, "Could not load vector similarity index. Please drop the index and create it again. Error: {}", String(result.error.release()));
|
||||||
|
|
||||||
if (!try_reserve(limits()))
|
try_reserve(limits());
|
||||||
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
USearchIndexWithSerialization::Statistics USearchIndexWithSerialization::getStatistics() const
|
USearchIndexWithSerialization::Statistics USearchIndexWithSerialization::getStatistics() const
|
||||||
@ -270,20 +270,49 @@ void updateImpl(const ColumnArray * column_array, const ColumnArray::Offsets & c
|
|||||||
throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column with vector similarity index must have equal length");
|
throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column with vector similarity index must have equal length");
|
||||||
|
|
||||||
/// Reserving space is mandatory
|
/// Reserving space is mandatory
|
||||||
if (!index->try_reserve(roundUpToPowerOfTwoOrZero(index->size() + rows)))
|
size_t max_thread_pool_size = Context::getGlobalContextInstance()->getServerSettings().max_build_vector_similarity_index_thread_pool_size;
|
||||||
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for vector similarity index");
|
if (max_thread_pool_size == 0)
|
||||||
|
max_thread_pool_size = getNumberOfPhysicalCPUCores();
|
||||||
|
unum::usearch::index_limits_t limits(roundUpToPowerOfTwoOrZero(index->size() + rows), max_thread_pool_size);
|
||||||
|
index->reserve(limits);
|
||||||
|
|
||||||
for (size_t row = 0; row < rows; ++row)
|
/// Vector index creation is slooooow. Add the new rows in parallel. The threadpool is global to avoid oversubscription when multiple
|
||||||
|
/// indexes are build simultaneously (e.g. multiple merges run at the same time).
|
||||||
|
auto & thread_pool = Context::getGlobalContextInstance()->getBuildVectorSimilarityIndexThreadPool();
|
||||||
|
|
||||||
|
auto add_vector_to_index = [&](USearchIndex::vector_key_t key, size_t row, ThreadGroupPtr thread_group)
|
||||||
{
|
{
|
||||||
if (auto result = index->add(static_cast<USearchIndex::vector_key_t>(index->size()), &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
|
SCOPE_EXIT_SAFE(
|
||||||
|
if (thread_group)
|
||||||
|
CurrentThread::detachFromGroupIfNotDetached();
|
||||||
|
);
|
||||||
|
|
||||||
|
if (thread_group)
|
||||||
|
CurrentThread::attachToGroupIfDetached(thread_group);
|
||||||
|
|
||||||
|
/// add is thread-safe
|
||||||
|
if (auto result = index->add(key, &column_array_data_float_data[column_array_offsets[row - 1]]); !result)
|
||||||
|
{
|
||||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index. Error: {}", String(result.error.release()));
|
throw Exception(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index. Error: {}", String(result.error.release()));
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ProfileEvents::increment(ProfileEvents::USearchAddCount);
|
ProfileEvents::increment(ProfileEvents::USearchAddCount);
|
||||||
ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, result.visited_members);
|
ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, result.visited_members);
|
||||||
ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, result.computed_distances);
|
ProfileEvents::increment(ProfileEvents::USearchAddComputedDistances, result.computed_distances);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
size_t index_size = index->size();
|
||||||
|
|
||||||
|
for (size_t row = 0; row < rows; ++row)
|
||||||
|
{
|
||||||
|
auto key = static_cast<USearchIndex::vector_key_t>(index_size + row);
|
||||||
|
auto task = [group = CurrentThread::getGroup(), &add_vector_to_index, key, row] { add_vector_to_index(key, row, group); };
|
||||||
|
thread_pool.scheduleOrThrowOnError(task);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
thread_pool.wait();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#include <Storages/MergeTree/MergeTreeData.h>
|
#include <Storages/MergeTree/MergeTreeData.h>
|
||||||
#include <Storages/MergeTree/MergeTreePartsMover.h>
|
#include <Storages/MergeTree/MergeTreePartsMover.h>
|
||||||
#include <Storages/MergeTree/MergeTreeSettings.h>
|
#include <Storages/MergeTree/MergeTreeSettings.h>
|
||||||
|
#include <Common/FailPoint.h>
|
||||||
#include <Common/logger_useful.h>
|
#include <Common/logger_useful.h>
|
||||||
|
|
||||||
#include <set>
|
#include <set>
|
||||||
@ -15,6 +16,11 @@ namespace ErrorCodes
|
|||||||
extern const int DIRECTORY_ALREADY_EXISTS;
|
extern const int DIRECTORY_ALREADY_EXISTS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace FailPoints
|
||||||
|
{
|
||||||
|
extern const char stop_moving_part_before_swap_with_active[];
|
||||||
|
}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -226,6 +232,7 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
|
|||||||
cloned_part.temporary_directory_lock = data->getTemporaryPartDirectoryHolder(part->name);
|
cloned_part.temporary_directory_lock = data->getTemporaryPartDirectoryHolder(part->name);
|
||||||
|
|
||||||
MutableDataPartStoragePtr cloned_part_storage;
|
MutableDataPartStoragePtr cloned_part_storage;
|
||||||
|
bool preserve_blobs = false;
|
||||||
if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)
|
if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)
|
||||||
{
|
{
|
||||||
/// Try zero-copy replication and fallback to default copy if it's not possible
|
/// Try zero-copy replication and fallback to default copy if it's not possible
|
||||||
@ -253,6 +260,7 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
|
|||||||
if (zero_copy_part)
|
if (zero_copy_part)
|
||||||
{
|
{
|
||||||
/// FIXME for some reason we cannot just use this part, we have to re-create it through MergeTreeDataPartBuilder
|
/// FIXME for some reason we cannot just use this part, we have to re-create it through MergeTreeDataPartBuilder
|
||||||
|
preserve_blobs = true;
|
||||||
zero_copy_part->is_temp = false; /// Do not remove it in dtor
|
zero_copy_part->is_temp = false; /// Do not remove it in dtor
|
||||||
cloned_part_storage = zero_copy_part->getDataPartStoragePtr();
|
cloned_part_storage = zero_copy_part->getDataPartStoragePtr();
|
||||||
}
|
}
|
||||||
@ -272,7 +280,17 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
|
|||||||
cloned_part.part = std::move(builder).withPartFormatFromDisk().build();
|
cloned_part.part = std::move(builder).withPartFormatFromDisk().build();
|
||||||
LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part.part->getDataPartStorage().getFullPath());
|
LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part.part->getDataPartStorage().getFullPath());
|
||||||
|
|
||||||
cloned_part.part->is_temp = data->allowRemoveStaleMovingParts();
|
cloned_part.part->is_temp = false;
|
||||||
|
if (data->allowRemoveStaleMovingParts())
|
||||||
|
{
|
||||||
|
cloned_part.part->is_temp = true;
|
||||||
|
/// Setting it in case connection to zookeeper is lost while moving
|
||||||
|
/// Otherwise part might be stuck in the moving directory due to the KEEPER_EXCEPTION in part's destructor
|
||||||
|
if (preserve_blobs)
|
||||||
|
cloned_part.part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::PRESERVE_BLOBS;
|
||||||
|
else
|
||||||
|
cloned_part.part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS;
|
||||||
|
}
|
||||||
cloned_part.part->loadColumnsChecksumsIndexes(true, true);
|
cloned_part.part->loadColumnsChecksumsIndexes(true, true);
|
||||||
cloned_part.part->loadVersionMetadata();
|
cloned_part.part->loadVersionMetadata();
|
||||||
cloned_part.part->modification_time = cloned_part.part->getDataPartStorage().getLastModified().epochTime();
|
cloned_part.part->modification_time = cloned_part.part->getDataPartStorage().getLastModified().epochTime();
|
||||||
@ -282,6 +300,8 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
|
|||||||
|
|
||||||
void MergeTreePartsMover::swapClonedPart(TemporaryClonedPart & cloned_part) const
|
void MergeTreePartsMover::swapClonedPart(TemporaryClonedPart & cloned_part) const
|
||||||
{
|
{
|
||||||
|
/// Used to get some stuck parts in the moving directory by stopping moves while pause is active
|
||||||
|
FailPointInjection::pauseFailPoint(FailPoints::stop_moving_part_before_swap_with_active);
|
||||||
if (moves_blocker.isCancelled())
|
if (moves_blocker.isCancelled())
|
||||||
throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts.");
|
throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts.");
|
||||||
|
|
||||||
|
@ -42,6 +42,7 @@
|
|||||||
<multi_read>1</multi_read>
|
<multi_read>1</multi_read>
|
||||||
<check_not_exists>1</check_not_exists>
|
<check_not_exists>1</check_not_exists>
|
||||||
<create_if_not_exists>1</create_if_not_exists>
|
<create_if_not_exists>1</create_if_not_exists>
|
||||||
|
<remove_recursive>1</remove_recursive>
|
||||||
</feature_flags>
|
</feature_flags>
|
||||||
</keeper_server>
|
</keeper_server>
|
||||||
</clickhouse>
|
</clickhouse>
|
||||||
|
@ -64,6 +64,7 @@ function configure()
|
|||||||
randomize_config_boolean_value multi_read keeper_port
|
randomize_config_boolean_value multi_read keeper_port
|
||||||
randomize_config_boolean_value check_not_exists keeper_port
|
randomize_config_boolean_value check_not_exists keeper_port
|
||||||
randomize_config_boolean_value create_if_not_exists keeper_port
|
randomize_config_boolean_value create_if_not_exists keeper_port
|
||||||
|
randomize_config_boolean_value remove_recursive keeper_port
|
||||||
fi
|
fi
|
||||||
|
|
||||||
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
|
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
|
||||||
|
@ -393,6 +393,7 @@ def test_table_functions():
|
|||||||
f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
|
f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
|
||||||
f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '{azure_account_key}')",
|
f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '{azure_account_key}')",
|
||||||
f"iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
|
f"iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
|
||||||
|
f"gcs('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
|
||||||
]
|
]
|
||||||
|
|
||||||
def make_test_case(i):
|
def make_test_case(i):
|
||||||
|
46
tests/integration/test_remove_stale_moving_parts/config.xml
Normal file
46
tests/integration/test_remove_stale_moving_parts/config.xml
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
<clickhouse>
|
||||||
|
<remote_servers>
|
||||||
|
<cluster>
|
||||||
|
<shard>
|
||||||
|
<replica>
|
||||||
|
<host>ch1</host>
|
||||||
|
<port>9000</port>
|
||||||
|
</replica>
|
||||||
|
</shard>
|
||||||
|
</cluster>
|
||||||
|
</remote_servers>
|
||||||
|
<macros>
|
||||||
|
<shard>01</shard>
|
||||||
|
</macros>
|
||||||
|
<storage_configuration>
|
||||||
|
<disks>
|
||||||
|
<s3>
|
||||||
|
<type>s3</type>
|
||||||
|
<endpoint>http://minio1:9001/root/data/</endpoint>
|
||||||
|
<access_key_id>minio</access_key_id>
|
||||||
|
<secret_access_key>minio123</secret_access_key>
|
||||||
|
</s3>
|
||||||
|
</disks>
|
||||||
|
<policies>
|
||||||
|
<s3>
|
||||||
|
<volumes>
|
||||||
|
<default>
|
||||||
|
<disk>default</disk>
|
||||||
|
<perform_ttl_move_on_insert>False</perform_ttl_move_on_insert>
|
||||||
|
</default>
|
||||||
|
<s3>
|
||||||
|
<disk>s3</disk>
|
||||||
|
<perform_ttl_move_on_insert>False</perform_ttl_move_on_insert>
|
||||||
|
</s3>
|
||||||
|
</volumes>
|
||||||
|
<move_factor>0.0</move_factor>
|
||||||
|
</s3>
|
||||||
|
</policies>
|
||||||
|
</storage_configuration>
|
||||||
|
|
||||||
|
<merge_tree>
|
||||||
|
<allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
|
||||||
|
<storage_policy>s3</storage_policy>
|
||||||
|
</merge_tree>
|
||||||
|
<allow_remove_stale_moving_parts>true</allow_remove_stale_moving_parts>
|
||||||
|
</clickhouse>
|
117
tests/integration/test_remove_stale_moving_parts/test.py
Normal file
117
tests/integration/test_remove_stale_moving_parts/test.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
import time
|
||||||
|
import pytest
|
||||||
|
from helpers.cluster import ClickHouseCluster
|
||||||
|
|
||||||
|
cluster = ClickHouseCluster(__file__)
|
||||||
|
ch1 = cluster.add_instance(
|
||||||
|
"ch1",
|
||||||
|
main_configs=[
|
||||||
|
"config.xml",
|
||||||
|
],
|
||||||
|
macros={"replica": "node1"},
|
||||||
|
with_zookeeper=True,
|
||||||
|
with_minio=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
DATABASE_NAME = "stale_moving_parts"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
|
||||||
|
def started_cluster():
|
||||||
|
try:
|
||||||
|
cluster.start()
|
||||||
|
yield cluster
|
||||||
|
|
||||||
|
finally:
|
||||||
|
cluster.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
def q(node, query):
|
||||||
|
return node.query(database=DATABASE_NAME, sql=query)
|
||||||
|
|
||||||
|
|
||||||
|
# .../disks/s3/store/
|
||||||
|
def get_table_path(node, table):
|
||||||
|
return (
|
||||||
|
node.query(
|
||||||
|
sql=f"SELECT data_paths FROM system.tables WHERE table = '{table}' and database = '{DATABASE_NAME}' LIMIT 1"
|
||||||
|
)
|
||||||
|
.strip('"\n[]')
|
||||||
|
.split(",")[1]
|
||||||
|
.strip("'")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def exec(node, cmd, path):
|
||||||
|
return node.exec_in_container(
|
||||||
|
[
|
||||||
|
"bash",
|
||||||
|
"-c",
|
||||||
|
f"{cmd} {path}",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def wait_part_is_stuck(node, table_moving_path, moving_part):
|
||||||
|
num_tries = 5
|
||||||
|
while q(node, "SELECT part_name FROM system.moves").strip() != moving_part:
|
||||||
|
if num_tries == 0:
|
||||||
|
raise Exception("Part has not started to move")
|
||||||
|
num_tries -= 1
|
||||||
|
time.sleep(1)
|
||||||
|
num_tries = 5
|
||||||
|
while exec(node, "ls", table_moving_path).strip() != moving_part:
|
||||||
|
if num_tries == 0:
|
||||||
|
raise Exception("Part is not stuck in the moving directory")
|
||||||
|
num_tries -= 1
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
|
def wait_zookeeper_node_to_start(zk_nodes, timeout=60):
|
||||||
|
start = time.time()
|
||||||
|
while time.time() - start < timeout:
|
||||||
|
try:
|
||||||
|
for instance in zk_nodes:
|
||||||
|
conn = cluster.get_kazoo_client(instance)
|
||||||
|
conn.get_children("/")
|
||||||
|
print("All instances of ZooKeeper started")
|
||||||
|
return
|
||||||
|
except Exception as ex:
|
||||||
|
print(("Can't connect to ZooKeeper " + str(ex)))
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_remove_stale_moving_parts_without_zookeeper(started_cluster):
|
||||||
|
ch1.query(f"CREATE DATABASE IF NOT EXISTS {DATABASE_NAME}")
|
||||||
|
|
||||||
|
q(
|
||||||
|
ch1,
|
||||||
|
"CREATE TABLE test_remove ON CLUSTER cluster ( id UInt32 ) ENGINE ReplicatedMergeTree() ORDER BY id;",
|
||||||
|
)
|
||||||
|
|
||||||
|
table_moving_path = Path(get_table_path(ch1, "test_remove")) / "moving"
|
||||||
|
|
||||||
|
q(ch1, "SYSTEM ENABLE FAILPOINT stop_moving_part_before_swap_with_active")
|
||||||
|
q(ch1, "INSERT INTO test_remove SELECT number FROM numbers(100);")
|
||||||
|
moving_part = "all_0_0_0"
|
||||||
|
move_response = ch1.get_query_request(
|
||||||
|
sql=f"ALTER TABLE test_remove MOVE PART '{moving_part}' TO DISK 's3'",
|
||||||
|
database=DATABASE_NAME,
|
||||||
|
)
|
||||||
|
|
||||||
|
wait_part_is_stuck(ch1, table_moving_path, moving_part)
|
||||||
|
|
||||||
|
cluster.stop_zookeeper_nodes(["zoo1", "zoo2", "zoo3"])
|
||||||
|
# Stop moves in case table is not read-only yet
|
||||||
|
q(ch1, "SYSTEM STOP MOVES")
|
||||||
|
q(ch1, "SYSTEM DISABLE FAILPOINT stop_moving_part_before_swap_with_active")
|
||||||
|
|
||||||
|
assert "Cancelled moving parts" in move_response.get_error()
|
||||||
|
assert exec(ch1, "ls", table_moving_path).strip() == ""
|
||||||
|
|
||||||
|
cluster.start_zookeeper_nodes(["zoo1", "zoo2", "zoo3"])
|
||||||
|
wait_zookeeper_node_to_start(["zoo1", "zoo2", "zoo3"])
|
||||||
|
q(ch1, "SYSTEM START MOVES")
|
||||||
|
|
||||||
|
q(ch1, f"DROP TABLE test_remove")
|
@ -560,7 +560,6 @@ positionCaseInsensitive
|
|||||||
positionCaseInsensitiveUTF8
|
positionCaseInsensitiveUTF8
|
||||||
positionUTF8
|
positionUTF8
|
||||||
pow
|
pow
|
||||||
printf
|
|
||||||
proportionsZTest
|
proportionsZTest
|
||||||
protocol
|
protocol
|
||||||
queryID
|
queryID
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
default 127.0.0.1 9181 0 0 0 1 1 ['FILTERED_LIST','MULTI_READ','CHECK_NOT_EXISTS','CREATE_IF_NOT_EXISTS']
|
default 127.0.0.1 9181 0 0 0 1 1 ['FILTERED_LIST','MULTI_READ','CHECK_NOT_EXISTS','CREATE_IF_NOT_EXISTS','REMOVE_RECURSIVE']
|
||||||
zookeeper2 localhost 9181 0 0 0 1
|
zookeeper2 localhost 9181 0 0 0 1
|
||||||
|
Loading…
Reference in New Issue
Block a user