Mirror of https://github.com/ClickHouse/ClickHouse.git
Commit cb9d35e490: Merge branch 'master' into update-datasketches
@@ -89,7 +89,7 @@ PenaltyBreakFirstLessLess: 120
 PenaltyBreakString: 1000
 PenaltyExcessCharacter: 1000000
 PenaltyReturnTypeOnItsOwnLine: 60
-RemoveBracesLLVM: true
+RemoveBracesLLVM: false
 SpaceAfterCStyleCast: false
 SpaceBeforeAssignmentOperators: true
 SpaceBeforeParens: ControlStatements
@@ -138,6 +138,8 @@ Checks: [
+    # This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872
+    '-modernize-use-constraints',
     # https://github.com/abseil/abseil-cpp/issues/1667
     '-clang-analyzer-optin.core.EnumCastOutOfRange'
 ]

 WarningsAsErrors: '*'
contrib/libunwind (vendored submodule)
@@ -1 +1 @@
-Subproject commit 40d8eadf96b127d9b22d53ce7a4fc52aaedea965
+Subproject commit ba533a7246a2686b0552061809612f503804d26b
@@ -31,7 +31,9 @@ add_library(unwind ${LIBUNWIND_SOURCES})
 set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake")

 target_include_directories(unwind SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBUNWIND_SOURCE_DIR}/include>)
-target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY)
+target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1)
+# NOTE: from this macros sizeof(unw_context_t)/sizeof(unw_cursor_t) is depends, so it should be set always
+target_compile_definitions(unwind PUBLIC -D_LIBUNWIND_IS_NATIVE_ONLY)

 # We should enable optimizations (otherwise it will be too slow in debug)
 # and disable sanitizers (otherwise infinite loop may happen)
@@ -138,7 +138,7 @@ function filter_exists_and_template
         # but it doesn't allow to use regex
         echo "$path" | sed 's/\.sql\.j2$/.gen.sql/'
     else
-        echo "'$path' does not exists" >&2
+        echo "'$path' does not exist" >&2
     fi
 done
 }
@@ -83,7 +83,7 @@ setup_minio() {
   ./mc alias set clickminio http://localhost:11111 clickhouse clickhouse
   ./mc admin user add clickminio test testtest
   ./mc admin policy set clickminio readwrite user=test
-  ./mc mb clickminio/test
+  ./mc mb --ignore-existing clickminio/test
   if [ "$test_type" = "stateless" ]; then
     ./mc policy set public clickminio/test
   fi
@@ -93,7 +93,7 @@ sidebar_label: 2022
 * `(U)Int128` and `(U)Int256` values are correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)).
 * Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)).
 * Fix truncate table does not hold lock correctly. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)).
-* Fix possible SIGSEGV for web disks when file does not exists (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)).
+* Fix possible SIGSEGV for web disks when file does not exist (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)).
 * Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)).
 * Fix stack-use-after-return under ASAN build in ParserCreateUserQuery. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)).
 * Fix lowerUTF8()/upperUTF8() in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)).
@@ -53,7 +53,7 @@ sidebar_label: 2022
 * Store Keeper API version inside a predefined path. [#39096](https://github.com/ClickHouse/ClickHouse/pull/39096) ([Antonio Andelic](https://github.com/antonio2368)).
 * Now entrypoint.sh in docker image creates and executes chown for all folders it found in config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). [#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
 * Add profile events for fsync. [#39179](https://github.com/ClickHouse/ClickHouse/pull/39179) ([Azat Khuzhin](https://github.com/azat)).
-* Add the second argument to the ordinary function `file(path[, default])`, which function returns in the case when a file does not exists. [#39218](https://github.com/ClickHouse/ClickHouse/pull/39218) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add the second argument to the ordinary function `file(path[, default])`, which function returns in the case when a file does not exist. [#39218](https://github.com/ClickHouse/ClickHouse/pull/39218) ([Nikolay Degterinsky](https://github.com/evillique)).
 * Some small fixes for reading via http, allow to retry partial content in case if got 200OK. [#39244](https://github.com/ClickHouse/ClickHouse/pull/39244) ([Kseniia Sumarokova](https://github.com/kssenii)).
 * Improved Base58 encoding/decoding. [#39292](https://github.com/ClickHouse/ClickHouse/pull/39292) ([Andrey Zvonov](https://github.com/zvonand)).
 * Normalize `AggregateFunction` types and state representations because optimizations like https://github.com/ClickHouse/ClickHouse/pull/35788 will treat `count(not null columns)` as `count()`, which might confuses distributed interpreters with the following error : `Conversion from AggregateFunction(count) to AggregateFunction(count, Int64) is not supported`. [#39420](https://github.com/ClickHouse/ClickHouse/pull/39420) ([Amos Bird](https://github.com/amosbird)).
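Aside, not part of the commit: the `file(path[, default])` behaviour mentioned in the changelog entry above can be illustrated with a minimal sketch. The file name below is hypothetical and assumed to be absent from the server's `user_files_path`, so the second argument is returned instead of an error.

``` sql
-- Hypothetical file name; since it does not exist, the default value is returned
SELECT file('missing_file.txt', 'default value');
```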
@@ -291,7 +291,7 @@ sidebar_label: 2023
 * Fix replica groups for Replicated database engine [#55587](https://github.com/ClickHouse/ClickHouse/pull/55587) ([Azat Khuzhin](https://github.com/azat)).
 * Remove unused protobuf includes [#55590](https://github.com/ClickHouse/ClickHouse/pull/55590) ([Raúl Marín](https://github.com/Algunenano)).
 * Apply Context changes to standalone Keeper [#55591](https://github.com/ClickHouse/ClickHouse/pull/55591) ([Antonio Andelic](https://github.com/antonio2368)).
-* Do not fail if label-to-remove does not exists in PR [#55592](https://github.com/ClickHouse/ClickHouse/pull/55592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Do not fail if label-to-remove does not exist in PR [#55592](https://github.com/ClickHouse/ClickHouse/pull/55592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
 * CI: cast extra column expression `pull_request_number` to Int32 [#55599](https://github.com/ClickHouse/ClickHouse/pull/55599) ([Han Fei](https://github.com/hanfei1991)).
 * Add back a test that was removed by mistake [#55605](https://github.com/ClickHouse/ClickHouse/pull/55605) ([Alexander Tokmakov](https://github.com/tavplubix)).
 * Bump croaring to v2.0.4 [#55606](https://github.com/ClickHouse/ClickHouse/pull/55606) ([Robert Schulze](https://github.com/rschu1ze)).
@@ -67,6 +67,8 @@ generates merged configuration file:
 </clickhouse>
 ```

+### Using from_env and from_zk
+
 To specify that a value of an element should be replaced by the value of an environment variable, you can use attribute `from_env`.

 Example with `$MAX_QUERY_SIZE = 150000`:
@@ -93,6 +95,59 @@ which is equal to
 </clickhouse>
 ```

+The same is possible using `from_zk`:
+
+``` xml
+<clickhouse>
+    <postgresql_port from_zk="/zk_configs/postgresql_port"/>
+</clickhouse>
+```
+
+```
+# clickhouse-keeper-client
+/ :) touch /zk_configs
+/ :) create /zk_configs/postgresql_port "9005"
+/ :) get /zk_configs/postgresql_port
+9005
+```
+
+which is equal to
+
+``` xml
+<clickhouse>
+    <postgresql_port>9005</postgresql_port>
+</clickhouse>
+```
+
+#### Default values for from_env and from_zk attributes
+
+It's possible to set the default value and substitute it only if the environment variable or zookeeper node is set using `replace="1"`.
+
+With previous example, but `MAX_QUERY_SIZE` is unset:
+
+``` xml
+<clickhouse>
+    <profiles>
+        <default>
+            <max_query_size from_env="MAX_QUERY_SIZE" replace="1">150000</max_query_size>
+        </default>
+    </profiles>
+</clickhouse>
+```
+
+will take the default value
+
+``` xml
+<clickhouse>
+    <profiles>
+        <default>
+            <max_query_size>150000</max_query_size>
+        </default>
+    </profiles>
+</clickhouse>
+```
+
 ## Substituting Configuration {#substitution}

 The config can define substitutions. There are two types of substitutions:
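Aside, not part of the commit: once the server has loaded a configuration that uses `from_env` or `from_zk`, the effective value of a user-level setting such as `max_query_size` can be confirmed with a query along these lines (a minimal sketch assuming a default ClickHouse installation).

``` sql
-- Shows the value the default profile ended up with after substitution
SELECT name, value, changed
FROM system.settings
WHERE name = 'max_query_size';
```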
@@ -32,7 +32,7 @@ WHERE name LIKE '%thread_pool%'
 ```

 ``` text
-┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─changeable_without_restart─┬─is_obsolete─┐
+┌─name──────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─changeable_without_restart─┬─is_obsolete─┐
 │ max_thread_pool_size │ 10000 │ 10000 │ 0 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ No │ 0 │
 │ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ No │ 0 │
 │ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ No │ 0 │
@@ -41,11 +41,12 @@ WHERE name LIKE '%thread_pool%'
 │ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ No │ 0 │
 │ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. │ UInt64 │ No │ 0 │
 │ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ No │ 0 │
+│ max_unexpected_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Unexpected ones) at startup. │ UInt64 │ No │ 0 │
 │ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ No │ 0 │
 │ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ No │ 0 │
 │ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ No │ 0 │
 │ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. │ UInt64 │ No │ 0 │
-└─────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴────────────────────────────┴─────────────┘
+└───────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴────────────────────────────┴─────────────┘

 ```
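Aside, not part of the commit: the `max_unexpected_parts_loading_thread_pool_size` row added to the documented output above can be inspected directly on a running server. A minimal sketch, assuming the column names shown in that table:

``` sql
SELECT name, value, `default`, changed, description
FROM system.server_settings
WHERE name = 'max_unexpected_parts_loading_thread_pool_size';
```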
@@ -35,7 +35,7 @@ api_version
 /keeper/api_version :) ls

 /keeper/api_version :) cd xyz
-Path /keeper/api_version/xyz does not exists
+Path /keeper/api_version/xyz does not exist
 /keeper/api_version :) cd ../../
 / :) ls
 keeper foo bar
@@ -152,7 +152,7 @@ Configuration example:
 **Syntax**

 ``` sql
-cutToFirstSignificantSubdomain(URL, TLD)
+cutToFirstSignificantSubdomainCustom(URL, TLD)
 ```

 **Arguments**
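Aside, not part of the commit: a hedged usage sketch for the corrected syntax above. The list name `public_suffix_list` is a placeholder for a custom TLD list that would have to be defined in the server configuration, as described in the surrounding documentation.

``` sql
-- 'public_suffix_list' is an assumed name of a custom TLD list from the server config
SELECT cutToFirstSignificantSubdomainCustom('https://news.clickhouse.com.tr/', 'public_suffix_list');
```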
@@ -248,6 +248,25 @@ FROM s3(
 LIMIT 5;
 ```

+
+## Working with archives
+
+Suppose that we have several archive files with following URIs on S3:
+
+- 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-10.csv.zip'
+- 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-11.csv.zip'
+- 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-12.csv.zip'
+
+Extracting data from these archives is possible using ::. Globs can be used both in the url part as well as in the part after :: (responsible for the name of a file inside the archive).
+
+``` sql
+SELECT *
+FROM s3(
+   'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-1{0..2}.csv.zip :: *.csv'
+);
+```
+
+
 ## Virtual Columns {#virtual-columns}

 - `_path` — Path to the file. Type: `LowCardinalty(String)`.
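Aside, not part of the commit: combining the archive syntax added above with the `_path` virtual column mentioned in the same file gives a quick way to see which archive each row came from. A minimal sketch, assuming the same public dataset is reachable:

``` sql
SELECT _path, count() AS rows
FROM s3('https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-1{0..2}.csv.zip :: *.csv')
GROUP BY _path
ORDER BY _path;
```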
@@ -263,7 +263,7 @@ void Client::initialize(Poco::Util::Application & self)
         config().add(loaded_config.configuration);
     }
     else if (config().has("connection"))
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "--connection was specified, but config does not exists");
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "--connection was specified, but config does not exist");

     /** getenv is thread-safe in Linux glibc and in all sane libc implementations.
      * But the standard does not guarantee that subsequent calls will not rewrite the value by returned pointer.
@@ -8,6 +8,11 @@
 namespace DB
 {

+namespace ErrorCodes
+{
+    extern const int KEEPER_EXCEPTION;
+}
+
 bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
 {
     String path;
@@ -58,7 +63,7 @@ void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con

     auto new_path = client->getAbsolutePath(query->args[0].safeGet<String>());
     if (!client->zookeeper->exists(new_path))
-        std::cerr << "Path " << new_path << " does not exists\n";
+        std::cerr << "Path " << new_path << " does not exist\n";
     else
         client->cwd = new_path;
 }
@@ -216,6 +221,8 @@ bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> &

     node->args.push_back(threshold->as<ASTLiteral &>().value);

+    ParserToken{TokenType::Whitespace}.ignore(pos);
+
     String path;
     if (!parseKeeperPath(pos, expected, path))
         path = ".";
@@ -230,19 +237,23 @@ void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client
     auto path = client->getAbsolutePath(query->args[1].safeGet<String>());

     Coordination::Stat stat;
-    client->zookeeper->get(path, &stat);
+    if (!client->zookeeper->exists(path, &stat))
+        return; /// It is ok if node was deleted meanwhile

     if (stat.numChildren >= static_cast<Int32>(threshold))
     {
         std::cout << static_cast<String>(path) << "\t" << stat.numChildren << "\n";
         return;
     }

-    auto children = client->zookeeper->getChildren(path);
+    Strings children;
+    auto status = client->zookeeper->tryGetChildren(path, children);
+    if (status == Coordination::Error::ZNONODE)
+        return; /// It is ok if node was deleted meanwhile
+    else if (status != Coordination::Error::ZOK)
+        throw DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, "Error {} while getting children of {}", status, path.string());
+
     std::sort(children.begin(), children.end());
+    auto next_query = *query;
     for (const auto & child : children)
     {
-        auto next_query = *query;
         next_query.args[1] = DB::Field(path / child);
         execute(&next_query, client);
     }
@@ -310,31 +321,34 @@ bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> &
     return true;
 }

+/// DFS the subtree and return the number of nodes in the subtree
+static Int64 traverse(const fs::path & path, KeeperClient * client, std::vector<std::tuple<Int64, String>> & result)
+{
+    Int64 nodes_in_subtree = 1;
+
+    Strings children;
+    auto status = client->zookeeper->tryGetChildren(path, children);
+    if (status == Coordination::Error::ZNONODE)
+        return 0;
+    else if (status != Coordination::Error::ZOK)
+        throw DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, "Error {} while getting children of {}", status, path.string());
+
+    for (auto & child : children)
+        nodes_in_subtree += traverse(path / child, client, result);
+
+    result.emplace_back(nodes_in_subtree, path.string());
+
+    return nodes_in_subtree;
+}
+
 void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const
 {
     auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
     auto n = query->args[1].safeGet<UInt64>();

-    std::vector<std::tuple<Int32, String>> result;
+    std::vector<std::tuple<Int64, String>> result;

-    std::queue<fs::path> queue;
-    queue.push(path);
-    while (!queue.empty())
-    {
-        auto next_path = queue.front();
-        queue.pop();
-
-        auto children = client->zookeeper->getChildren(next_path);
-        for (auto & child : children)
-            child = next_path / child;
-        auto response = client->zookeeper->get(children);
-
-        for (size_t i = 0; i < response.size(); ++i)
-        {
-            result.emplace_back(response[i].stat.numChildren, children[i]);
-            queue.push(children[i]);
-        }
-    }
+    traverse(path, client, result);

     std::sort(result.begin(), result.end(), std::greater());
     for (UInt64 i = 0; i < std::min(result.size(), static_cast<size_t>(n)); ++i)
@@ -160,6 +160,14 @@ void LocalServer::initialize(Poco::Util::Application & self)

     getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads);

+    const size_t unexpected_parts_loading_threads = config().getUInt("max_unexpected_parts_loading_thread_pool_size", 32);
+    getUnexpectedPartsLoadingThreadPool().initialize(
+        unexpected_parts_loading_threads,
+        0, // We don't need any threads one all the parts will be loaded
+        unexpected_parts_loading_threads);
+
+    getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads);
+
     const size_t cleanup_threads = config().getUInt("max_parts_cleaning_thread_pool_size", 128);
     getPartsCleaningThreadPool().initialize(
         cleanup_threads,
@@ -885,6 +885,16 @@ try
         server_settings.max_active_parts_loading_thread_pool_size
     );

+    getUnexpectedPartsLoadingThreadPool().initialize(
+        server_settings.max_unexpected_parts_loading_thread_pool_size,
+        0, // We don't need any threads once all the parts will be loaded
+        server_settings.max_unexpected_parts_loading_thread_pool_size);
+
+    /// It could grow if we need to synchronously wait until all the data parts will be loaded.
+    getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads(
+        server_settings.max_active_parts_loading_thread_pool_size
+    );
+
     getPartsCleaningThreadPool().initialize(
         server_settings.max_parts_cleaning_thread_pool_size,
         0, // We don't need any threads one all the parts will be deleted
@@ -769,6 +769,7 @@ struct IdentifierResolveScope

     /// Nodes with duplicated aliases
     std::unordered_set<QueryTreeNodePtr> nodes_with_duplicated_aliases;
+    std::vector<QueryTreeNodePtr> cloned_nodes_with_duplicated_aliases;

     /// Current scope expression in resolve process stack
     ExpressionsStack expressions_in_resolve_process_stack;
@@ -1031,6 +1032,14 @@ public:
         return true;
     }
+private:
+    void addDuplicatingAlias(const QueryTreeNodePtr & node)
+    {
+        scope.nodes_with_duplicated_aliases.emplace(node);
+        auto cloned_node = node->clone();
+        scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node);
+        scope.nodes_with_duplicated_aliases.emplace(cloned_node);
+    }

     void updateAliasesIfNeeded(const QueryTreeNodePtr & node, bool is_lambda_node)
     {
         if (!node->hasAlias())
@@ -1045,21 +1054,21 @@ private:
         if (is_lambda_node)
         {
             if (scope.alias_name_to_expression_node->contains(alias))
-                scope.nodes_with_duplicated_aliases.insert(node);
+                addDuplicatingAlias(node);

             auto [_, inserted] = scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node));
             if (!inserted)
-                scope.nodes_with_duplicated_aliases.insert(node);
+                addDuplicatingAlias(node);

             return;
         }

         if (scope.alias_name_to_lambda_node.contains(alias))
-            scope.nodes_with_duplicated_aliases.insert(node);
+            addDuplicatingAlias(node);

         auto [_, inserted] = scope.alias_name_to_expression_node->insert(std::make_pair(alias, node));
         if (!inserted)
-            scope.nodes_with_duplicated_aliases.insert(node);
+            addDuplicatingAlias(node);

         /// If node is identifier put it also in scope alias name to lambda node map
         if (node->getNodeType() == QueryTreeNodeType::IDENTIFIER)
@@ -5081,14 +5090,14 @@ ProjectionName QueryAnalyzer::resolveWindow(QueryTreeNodePtr & node, IdentifierR
         auto * nearest_query_scope = scope.getNearestQueryScope();

         if (!nearest_query_scope)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window '{}' does not exists.", parent_window_name);
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window '{}' does not exist.", parent_window_name);

         auto & scope_window_name_to_window_node = nearest_query_scope->window_name_to_window_node;

         auto window_node_it = scope_window_name_to_window_node.find(parent_window_name);
         if (window_node_it == scope_window_name_to_window_node.end())
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "Window '{}' does not exists. In scope {}",
+                "Window '{}' does not exist. In scope {}",
                 parent_window_name,
                 nearest_query_scope->scope_node->formatASTForErrorMessage());
@@ -5615,9 +5624,13 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
                 /// Replace storage with values storage of insertion block
                 if (StoragePtr storage = scope.context->getViewSource())
                 {
-                    if (auto * query_node = in_second_argument->as<QueryNode>())
+                    QueryTreeNodePtr table_expression;
+                    /// Process possibly nested sub-selects
+                    for (auto * query_node = in_second_argument->as<QueryNode>(); query_node; query_node = table_expression->as<QueryNode>())
+                        table_expression = extractLeftTableExpression(query_node->getJoinTree());
+
+                    if (table_expression)
                     {
-                        auto table_expression = extractLeftTableExpression(query_node->getJoinTree());
                         if (auto * query_table_node = table_expression->as<TableNode>())
                         {
                             if (query_table_node->getStorageID().getFullNameNotQuoted() == storage->getStorageID().getFullNameNotQuoted())
|
||||
{
|
||||
if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name))
|
||||
{
|
||||
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Aggregate function with name '{}' does not exists. In scope {}{}",
|
||||
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Aggregate function with name '{}' does not exist. In scope {}{}",
|
||||
function_name, scope.scope_node->formatASTForErrorMessage(),
|
||||
getHintsErrorMessageSuffix(AggregateFunctionFactory::instance().getHints(function_name)));
|
||||
}
|
||||
@ -5929,7 +5942,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
auto hints = NamePrompter<2>::getHints(function_name, possible_function_names);
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
|
||||
"Function with name '{}' does not exists. In scope {}{}",
|
||||
"Function with name '{}' does not exist. In scope {}{}",
|
||||
function_name,
|
||||
scope.scope_node->formatASTForErrorMessage(),
|
||||
getHintsErrorMessageSuffix(hints));
|
||||
@@ -6254,6 +6267,10 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
         result_projection_names.push_back(node_alias);
     }

+    bool is_duplicated_alias = scope.nodes_with_duplicated_aliases.contains(node);
+    if (is_duplicated_alias)
+        scope.non_cached_identifier_lookups_during_expression_resolve.insert({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION});
+
     /** Do not use alias table if node has alias same as some other node.
      * Example: WITH x -> x + 1 AS lambda SELECT 1 AS lambda;
      * During 1 AS lambda resolve if we use alias table we replace node with x -> x + 1 AS lambda.
@@ -6264,7 +6281,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
      * alias table because in alias table subquery could be evaluated as scalar.
      */
     bool use_alias_table = true;
-    if (scope.nodes_with_duplicated_aliases.contains(node) || (allow_table_expression && isSubqueryNodeType(node->getNodeType())))
+    if (is_duplicated_alias || (allow_table_expression && isSubqueryNodeType(node->getNodeType())))
         use_alias_table = false;

     if (!node_alias.empty() && use_alias_table)
|
||||
}
|
||||
}
|
||||
|
||||
if (is_duplicated_alias)
|
||||
scope.non_cached_identifier_lookups_during_expression_resolve.erase({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION});
|
||||
|
||||
resolved_expressions.emplace(node, result_projection_names);
|
||||
|
||||
scope.popExpressionNode();
|
||||
@ -6600,7 +6620,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node
|
||||
{
|
||||
auto node_to_resolve = node;
|
||||
auto expression_node_projection_names = resolveExpressionNode(node_to_resolve, scope, allow_lambda_expression, allow_table_expression);
|
||||
|
||||
size_t expected_projection_names_size = 1;
|
||||
if (auto * expression_list = node_to_resolve->as<ListNode>())
|
||||
{
|
||||
@@ -8051,7 +8070,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
         auto window_node_it = scope.window_name_to_window_node.find(parent_window_name);
         if (window_node_it == scope.window_name_to_window_node.end())
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "Window '{}' does not exists. In scope {}",
+                "Window '{}' does not exist. In scope {}",
                 parent_window_name,
                 scope.scope_node->formatASTForErrorMessage());

@@ -8208,10 +8227,13 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
      * After scope nodes are resolved, we can compare node with duplicate alias with
      * node from scope alias table.
      */
-    for (const auto & node_with_duplicated_alias : scope.nodes_with_duplicated_aliases)
+    for (const auto & node_with_duplicated_alias : scope.cloned_nodes_with_duplicated_aliases)
    {
        auto node = node_with_duplicated_alias;
        auto node_alias = node->getAlias();
+
+        /// Add current alias to non cached set, because in case of cyclic alias identifier should not be substituted from cache.
+        /// See 02896_cyclic_aliases_crash.
        resolveExpressionNode(node, scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/);

        bool has_node_in_alias_table = false;
@@ -8246,7 +8268,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier

        if (!has_node_in_alias_table)
            throw Exception(ErrorCodes::LOGICAL_ERROR,
-                "Node {} with duplicate alias {} does not exists in alias table. In scope {}",
+                "Node {} with duplicate alias {} does not exist in alias table. In scope {}",
                node->formatASTForErrorMessage(),
                node_alias,
                scope.scope_node->formatASTForErrorMessage());
@@ -1242,8 +1242,9 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
    }
    else if (auto * explain_query = typeid_cast<ASTExplainQuery *>(ast.get()))
    {
+        const auto & explained_query = explain_query->getExplainedQuery();
        /// Fuzzing EXPLAIN query to SELECT query randomly
-        if (fuzz_rand() % 20 == 0 && explain_query->getExplainedQuery()->getQueryKind() == IAST::QueryKind::Select)
+        if (explained_query && explained_query->getQueryKind() == IAST::QueryKind::Select && fuzz_rand() % 20 == 0)
        {
            auto select_query = explain_query->getExplainedQuery()->clone();
            fuzz(select_query);
@@ -177,6 +177,9 @@
     M(MergeTreeOutdatedPartsLoaderThreads, "Number of threads in the threadpool for loading Outdated data parts.") \
     M(MergeTreeOutdatedPartsLoaderThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \
     M(MergeTreeOutdatedPartsLoaderThreadsScheduled, "Number of queued or active jobs in the threadpool for loading Outdated data parts.") \
+    M(MergeTreeUnexpectedPartsLoaderThreads, "Number of threads in the threadpool for loading Unexpected data parts.") \
+    M(MergeTreeUnexpectedPartsLoaderThreadsActive, "Number of active threads in the threadpool for loading Unexpected data parts.") \
+    M(MergeTreeUnexpectedPartsLoaderThreadsScheduled, "Number of queued or active jobs in the threadpool for loading Unexpected data parts.") \
     M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \
     M(MergeTreePartsCleanerThreadsActive, "Number of threads in the MergeTree parts cleaner thread pool running a task.") \
     M(MergeTreePartsCleanerThreadsScheduled, "Number of queued or active jobs in the MergeTree parts cleaner thread pool.") \
@@ -26,12 +26,12 @@ using LoggerRawPtr = Poco::Logger *;
  * Then it must be destroyed when underlying table is destroyed.
  */

-/** Get Logger with specified name. If the Logger does not exists, it is created.
+/** Get Logger with specified name. If the Logger does not exist, it is created.
  * Logger is destroyed, when last shared ptr that refers to Logger with specified name is destroyed.
  */
LoggerPtr getLogger(const std::string & name);

-/** Get Logger with specified name. If the Logger does not exists, it is created.
+/** Get Logger with specified name. If the Logger does not exist, it is created.
  * This overload was added for specific purpose, when logger is constructed from constexpr string.
  * Logger is destroyed only during program shutdown.
  */
@@ -5,7 +5,6 @@
 #include <Common/OpenTelemetryTraceContext.h>
 #include <Common/noexcept_scope.h>

 #include <cassert>
 #include <type_traits>

 #include <Poco/Util/Application.h>
@@ -437,6 +436,11 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
         /// We don't run jobs after `shutdown` is set, but we have to properly dequeue all jobs and finish them.
         if (shutdown)
         {
+            {
+                ALLOW_ALLOCATIONS_IN_SCOPE;
+                /// job can contain packaged_task which can set exception during destruction
+                job_data.reset();
+            }
             job_is_done = true;
             continue;
         }
@@ -25,6 +25,7 @@ namespace DB
     M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
     M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \
     M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \
+    M(UInt64, max_unexpected_parts_loading_thread_pool_size, 8, "The number of threads to load inactive set of data parts (Unexpected ones) at startup.", 0) \
     M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \
     M(UInt64, max_mutations_bandwidth_for_server, 0, "The maximum read speed of all mutations on server in bytes per second. Zero means unlimited.", 0) \
     M(UInt64, max_merges_bandwidth_for_server, 0, "The maximum read speed of all merges on server in bytes per second. Zero means unlimited.", 0) \
@@ -799,8 +799,8 @@ class IColumn;
     M(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be downloaded by a single query", 0) \
     M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \
     M(UInt64, filesystem_cache_segments_batch_size, 20, "Limit on size of a single batch of file segments that a read buffer can request from cache. Too low value will lead to excessive requests to cache, too large may slow down eviction from cache", 0) \
-    M(UInt64, filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, 1000, "Wait time to lock cache for sapce reservation in filesystem cache", 0) \
-    M(UInt64, temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds, (10 * 60 * 1000), "Wait time to lock cache for sapce reservation for temporary data in filesystem cache", 0) \
+    M(UInt64, filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, 1000, "Wait time to lock cache for space reservation in filesystem cache", 0) \
+    M(UInt64, temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds, (10 * 60 * 1000), "Wait time to lock cache for space reservation for temporary data in filesystem cache", 0) \
     \
     M(Bool, use_page_cache_for_disks_without_file_cache, false, "Use userspace page cache for remote disks that don't have filesystem cache enabled.", 0) \
     M(Bool, read_from_page_cache_if_exists_otherwise_bypass_cache, false, "Use userspace page cache in passive mode, similar to read_from_filesystem_cache_if_exists_otherwise_bypass_cache.", 0) \
@@ -13,6 +13,7 @@ class ASTStorage;
     M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \
     M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \
     M(Bool, check_consistency, true, "Check consistency of local metadata and metadata in Keeper, do replica recovery on inconsistency", 0) \
+    M(UInt64, max_retries_before_automatic_recovery, 100, "Max number of attempts to execute a queue entry before marking replica as lost recovering it from snapshot (0 means infinite)", 0) \


DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS)
@@ -18,6 +18,8 @@ namespace ErrorCodes
     extern const int UNFINISHED;
 }

+static constexpr const char * FORCE_AUTO_RECOVERY_DIGEST = "42";
+
 DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db, ContextPtr context_)
     : DDLWorker(/* pool_size */ 1, db->zookeeper_path + "/log", context_, nullptr, {}, fmt::format("DDLWorker({})", db->getDatabaseName()))
     , database(db)
@@ -44,6 +46,26 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread()
             /// NOTE It will not stop cleanup thread until DDLWorker::shutdown() call (cleanup thread will just do nothing)
             break;
         }

+        if (database->db_settings.max_retries_before_automatic_recovery &&
+            database->db_settings.max_retries_before_automatic_recovery <= subsequent_errors_count)
+        {
+            String current_task_name;
+            {
+                std::unique_lock lock{mutex};
+                current_task_name = current_task;
+            }
+            LOG_WARNING(log, "Database got stuck at processing task {}: it failed {} times in a row with the same error. "
+                             "Will reset digest to mark our replica as lost, and trigger recovery from the most up-to-date metadata "
+                             "from ZooKeeper. See max_retries_before_automatic_recovery setting. The error: {}",
+                        current_task, subsequent_errors_count, last_unexpected_error);
+
+            String digest_str;
+            zookeeper->tryGet(database->replica_path + "/digest", digest_str);
+            LOG_WARNING(log, "Resetting digest from {} to {}", digest_str, FORCE_AUTO_RECOVERY_DIGEST);
+            zookeeper->trySet(database->replica_path + "/digest", FORCE_AUTO_RECOVERY_DIGEST);
+        }
+
         initializeReplication();
         initialized = true;
         return true;
@@ -440,7 +440,7 @@ void DatabaseMySQL::detachTablePermanently(ContextPtr, const String & table_name
         throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", backQuoteIfNeed(database_name), backQuoteIfNeed(table_name));

     if (fs::exists(remove_flag))
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "The remove flag file already exists but the {}.{} does not exists remove tables, it is bug.",
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "The remove flag file already exists but the {}.{} does not exist remove tables, it is bug.",
                         backQuoteIfNeed(database_name), backQuoteIfNeed(table_name));

     auto table_iter = local_tables_cache.find(table_name);
@@ -470,7 +470,7 @@ public:
         auto path = std::filesystem::path{file_path};
         auto parent_path_directory = path.parent_path();

-        /// If cache file is in directory that does not exists create it
+        /// If cache file is in directory that does not exist create it
         if (!std::filesystem::exists(parent_path_directory))
             if (!std::filesystem::create_directories(parent_path_directory))
                 throw Exception(ErrorCodes::CANNOT_CREATE_DIRECTORY, "Failed to create directories.");
@@ -73,8 +73,9 @@ bool ParallelReadBuffer::addReaderToPool()

     auto worker = read_workers.emplace_back(std::make_shared<ReadWorker>(input, range_start, size));

-    ++active_working_readers;
     schedule([this, my_worker = std::move(worker)]() mutable { readerThreadFunction(std::move(my_worker)); }, Priority{});
+    /// increase number of workers only after we are sure that the reader was scheduled
+    ++active_working_readers;

     return true;
 }
@@ -1,8 +1,7 @@
 #include <IO/S3/URI.h>
 #include <Poco/URI.h>
-#include "Common/Macros.h"
 #include <Interpreters/Context.h>
 #include <Storages/NamedCollectionsHelpers.h>
+#include "Common/Macros.h"
 #if USE_AWS_S3
 #include <Common/Exception.h>
 #include <Common/quoteString.h>
@@ -55,7 +54,11 @@ URI::URI(const std::string & uri_)
     static constexpr auto OSS = "OSS";
     static constexpr auto EOS = "EOS";

-    uri = Poco::URI(uri_);
+    if (containsArchive(uri_))
+        std::tie(uri_str, archive_pattern) = getPathToArchiveAndArchivePattern(uri_);
+    else
+        uri_str = uri_;
+    uri = Poco::URI(uri_str);

     std::unordered_map<std::string, std::string> mapper;
     auto context = Context::getGlobalContextInstance();
@@ -126,7 +129,8 @@ URI::URI(const std::string & uri_)
         boost::to_upper(name);
         /// For S3Express it will look like s3express-eun1-az1, i.e. contain region and AZ info
         if (name != S3 && !name.starts_with(S3EXPRESS) && name != COS && name != OBS && name != OSS && name != EOS)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
                 "Object storage system name is unrecognized in virtual hosted style S3 URI: {}",
                 quoteString(name));

@@ -156,10 +160,40 @@ void URI::validateBucket(const String & bucket, const Poco::URI & uri)
     /// S3 specification requires at least 3 and at most 63 characters in bucket name.
     /// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html
     if (bucket.length() < 3 || bucket.length() > 63)
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}",
-                        quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : "");
+        throw Exception(
+            ErrorCodes::BAD_ARGUMENTS,
+            "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}",
+            quoteString(bucket),
+            !uri.empty() ? " (" + uri.toString() + ")" : "");
 }

+bool URI::containsArchive(const std::string & source)
+{
+    size_t pos = source.find("::");
+    return (pos != std::string::npos);
+}
+
+std::pair<std::string, std::string> URI::getPathToArchiveAndArchivePattern(const std::string & source)
+{
+    size_t pos = source.find("::");
+    assert(pos != std::string::npos);
+
+    std::string path_to_archive = source.substr(0, pos);
+    while ((!path_to_archive.empty()) && path_to_archive.ends_with(' '))
+        path_to_archive.pop_back();
+
+    if (path_to_archive.empty())
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty");
+
+    std::string_view path_in_archive_view = std::string_view{source}.substr(pos + 2);
+    while (path_in_archive_view.front() == ' ')
+        path_in_archive_view.remove_prefix(1);
+
+    if (path_in_archive_view.empty())
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty");
+
+    return {path_to_archive, std::string{path_in_archive_view}};
+}
 }

 }
@@ -1,5 +1,6 @@
 #pragma once

+#include <optional>
 #include <string>

 #include "config.h"
@@ -28,6 +29,8 @@ struct URI
     std::string key;
     std::string version_id;
     std::string storage_name;
+    std::optional<std::string> archive_pattern;
+    std::string uri_str;

     bool is_virtual_hosted_style;

@@ -36,6 +39,10 @@ struct URI
     void addRegionToURI(const std::string & region);

     static void validateBucket(const std::string & bucket, const Poco::URI & uri);
+
+private:
+    bool containsArchive(const std::string & source);
+    std::pair<std::string, std::string> getPathToArchiveAndArchivePattern(const std::string & source);
 };

 }
@@ -20,6 +20,9 @@ namespace CurrentMetrics
     extern const Metric MergeTreeOutdatedPartsLoaderThreads;
     extern const Metric MergeTreeOutdatedPartsLoaderThreadsActive;
     extern const Metric MergeTreeOutdatedPartsLoaderThreadsScheduled;
+    extern const Metric MergeTreeUnexpectedPartsLoaderThreads;
+    extern const Metric MergeTreeUnexpectedPartsLoaderThreadsActive;
+    extern const Metric MergeTreeUnexpectedPartsLoaderThreadsScheduled;
     extern const Metric DatabaseReplicatedCreateTablesThreads;
     extern const Metric DatabaseReplicatedCreateTablesThreadsActive;
     extern const Metric DatabaseReplicatedCreateTablesThreadsScheduled;
@@ -151,6 +154,12 @@ StaticThreadPool & getOutdatedPartsLoadingThreadPool()
     return instance;
 }

+StaticThreadPool & getUnexpectedPartsLoadingThreadPool()
+{
+    static StaticThreadPool instance("MergeTreeUnexpectedPartsLoaderThreadPool", CurrentMetrics::MergeTreeUnexpectedPartsLoaderThreads, CurrentMetrics::MergeTreeUnexpectedPartsLoaderThreadsActive, CurrentMetrics::MergeTreeUnexpectedPartsLoaderThreadsScheduled);
+    return instance;
+}
+
 StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool()
 {
     static StaticThreadPool instance("CreateTablesThreadPool", CurrentMetrics::DatabaseReplicatedCreateTablesThreads, CurrentMetrics::DatabaseReplicatedCreateTablesThreadsActive, CurrentMetrics::DatabaseReplicatedCreateTablesThreadsScheduled);
@@ -64,6 +64,8 @@ StaticThreadPool & getPartsCleaningThreadPool();
/// the number of threads by calling enableTurboMode() :-)
StaticThreadPool & getOutdatedPartsLoadingThreadPool();

+StaticThreadPool & getUnexpectedPartsLoadingThreadPool();
+
/// ThreadPool used for creating tables in DatabaseReplicated.
StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool();

@@ -21,6 +21,9 @@
 #include <base/sort.h>
 #include <Common/JSONBuilder.h>

+#include <absl/container/flat_hash_map.h>
+#include <absl/container/inlined_vector.h>
+

 namespace DB
 {
@@ -708,16 +711,18 @@ static ColumnWithTypeAndName executeActionForPartialResult(const ActionsDAG::Nod
     return res_column;
 }

-Block ActionsDAG::updateHeader(Block header) const
+Block ActionsDAG::updateHeader(const Block & header) const
 {
     IntermediateExecutionResult node_to_column;
     std::set<size_t> pos_to_remove;

     {
-        std::unordered_map<std::string_view, std::list<size_t>> input_positions;
+        using inline_vector = absl::InlinedVector<size_t, 7>; // 64B, holding max 7 size_t elements inlined
+        absl::flat_hash_map<std::string_view, inline_vector> input_positions;

-        for (size_t pos = 0; pos < inputs.size(); ++pos)
-            input_positions[inputs[pos]->result_name].emplace_back(pos);
+        /// We insert from last to first in the inlinedVector so it's easier to pop_back matches later
+        for (size_t pos = inputs.size(); pos != 0; pos--)
+            input_positions[inputs[pos - 1]->result_name].emplace_back(pos - 1);

         for (size_t pos = 0; pos < header.columns(); ++pos)
         {
@@ -725,10 +730,11 @@ Block ActionsDAG::updateHeader(const Block & header) const
             auto it = input_positions.find(col.name);
             if (it != input_positions.end() && !it->second.empty())
             {
-                auto & list = it->second;
                 pos_to_remove.insert(pos);
-                node_to_column[inputs[list.front()]] = col;
-                list.pop_front();
+
+                auto & v = it->second;
+                node_to_column[inputs[v.back()]] = col;
+                v.pop_back();
             }
         }
     }
@@ -746,18 +752,21 @@ Block ActionsDAG::updateHeader(const Block & header) const
         throw;
     }

-    if (isInputProjected())
-        header.clear();
-    else
-        header.erase(pos_to_remove);
-
     Block res;

-    res.reserve(result_columns.size());
     for (auto & col : result_columns)
         res.insert(std::move(col));

-    for (auto && item : header)
-        res.insert(std::move(item));
+    if (isInputProjected())
+        return res;
+
+    res.reserve(header.columns() - pos_to_remove.size());
+    for (size_t i = 0; i < header.columns(); i++)
+    {
+        if (!pos_to_remove.contains(i))
+            res.insert(header.data[i]);
+    }

     return res;
 }
@@ -272,7 +272,7 @@ public:
     ///
     /// In addition, check that result constants are constants according to DAG.
    /// In case if function return constant, but arguments are not constant, materialize it.
-    Block updateHeader(Block header) const;
+    Block updateHeader(const Block & header) const;

    using IntermediateExecutionResult = std::unordered_map<const Node *, ColumnWithTypeAndName>;
    static ColumnsWithTypeAndName evaluatePartialResult(
@@ -667,11 +667,7 @@ namespace
 using TimePoint = std::chrono::time_point<std::chrono::system_clock>;

 void appendElementsToLogSafe(
-    AsynchronousInsertLog & log,
-    std::vector<AsynchronousInsertLogElement> elements,
-    TimePoint flush_time,
-    const String & flush_query_id,
-    const String & flush_exception)
+    AsynchronousInsertLog & log, std::vector<AsynchronousInsertLogElement> elements, TimePoint flush_time, const String & flush_exception)
 try
 {
     using Status = AsynchronousInsertLogElement::Status;
@@ -680,7 +676,6 @@ try
     {
         elem.flush_time = timeInSeconds(flush_time);
         elem.flush_time_microseconds = timeInMicroseconds(flush_time);
-        elem.flush_query_id = flush_query_id;
         elem.exception = flush_exception;
         elem.status = flush_exception.empty() ? Status::Ok : Status::FlushError;
         log.add(std::move(elem));
@@ -808,7 +803,7 @@ try
         throw;
     }

-    auto add_entry_to_log = [&](const auto & entry,
+    auto add_entry_to_asynchronous_insert_log = [&](const auto & entry,
                                 const auto & entry_query_for_logging,
                                 const auto & exception,
                                 size_t num_rows,
@@ -831,6 +826,7 @@ try
         elem.exception = exception;
         elem.data_kind = entry->chunk.getDataKind();
         elem.timeout_milliseconds = timeout_ms.count();
+        elem.flush_query_id = insert_query_id;

         /// If there was a parsing error,
         /// the entry won't be flushed anyway,
@@ -857,7 +853,7 @@ try
        if (!log_elements.empty())
        {
            auto flush_time = std::chrono::system_clock::now();
-            appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, insert_query_id, "");
+            appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, "");
        }
    };

@@ -865,15 +861,27 @@ try
    auto header = pipeline.getHeader();

    if (key.data_kind == DataKind::Parsed)
-        chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_log);
+        chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_asynchronous_insert_log);
    else
-        chunk = processPreprocessedEntries(key, data, header, insert_context, add_entry_to_log);
+        chunk = processPreprocessedEntries(key, data, header, insert_context, add_entry_to_asynchronous_insert_log);

    ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows());

+    auto log_and_add_finish_to_query_log = [&](size_t num_rows, size_t num_bytes)
+    {
+        LOG_DEBUG(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str);
+        queue_shard_flush_time_history.updateWithCurrentTime();
+
+        bool pulling_pipeline = false;
+        logQueryFinish(
+            query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal);
+    };
+
+
    if (chunk.getNumRows() == 0)
    {
        finish_entries();
+        log_and_add_finish_to_query_log(0, 0);
        return;
    }

@@ -888,12 +896,7 @@ try
        CompletedPipelineExecutor completed_executor(pipeline);
        completed_executor.execute();

-        LOG_INFO(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str);
-
-        queue_shard_flush_time_history.updateWithCurrentTime();
-
-        bool pulling_pipeline = false;
-        logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal);
+        log_and_add_finish_to_query_log(num_rows, num_bytes);
    }
    catch (...)
    {
@@ -903,7 +906,7 @@ try
        {
            auto exception = getCurrentExceptionMessage(false);
            auto flush_time = std::chrono::system_clock::now();
-            appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, insert_query_id, exception);
+            appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, exception);
        }
        throw;
    }
@@ -1607,6 +1607,21 @@ Tables Context::getExternalTables() const


+void Context::addExternalTable(const String & table_name, TemporaryTableHolder && temporary_table)
+{
+    addExternalTable(table_name, std::make_shared<TemporaryTableHolder>(std::move(temporary_table)));
+}
+
+void Context::updateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table)
+{
+    updateExternalTable(table_name, std::make_shared<TemporaryTableHolder>(std::move(temporary_table)));
+}
+
+void Context::addOrUpdateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table)
+{
+    addOrUpdateExternalTable(table_name, std::make_shared<TemporaryTableHolder>(std::move(temporary_table)));
+}
+
 void Context::addExternalTable(const String & table_name, std::shared_ptr<TemporaryTableHolder> temporary_table)
 {
     if (isGlobalContext())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables");
@@ -1614,34 +1629,32 @@ void Context::addExternalTable(const String & table_name, TemporaryTableHolder &
     std::lock_guard lock(mutex);
     if (external_tables_mapping.end() != external_tables_mapping.find(table_name))
         throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} already exists", backQuoteIfNeed(table_name));
-    external_tables_mapping.emplace(table_name, std::make_shared<TemporaryTableHolder>(std::move(temporary_table)));
+
+    external_tables_mapping.emplace(table_name, std::move(temporary_table));
 }

-void Context::updateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table)
+void Context::updateExternalTable(const String & table_name, std::shared_ptr<TemporaryTableHolder> temporary_table)
 {
     if (isGlobalContext())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables");

-    auto temporary_table_ptr = std::make_shared<TemporaryTableHolder>(std::move(temporary_table));
-
     std::lock_guard lock(mutex);
     auto it = external_tables_mapping.find(table_name);
     if (it == external_tables_mapping.end())
-        throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} does not exists", backQuoteIfNeed(table_name));
-    it->second = std::move(temporary_table_ptr);
+        throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} does not exist", backQuoteIfNeed(table_name));
+
+    it->second = std::move(temporary_table);
 }

-void Context::addOrUpdateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table)
+void Context::addOrUpdateExternalTable(const String & table_name, std::shared_ptr<TemporaryTableHolder> temporary_table)
 {
     if (isGlobalContext())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables");

-    auto temporary_table_ptr = std::make_shared<TemporaryTableHolder>(std::move(temporary_table));
-
     std::lock_guard lock(mutex);
-    auto [it, inserted] = external_tables_mapping.emplace(table_name, temporary_table_ptr);
+    auto [it, inserted] = external_tables_mapping.emplace(table_name, temporary_table);
     if (!inserted)
-        it->second = std::move(temporary_table_ptr);
+        it->second = std::move(temporary_table);
 }

 std::shared_ptr<TemporaryTableHolder> Context::findExternalTable(const String & table_name) const
@@ -4467,7 +4480,7 @@ void Context::setApplicationType(ApplicationType type)
     /// Lock isn't required, you should set it at start
     shared->application_type = type;

-    if (type == ApplicationType::LOCAL || type == ApplicationType::SERVER)
+    if (type == ApplicationType::LOCAL || type == ApplicationType::SERVER || type == ApplicationType::DISKS)
         shared->server_settings.loadSettingsFromConfig(Poco::Util::Application::instance().config());

     if (type == ApplicationType::SERVER)
@ -685,6 +685,9 @@ public:
|
||||
void addExternalTable(const String & table_name, TemporaryTableHolder && temporary_table);
|
||||
void updateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table);
|
||||
void addOrUpdateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table);
|
||||
void addExternalTable(const String & table_name, std::shared_ptr<TemporaryTableHolder> temporary_table);
|
||||
void updateExternalTable(const String & table_name, std::shared_ptr<TemporaryTableHolder> temporary_table);
|
||||
void addOrUpdateExternalTable(const String & table_name, std::shared_ptr<TemporaryTableHolder> temporary_table);
|
||||
std::shared_ptr<TemporaryTableHolder> findExternalTable(const String & table_name) const;
|
||||
std::shared_ptr<TemporaryTableHolder> removeExternalTable(const String & table_name);
|
||||
|
||||
|
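Note: the three new overloads above let callers register a TemporaryTableHolder by shared_ptr and keep using the same holder afterwards. A minimal usage sketch, mirroring the recursive CTE call site later in this diff; the context and table name are illustrative assumptions, not part of this change:

    // Assumed names: a query-level mutable context and a holder constructed elsewhere.
    std::shared_ptr<TemporaryTableHolder> holder = /* constructed elsewhere */;
    // Insert-or-replace semantics: adds the table if missing, otherwise swaps in the new holder.
    query_context->addOrUpdateExternalTable("recursive_table", holder);
    // The caller keeps its own reference to `holder`, which is the point of the shared_ptr
    // overloads: the same holder can later be re-registered, truncated, or queried again.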
@ -676,7 +676,8 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
|
||||
if (task.execution_status.code != 0)
|
||||
{
|
||||
bool status_written_by_table_or_db = task.ops.empty();
|
||||
if (status_written_by_table_or_db)
|
||||
bool is_replicated_database_task = dynamic_cast<DatabaseReplicatedTask *>(&task);
|
||||
if (status_written_by_table_or_db || is_replicated_database_task)
|
||||
{
|
||||
throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.message);
|
||||
}
|
||||
@ -710,6 +711,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
|
||||
task.createSyncedNodeIfNeed(zookeeper);
|
||||
updateMaxDDLEntryID(task.entry_name);
|
||||
task.completely_processed = true;
|
||||
subsequent_errors_count = 0;
|
||||
}
|
||||
|
||||
|
||||
@ -791,6 +793,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
|
||||
bool executed_by_us = false;
|
||||
bool executed_by_other_leader = false;
|
||||
|
||||
bool extra_attempt_for_replicated_database = false;
|
||||
|
||||
/// Defensive programming. One hour is more than enough to execute almost all DDL queries.
|
||||
/// If it will be very long query like ALTER DELETE for a huge table it's still will be executed,
|
||||
/// but DDL worker can continue processing other queries.
|
||||
@ -835,7 +839,14 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
|
||||
/// Checking and incrementing counter exclusively.
|
||||
size_t counter = parse<int>(zookeeper->get(tries_to_execute_path));
|
||||
if (counter > MAX_TRIES_TO_EXECUTE)
|
||||
{
|
||||
/// Replicated databases have their own retries; limiting retries here would break the outer retries
|
||||
bool is_replicated_database_task = dynamic_cast<DatabaseReplicatedTask *>(&task);
|
||||
if (is_replicated_database_task)
|
||||
extra_attempt_for_replicated_database = true;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
zookeeper->set(tries_to_execute_path, toString(counter + 1));
|
||||
|
||||
@ -849,6 +860,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
|
||||
executed_by_us = true;
|
||||
break;
|
||||
}
|
||||
else if (extra_attempt_for_replicated_database)
|
||||
break;
|
||||
}
|
||||
|
||||
/// Waiting for someone who will execute query and change is_executed_path node
|
||||
@ -892,6 +905,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
|
||||
else /// If we exceeded amount of tries
|
||||
{
|
||||
LOG_WARNING(log, "Task {} was not executed by anyone, maximum number of retries exceeded", task.entry_name);
|
||||
bool keep_original_error = extra_attempt_for_replicated_database && task.execution_status.code;
|
||||
if (!keep_original_error)
|
||||
task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, maximum retries exceeded");
|
||||
}
|
||||
return false;
|
||||
@ -1144,12 +1159,14 @@ void DDLWorker::runMainThread()
|
||||
|
||||
cleanup_event->set();
|
||||
scheduleTasks(reinitialized);
|
||||
subsequent_errors_count = 0;
|
||||
|
||||
LOG_DEBUG(log, "Waiting for queue updates");
|
||||
queue_updated_event->wait();
|
||||
}
|
||||
catch (const Coordination::Exception & e)
|
||||
{
|
||||
subsequent_errors_count = 0;
|
||||
if (Coordination::isHardwareError(e.code))
|
||||
{
|
||||
initialized = false;
|
||||
@ -1167,9 +1184,32 @@ void DDLWorker::runMainThread()
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log, "Unexpected error, will try to restart main thread");
|
||||
reset_state();
|
||||
String message = getCurrentExceptionMessage(/*with_stacktrace*/ true);
|
||||
if (subsequent_errors_count)
|
||||
{
|
||||
if (last_unexpected_error == message)
|
||||
{
|
||||
++subsequent_errors_count;
|
||||
}
|
||||
else
|
||||
{
|
||||
subsequent_errors_count = 1;
|
||||
last_unexpected_error = message;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
subsequent_errors_count = 1;
|
||||
last_unexpected_error = message;
|
||||
}
|
||||
|
||||
LOG_ERROR(log, "Unexpected error ({} times in a row), will try to restart main thread: {}", subsequent_errors_count, message);
|
||||
|
||||
/// Sleep before retrying
|
||||
sleepForSeconds(5);
|
||||
/// Reset state after sleeping, so DatabaseReplicated::canExecuteReplicatedMetadataAlter()
|
||||
/// will have a chance even when the database got stuck in infinite retries
|
||||
reset_state();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -194,6 +194,9 @@ protected:
|
||||
|
||||
ConcurrentSet entries_to_skip;
|
||||
|
||||
std::atomic_uint64_t subsequent_errors_count = 0;
|
||||
String last_unexpected_error;
|
||||
|
||||
const CurrentMetrics::Metric * max_entry_metric;
|
||||
const CurrentMetrics::Metric * max_pushed_entry_metric;
|
||||
};
|
||||
|
@ -2487,10 +2487,15 @@ HashJoin::~HashJoin()
|
||||
{
|
||||
if (!data)
|
||||
{
|
||||
LOG_TRACE(log, "{}Join data has been already released", instance_log_id);
|
||||
LOG_TEST(log, "{}Join data has been already released", instance_log_id);
|
||||
return;
|
||||
}
|
||||
LOG_TRACE(log, "{}Join data is being destroyed, {} bytes and {} rows in hash table", instance_log_id, getTotalByteCount(), getTotalRowCount());
|
||||
LOG_TEST(
|
||||
log,
|
||||
"{}Join data is being destroyed, {} bytes and {} rows in hash table",
|
||||
instance_log_id,
|
||||
getTotalByteCount(),
|
||||
getTotalRowCount());
|
||||
}
|
||||
|
||||
template <typename Mapped>
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <DataTypes/DataTypeDateTime64.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <Common/ClickHouseRevision.h>
|
||||
#include <Common/SymbolIndex.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -53,6 +54,18 @@ ColumnsDescription TraceLogElement::getColumnsDescription()
|
||||
};
|
||||
}
|
||||
|
||||
NamesAndAliases TraceLogElement::getNamesAndAliases()
|
||||
{
|
||||
String build_id_hex;
|
||||
#if defined(__ELF__) && !defined(OS_FREEBSD)
|
||||
build_id_hex = SymbolIndex::instance().getBuildIDHex();
|
||||
#endif
|
||||
return
|
||||
{
|
||||
{"build_id", std::make_shared<DataTypeString>(), "\'" + build_id_hex + "\'"},
|
||||
};
|
||||
}
|
||||
|
||||
void TraceLogElement::appendToBlock(MutableColumns & columns) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
@ -39,7 +39,7 @@ struct TraceLogElement
|
||||
|
||||
static std::string name() { return "TraceLog"; }
|
||||
static ColumnsDescription getColumnsDescription();
|
||||
static NamesAndAliases getNamesAndAliases() { return {}; }
|
||||
static NamesAndAliases getNamesAndAliases();
|
||||
void appendToBlock(MutableColumns & columns) const;
|
||||
};
|
||||
|
||||
|
@ -808,10 +808,12 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
|
||||
bool is_create_parameterized_view = false;
|
||||
if (const auto * create_query = ast->as<ASTCreateQuery>())
|
||||
{
|
||||
is_create_parameterized_view = create_query->isParameterizedView();
|
||||
}
|
||||
else if (const auto * explain_query = ast->as<ASTExplainQuery>())
|
||||
{
|
||||
assert(!explain_query->children.empty());
|
||||
if (!explain_query->children.empty())
|
||||
if (const auto * create_of_explain_query = explain_query->children[0]->as<ASTCreateQuery>())
|
||||
is_create_parameterized_view = create_of_explain_query->isParameterizedView();
|
||||
}
|
||||
|
@ -1229,8 +1229,9 @@ void Planner::buildQueryPlanIfNeeded()
|
||||
if (query_plan.isInitialized())
|
||||
return;
|
||||
|
||||
LOG_TRACE(getLogger("Planner"), "Query {} to stage {}{}",
|
||||
query_tree->formatConvertedASTForErrorMessage(),
|
||||
LOG_TRACE(
|
||||
getLogger("Planner"),
|
||||
"Query to stage {}{}",
|
||||
QueryProcessingStage::toString(select_query_options.to_stage),
|
||||
select_query_options.only_analyze ? " only analyze" : "");
|
||||
|
||||
@ -1506,8 +1507,9 @@ void Planner::buildPlanForQueryNode()
|
||||
auto & mapping = join_tree_query_plan.query_node_to_plan_step_mapping;
|
||||
query_node_to_plan_step_mapping.insert(mapping.begin(), mapping.end());
|
||||
|
||||
LOG_TRACE(getLogger("Planner"), "Query {} from stage {} to stage {}{}",
|
||||
query_tree->formatConvertedASTForErrorMessage(),
|
||||
LOG_TRACE(
|
||||
getLogger("Planner"),
|
||||
"Query from stage {} to stage {}{}",
|
||||
QueryProcessingStage::toString(from_stage),
|
||||
QueryProcessingStage::toString(select_query_options.to_stage),
|
||||
select_query_options.only_analyze ? " only analyze" : "");
|
||||
|
@ -137,7 +137,7 @@ public:
|
||||
if (it == column_name_to_column.end())
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Column for column name {} does not exists. There are only column names: {}",
|
||||
"Column for column name {} does not exist. There are only column names: {}",
|
||||
column_name,
|
||||
fmt::join(column_names.begin(), column_names.end(), ", "));
|
||||
}
|
||||
@ -154,7 +154,7 @@ public:
|
||||
if (it == column_name_to_column_identifier.end())
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Column identifier for column name {} does not exists. There are only column names: {}",
|
||||
"Column identifier for column name {} does not exist. There are only column names: {}",
|
||||
column_name,
|
||||
fmt::join(column_names.begin(), column_names.end(), ", "));
|
||||
}
|
||||
|
@ -1,24 +1,25 @@
|
||||
#include <Planner/findQueryForParallelReplicas.h>
|
||||
#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
|
||||
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
|
||||
#include <Processors/QueryPlan/JoinStep.h>
|
||||
#include <Processors/QueryPlan/CreatingSetsStep.h>
|
||||
#include <Storages/buildQueryTreeForShard.h>
|
||||
#include <Interpreters/ClusterProxy/executeQuery.h>
|
||||
#include <Planner/PlannerJoinTree.h>
|
||||
#include <Planner/Utils.h>
|
||||
#include <Analyzer/ArrayJoinNode.h>
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/JoinNode.h>
|
||||
#include <Analyzer/QueryNode.h>
|
||||
#include <Analyzer/TableNode.h>
|
||||
#include <Analyzer/UnionNode.h>
|
||||
#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
|
||||
#include <Interpreters/ClusterProxy/executeQuery.h>
|
||||
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
|
||||
#include <Parsers/ASTSubquery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Planner/PlannerJoinTree.h>
|
||||
#include <Planner/Utils.h>
|
||||
#include <Planner/findQueryForParallelReplicas.h>
|
||||
#include <Processors/QueryPlan/CreatingSetsStep.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Processors/QueryPlan/FilterStep.h>
|
||||
#include <Processors/QueryPlan/JoinStep.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/StorageDummy.h>
|
||||
#include <Storages/StorageMaterializedView.h>
|
||||
#include <Storages/buildQueryTreeForShard.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -316,7 +317,8 @@ static const TableNode * findTableForParallelReplicas(const IQueryTreeNode * que
|
||||
case QueryTreeNodeType::TABLE:
|
||||
{
|
||||
const auto & table_node = query_tree_node->as<TableNode &>();
|
||||
const auto & storage = table_node.getStorage();
|
||||
const auto * as_mat_view = typeid_cast<const StorageMaterializedView *>(table_node.getStorage().get());
|
||||
const auto & storage = as_mat_view ? as_mat_view->getTargetTable() : table_node.getStorage();
|
||||
if (std::dynamic_pointer_cast<MergeTreeData>(storage) || typeid_cast<const StorageDummy *>(storage.get()))
|
||||
return &table_node;
|
||||
|
||||
|
@ -262,10 +262,6 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan::
|
||||
{
|
||||
const auto & left_table_key_name = join_clause.key_names_left[i];
|
||||
const auto & right_table_key_name = join_clause.key_names_right[i];
|
||||
|
||||
if (!join_header.has(left_table_key_name) || !join_header.has(right_table_key_name))
|
||||
continue;
|
||||
|
||||
const auto & left_table_column = left_stream_input_header.getByName(left_table_key_name);
|
||||
const auto & right_table_column = right_stream_input_header.getByName(right_table_key_name);
|
||||
|
||||
@ -338,9 +334,9 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan::
|
||||
auto join_filter_push_down_actions = filter->getExpression()->splitActionsForJOINFilterPushDown(filter->getFilterColumnName(),
|
||||
filter->removesFilterColumn(),
|
||||
left_stream_available_columns_to_push_down,
|
||||
left_stream_input_header.getColumnsWithTypeAndName(),
|
||||
left_stream_input_header,
|
||||
right_stream_available_columns_to_push_down,
|
||||
right_stream_input_header.getColumnsWithTypeAndName(),
|
||||
right_stream_input_header,
|
||||
equivalent_columns_to_push_down,
|
||||
equivalent_left_stream_column_to_right_stream_column,
|
||||
equivalent_right_stream_column_to_left_stream_column);
|
||||
|
@ -128,15 +128,21 @@ class IndexAccess
|
||||
public:
|
||||
explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_)
|
||||
{
|
||||
/// Some suffix of index columns might not be loaded (see `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`)
|
||||
/// and we need to use the same set of index columns across all parts.
|
||||
/// Indices might be reloaded during the process and the reload might produce a different value
|
||||
/// (change in `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`). Also, some suffix of index
|
||||
/// columns might not be loaded (same setting) so we keep a reference to the current indices and
|
||||
/// track the minimal subset of loaded columns across all parts.
|
||||
indices.reserve(parts.size());
|
||||
for (const auto & part : parts)
|
||||
loaded_columns = std::min(loaded_columns, part.data_part->getIndex()->size());
|
||||
indices.push_back(part.data_part->getIndex());
|
||||
|
||||
for (const auto & index : indices)
|
||||
loaded_columns = std::min(loaded_columns, index->size());
|
||||
}
|
||||
|
||||
Values getValue(size_t part_idx, size_t mark) const
|
||||
{
|
||||
const auto & index = parts[part_idx].data_part->getIndex();
|
||||
const auto & index = indices[part_idx];
|
||||
chassert(index->size() >= loaded_columns);
|
||||
Values values(loaded_columns);
|
||||
for (size_t i = 0; i < loaded_columns; ++i)
|
||||
@ -206,6 +212,7 @@ public:
|
||||
}
|
||||
private:
|
||||
const RangesInDataParts & parts;
|
||||
std::vector<IMergeTreeDataPart::Index> indices;
|
||||
size_t loaded_columns = std::numeric_limits<size_t>::max();
|
||||
};
|
||||
|
||||
|
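Note: the IndexAccess change above pins each part's index once in the constructor and derives loaded_columns from that pinned snapshot, so a concurrent index reload (or a change of primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns) cannot shift the set of loaded columns mid-analysis. A condensed restatement of the invariant using the same members as the hunk; this is a sketch of the pattern, not additional code in this change:

    // Snapshot once; every later getValue(part_idx, mark) reads indices[part_idx]
    // instead of re-fetching part.data_part->getIndex(), so all reads agree on
    // the same minimal number of loaded index columns.
    indices.reserve(parts.size());
    for (const auto & part : parts)
        indices.push_back(part.data_part->getIndex());
    for (const auto & index : indices)
        loaded_columns = std::min(loaded_columns, index->size());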
@ -21,7 +21,7 @@ Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfo
|
||||
{
|
||||
if (prewhere_info->row_level_filter)
|
||||
{
|
||||
block = prewhere_info->row_level_filter->updateHeader(std::move(block));
|
||||
block = prewhere_info->row_level_filter->updateHeader(block);
|
||||
auto & row_level_column = block.getByName(prewhere_info->row_level_column_name);
|
||||
if (!row_level_column.type->canBeUsedInBooleanContext())
|
||||
{
|
||||
@ -36,7 +36,7 @@ Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfo
|
||||
|
||||
if (prewhere_info->prewhere_actions)
|
||||
{
|
||||
block = prewhere_info->prewhere_actions->updateHeader(std::move(block));
|
||||
block = prewhere_info->prewhere_actions->updateHeader(block);
|
||||
|
||||
auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name);
|
||||
if (!prewhere_column.type->canBeUsedInBooleanContext())
|
||||
|
@ -102,6 +102,7 @@ public:
|
||||
"Recursive CTE subquery {}. Expected projection columns to have same size in recursive and non recursive subquery.",
|
||||
recursive_cte_union_node->formatASTForErrorMessage());
|
||||
|
||||
working_temporary_table_holder = recursive_cte_table->holder;
|
||||
working_temporary_table_storage = recursive_cte_table->storage;
|
||||
|
||||
intermediate_temporary_table_holder = std::make_shared<TemporaryTableHolder>(
|
||||
@ -147,6 +148,7 @@ public:
|
||||
|
||||
truncateTemporaryTable(working_temporary_table_storage);
|
||||
|
||||
std::swap(intermediate_temporary_table_holder, working_temporary_table_holder);
|
||||
std::swap(intermediate_temporary_table_storage, working_temporary_table_storage);
|
||||
}
|
||||
|
||||
@ -172,6 +174,9 @@ private:
|
||||
SelectQueryOptions select_query_options;
|
||||
select_query_options.merge_tree_enable_remove_parts_from_snapshot_optimization = false;
|
||||
|
||||
const auto & recursive_table_name = recursive_cte_union_node->as<UnionNode &>().getCTEName();
|
||||
recursive_query_context->addOrUpdateExternalTable(recursive_table_name, working_temporary_table_holder);
|
||||
|
||||
auto interpreter = std::make_unique<InterpreterSelectQueryAnalyzer>(query_to_execute, recursive_query_context, select_query_options);
|
||||
auto pipeline_builder = interpreter->buildQueryPipeline();
|
||||
|
||||
@ -225,6 +230,7 @@ private:
|
||||
QueryTreeNodePtr recursive_query;
|
||||
ContextMutablePtr recursive_query_context;
|
||||
|
||||
TemporaryTableHolderPtr working_temporary_table_holder;
|
||||
StoragePtr working_temporary_table_storage;
|
||||
|
||||
TemporaryTableHolderPtr intermediate_temporary_table_holder;
|
||||
|
@ -3,9 +3,9 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
Block ExpressionTransform::transformHeader(Block header, const ActionsDAG & expression)
|
||||
Block ExpressionTransform::transformHeader(const Block & header, const ActionsDAG & expression)
|
||||
{
|
||||
return expression.updateHeader(std::move(header));
|
||||
return expression.updateHeader(header);
|
||||
}
|
||||
|
||||
|
||||
|
@ -24,7 +24,7 @@ public:
|
||||
|
||||
String getName() const override { return "ExpressionTransform"; }
|
||||
|
||||
static Block transformHeader(Block header, const ActionsDAG & expression);
|
||||
static Block transformHeader(const Block & header, const ActionsDAG & expression);
|
||||
|
||||
protected:
|
||||
void transform(Chunk & chunk) override;
|
||||
|
@ -174,26 +174,22 @@ static std::unique_ptr<IFilterDescription> combineFilterAndIndices(
|
||||
}
|
||||
|
||||
Block FilterTransform::transformHeader(
|
||||
Block header,
|
||||
const ActionsDAG * expression,
|
||||
const String & filter_column_name,
|
||||
bool remove_filter_column)
|
||||
const Block & header, const ActionsDAG * expression, const String & filter_column_name, bool remove_filter_column)
|
||||
{
|
||||
if (expression)
|
||||
header = expression->updateHeader(std::move(header));
|
||||
Block result = expression ? expression->updateHeader(header) : header;
|
||||
|
||||
auto filter_type = header.getByName(filter_column_name).type;
|
||||
auto filter_type = result.getByName(filter_column_name).type;
|
||||
if (!filter_type->onlyNull() && !isUInt8(removeNullable(removeLowCardinality(filter_type))))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER,
|
||||
"Illegal type {} of column {} for filter. Must be UInt8 or Nullable(UInt8).",
|
||||
filter_type->getName(), filter_column_name);
|
||||
|
||||
if (remove_filter_column)
|
||||
header.erase(filter_column_name);
|
||||
result.erase(filter_column_name);
|
||||
else
|
||||
replaceFilterToConstant(header, filter_column_name);
|
||||
replaceFilterToConstant(result, filter_column_name);
|
||||
|
||||
return header;
|
||||
return result;
|
||||
}
|
||||
|
||||
FilterTransform::FilterTransform(
|
||||
|
@ -22,11 +22,8 @@ public:
|
||||
const Block & header_, ExpressionActionsPtr expression_, String filter_column_name_,
|
||||
bool remove_filter_column_, bool on_totals_ = false, std::shared_ptr<std::atomic<size_t>> rows_filtered_ = nullptr);
|
||||
|
||||
static Block transformHeader(
|
||||
Block header,
|
||||
const ActionsDAG * expression,
|
||||
const String & filter_column_name,
|
||||
bool remove_filter_column);
|
||||
static Block
|
||||
transformHeader(const Block & header, const ActionsDAG * expression, const String & filter_column_name, bool remove_filter_column);
|
||||
|
||||
String getName() const override { return "FilterTransform"; }
|
||||
|
||||
|
@ -14,12 +14,12 @@ namespace ErrorCodes
|
||||
|
||||
Block JoiningTransform::transformHeader(Block header, const JoinPtr & join)
|
||||
{
|
||||
LOG_DEBUG(getLogger("JoiningTransform"), "Before join block: '{}'", header.dumpStructure());
|
||||
LOG_TEST(getLogger("JoiningTransform"), "Before join block: '{}'", header.dumpStructure());
|
||||
join->checkTypesOfKeys(header);
|
||||
join->initialize(header);
|
||||
ExtraBlockPtr tmp;
|
||||
join->joinBlock(header, tmp);
|
||||
LOG_DEBUG(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure());
|
||||
LOG_TEST(getLogger("JoiningTransform"), "After join block: '{}'", header.dumpStructure());
|
||||
return header;
|
||||
}
|
||||
|
||||
|
@ -338,8 +338,6 @@ static void prepareChunk(Chunk & chunk)
|
||||
|
||||
void MergeJoinAlgorithm::initialize(Inputs inputs)
|
||||
{
|
||||
LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: {} - '{}'", __FILE__, __LINE__, 0, inputs[0].chunk.dumpStructure());
|
||||
LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: {} - '{}'", __FILE__, __LINE__, 1, inputs[1].chunk.dumpStructure());
|
||||
if (inputs.size() != 2)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Two inputs are required, got {}", inputs.size());
|
||||
|
||||
@ -351,8 +349,6 @@ void MergeJoinAlgorithm::initialize(Inputs inputs)
|
||||
|
||||
void MergeJoinAlgorithm::consume(Input & input, size_t source_num)
|
||||
{
|
||||
LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: {} - '{}'", __FILE__, __LINE__, source_num, input.chunk.dumpStructure());
|
||||
|
||||
if (input.skip_last_row)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "skip_last_row is not supported");
|
||||
|
||||
@ -816,15 +812,9 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
|
||||
if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted())
|
||||
return Status(1);
|
||||
|
||||
for (size_t i = 0; i < 2; ++i)
|
||||
{
|
||||
LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: sampleColumns {} '{}'", __FILE__, __LINE__, i, cursors[i]->sampleBlock().dumpStructure());
|
||||
}
|
||||
|
||||
|
||||
if (auto result = handleAllJoinState())
|
||||
{
|
||||
LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{}: '{}'", __FILE__, __LINE__, result ? result->chunk.dumpStructure() : "NA");
|
||||
return std::move(*result);
|
||||
}
|
||||
|
||||
|
@ -49,7 +49,7 @@ Block TotalsHavingTransform::transformHeader(
|
||||
|
||||
if (expression)
|
||||
{
|
||||
block = expression->updateHeader(std::move(block));
|
||||
block = expression->updateHeader(block);
|
||||
if (remove_filter)
|
||||
block.erase(filter_column_name);
|
||||
}
|
||||
|
@ -1107,7 +1107,7 @@ void TCPHandler::processTablesStatusRequest()
|
||||
ContextPtr context_to_resolve_table_names;
|
||||
if (is_interserver_mode)
|
||||
{
|
||||
/// In interserver mode session context does not exists, because authentication is done for each query.
|
||||
/// In the interserver mode session context does not exist, because authentication is done for each query.
|
||||
/// We also cannot create query context earlier, because it cannot be created before authentication,
|
||||
/// but query is not received yet. So we have to do this trick.
|
||||
ContextMutablePtr fake_interserver_context = Context::createCopy(server.context());
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <boost/algorithm/string/case_conv.hpp>
|
||||
#include <parquet/arrow/reader.h>
|
||||
#include <ranges>
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
|
@ -54,7 +54,7 @@ public:
|
||||
{
|
||||
std::lock_guard lock(configuration_update_mutex);
|
||||
updateConfigurationImpl(local_context);
|
||||
return Storage::getConfiguration();
|
||||
return Storage::getConfigurationCopy();
|
||||
}
|
||||
|
||||
void updateConfiguration(const ContextPtr & local_context) override
|
||||
@ -106,7 +106,7 @@ private:
|
||||
const bool updated = base_configuration.update(local_context);
|
||||
auto new_keys = getDataFiles(base_configuration, local_context);
|
||||
|
||||
if (!updated && new_keys == Storage::getConfiguration().keys)
|
||||
if (!updated && new_keys == Storage::getConfigurationCopy().keys)
|
||||
return;
|
||||
|
||||
Storage::useConfiguration(getConfigurationForDataRead(base_configuration, local_context, new_keys));
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <Poco/JSON/Object.h>
|
||||
#include <Poco/JSON/Parser.h>
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -63,7 +63,7 @@ public:
|
||||
{
|
||||
std::lock_guard lock(configuration_update_mutex);
|
||||
updateConfigurationImpl(local_context);
|
||||
return StorageS3::getConfiguration();
|
||||
return StorageS3::getConfigurationCopy();
|
||||
}
|
||||
|
||||
void updateConfiguration(const ContextPtr & local_context) override
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <aws/s3/S3Client.h>
|
||||
#include <aws/s3/model/ListObjectsV2Request.h>
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -173,7 +173,7 @@ bool DistributedAsyncInsertBatch::valid()
|
||||
{
|
||||
if (!fs::exists(file))
|
||||
{
|
||||
LOG_WARNING(parent.log, "File {} does not exists, likely due abnormal shutdown", file);
|
||||
LOG_WARNING(parent.log, "File {} does not exist, likely due abnormal shutdown", file);
|
||||
res = false;
|
||||
}
|
||||
}
|
||||
|
@ -554,7 +554,7 @@ void DistributedAsyncInsertDirectoryQueue::processFilesWithBatching(const Settin
|
||||
{
|
||||
if (!fs::exists(file_path))
|
||||
{
|
||||
LOG_WARNING(log, "File {} does not exists, likely due to current_batch.txt processing", file_path);
|
||||
LOG_WARNING(log, "File {} does not exist, likely due to current_batch.txt processing", file_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -580,7 +580,7 @@ void DataPartStorageOnDiskBase::rename(
|
||||
disk.setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr)));
|
||||
disk.moveDirectory(from, to);
|
||||
|
||||
/// Only after moveDirectory() since before the directory does not exists.
|
||||
/// Only after moveDirectory() since before the directory does not exist.
|
||||
SyncGuardPtr to_sync_guard;
|
||||
if (fsync_part_dir)
|
||||
to_sync_guard = volume->getDisk()->getDirectorySyncGuard(to);
|
||||
|
@ -346,16 +346,25 @@ IMergeTreeDataPart::Index IMergeTreeDataPart::getIndex() const
|
||||
if (!index_loaded)
|
||||
loadIndex();
|
||||
index_loaded = true;
|
||||
return TSA_SUPPRESS_WARNING_FOR_READ(index); /// The variable is guaranteed to be unchanged after return.
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
void IMergeTreeDataPart::setIndex(Index index_)
|
||||
void IMergeTreeDataPart::setIndex(const Columns & cols_)
|
||||
{
|
||||
std::scoped_lock lock(index_mutex);
|
||||
if (!index->empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The index of data part can be set only once");
|
||||
index = index_;
|
||||
index = std::make_shared<const Columns>(cols_);
|
||||
index_loaded = true;
|
||||
}
|
||||
|
||||
void IMergeTreeDataPart::setIndex(Columns && cols_)
|
||||
{
|
||||
std::scoped_lock lock(index_mutex);
|
||||
if (!index->empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The index of data part can be set only once");
|
||||
index = std::make_shared<const Columns>(std::move(cols_));
|
||||
index_loaded = true;
|
||||
}
|
||||
|
||||
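Note: with Index now defined as std::shared_ptr<const Columns>, an index is built as a plain Columns and becomes immutable the moment it is published through setIndex(); readers share the pointer instead of copying column vectors. The intended call sites appear in later hunks of this diff; a short recap, assuming no API beyond what those hunks show:

    // Writer finishing a brand-new part: hand over the freshly built index columns.
    new_part->setIndex(writer->releaseIndexColumns());

    // Mutation that keeps the source part's index: copy the already immutable columns.
    new_data_part->setIndex(*source_part->getIndex());

    // Readers obtain a shared, read-only view and cannot modify it in place.
    IMergeTreeDataPart::Index index = new_part->getIndex();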
@ -913,7 +922,7 @@ void IMergeTreeDataPart::loadIndex() const
|
||||
if (!index_file->eof())
|
||||
throw Exception(ErrorCodes::EXPECTED_END_OF_FILE, "Index file {} is unexpectedly long", index_path);
|
||||
|
||||
index->assign(std::make_move_iterator(loaded_index.begin()), std::make_move_iterator(loaded_index.end()));
|
||||
index = std::make_shared<Columns>(std::make_move_iterator(loaded_index.begin()), std::make_move_iterator(loaded_index.end()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1260,6 +1269,33 @@ void IMergeTreeDataPart::appendFilesOfChecksums(Strings & files)
|
||||
files.push_back("checksums.txt");
|
||||
}
|
||||
|
||||
void IMergeTreeDataPart::loadRowsCountFileForUnexpectedPart()
|
||||
{
|
||||
auto read_rows_count = [&]()
|
||||
{
|
||||
auto buf = metadata_manager->read("count.txt");
|
||||
readIntText(rows_count, *buf);
|
||||
assertEOF(*buf);
|
||||
};
|
||||
if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::Compact || parent_part)
|
||||
{
|
||||
if (metadata_manager->exists("count.txt"))
|
||||
{
|
||||
read_rows_count();
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (getDataPartStorage().exists("count.txt"))
|
||||
{
|
||||
read_rows_count();
|
||||
return;
|
||||
}
|
||||
}
|
||||
throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No count.txt in part {}", name);
|
||||
}
|
||||
|
||||
void IMergeTreeDataPart::loadRowsCount()
|
||||
{
|
||||
auto read_rows_count = [&]()
|
||||
|
@ -79,7 +79,7 @@ public:
|
||||
using ColumnSizeByName = std::unordered_map<std::string, ColumnSize>;
|
||||
using NameToNumber = std::unordered_map<std::string, size_t>;
|
||||
|
||||
using Index = std::shared_ptr<Columns>;
|
||||
using Index = std::shared_ptr<const Columns>;
|
||||
using IndexSizeByName = std::unordered_map<std::string, ColumnSize>;
|
||||
|
||||
using Type = MergeTreeDataPartType;
|
||||
@ -183,6 +183,8 @@ public:
|
||||
void loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency);
|
||||
void appendFilesOfColumnsChecksumsIndexes(Strings & files, bool include_projection = false) const;
|
||||
|
||||
void loadRowsCountFileForUnexpectedPart();
|
||||
|
||||
String getMarksFileExtension() const { return index_granularity_info.mark_type.getFileExtension(); }
|
||||
|
||||
/// Generate the new name for this part according to `new_part_info` and min/max dates from the old name.
|
||||
@ -368,7 +370,8 @@ public:
|
||||
int32_t metadata_version;
|
||||
|
||||
Index getIndex() const;
|
||||
void setIndex(Index index_);
|
||||
void setIndex(const Columns & cols_);
|
||||
void setIndex(Columns && cols_);
|
||||
void unloadIndex();
|
||||
|
||||
/// For data in RAM ('index')
|
||||
|
@ -100,7 +100,7 @@ protected:
|
||||
/// Position and level (of nesting).
|
||||
using ColumnNameLevel = std::optional<std::pair<String, size_t>>;
|
||||
|
||||
/// In case of part of the nested column does not exists, offsets should be
|
||||
/// In case a part of the nested column does not exist, offsets should be
|
||||
/// read, but only the offsets for the current column, that is why it
|
||||
/// returns pair of size_t, not just one.
|
||||
ColumnNameLevel findColumnForOffsets(const NameAndTypePair & column) const;
|
||||
|
@ -1312,6 +1312,46 @@ static constexpr size_t loading_parts_initial_backoff_ms = 100;
|
||||
static constexpr size_t loading_parts_max_backoff_ms = 5000;
|
||||
static constexpr size_t loading_parts_max_tries = 3;
|
||||
|
||||
void MergeTreeData::loadUnexpectedDataPart(UnexpectedPartLoadState & state)
|
||||
{
|
||||
const MergeTreePartInfo & part_info = state.loading_info->info;
|
||||
const String & part_name = state.loading_info->name;
|
||||
const DiskPtr & part_disk_ptr = state.loading_info->disk;
|
||||
LOG_TRACE(log, "Loading unexpected part {} from disk {}", part_name, part_disk_ptr->getName());
|
||||
|
||||
LoadPartResult res;
|
||||
auto single_disk_volume = std::make_shared<SingleDiskVolume>("volume_" + part_name, part_disk_ptr, 0);
|
||||
auto data_part_storage = std::make_shared<DataPartStorageOnDiskFull>(single_disk_volume, relative_data_path, part_name);
|
||||
String part_path = fs::path(relative_data_path) / part_name;
|
||||
|
||||
try
|
||||
{
|
||||
state.part = getDataPartBuilder(part_name, single_disk_volume, part_name)
|
||||
.withPartInfo(part_info)
|
||||
.withPartFormatFromDisk()
|
||||
.build();
|
||||
|
||||
state.part->loadRowsCountFileForUnexpectedPart();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
LOG_DEBUG(log, "Failed to load unexcepted data part {} with exception: {}", part_name, getExceptionMessage(std::current_exception(), false));
|
||||
if (!state.part)
|
||||
{
|
||||
/// Build a fake part and mark it as broken in case of filesystem error.
|
||||
/// If the error impacts part directory instead of single files,
|
||||
/// an exception will be thrown during detach and silently ignored.
|
||||
state.part = getDataPartBuilder(part_name, single_disk_volume, part_name)
|
||||
.withPartStorageType(MergeTreeDataPartStorageType::Full)
|
||||
.withPartType(MergeTreeDataPartType::Wide)
|
||||
.build();
|
||||
}
|
||||
|
||||
state.is_broken = true;
|
||||
tryLogCurrentException(log, fmt::format("while loading unexcepted part {} on path {}", part_name, part_path));
|
||||
}
|
||||
}
|
||||
|
||||
MergeTreeData::LoadPartResult MergeTreeData::loadDataPart(
|
||||
const MergeTreePartInfo & part_info,
|
||||
const String & part_name,
|
||||
@ -1704,6 +1744,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
|
||||
}
|
||||
|
||||
std::vector<PartLoadingTree::PartLoadingInfos> parts_to_load_by_disk(disks.size());
|
||||
std::vector<PartLoadingTree::PartLoadingInfos> unexpected_parts_to_load_by_disk(disks.size());
|
||||
|
||||
ThreadPoolCallbackRunnerLocal<void> runner(getActivePartsLoadingThreadPool().get(), "ActiveParts");
|
||||
|
||||
@ -1714,6 +1755,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
|
||||
continue;
|
||||
|
||||
auto & disk_parts = parts_to_load_by_disk[i];
|
||||
auto & unexpected_disk_parts = unexpected_parts_to_load_by_disk[i];
|
||||
|
||||
runner([&, disk_ptr]()
|
||||
{
|
||||
@ -1725,8 +1767,13 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
|
||||
continue;
|
||||
|
||||
if (auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version))
|
||||
{
|
||||
if (expected_parts && !expected_parts->contains(it->name()))
|
||||
unexpected_disk_parts.emplace_back(*part_info, it->name(), disk_ptr);
|
||||
else
|
||||
disk_parts.emplace_back(*part_info, it->name(), disk_ptr);
|
||||
}
|
||||
}
|
||||
}, Priority{0});
|
||||
}
|
||||
|
||||
@ -1736,6 +1783,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
|
||||
PartLoadingTree::PartLoadingInfos parts_to_load;
|
||||
for (auto & disk_parts : parts_to_load_by_disk)
|
||||
std::move(disk_parts.begin(), disk_parts.end(), std::back_inserter(parts_to_load));
|
||||
PartLoadingTree::PartLoadingInfos unexpected_parts_to_load;
|
||||
for (auto & disk_parts : unexpected_parts_to_load_by_disk)
|
||||
std::move(disk_parts.begin(), disk_parts.end(), std::back_inserter(unexpected_parts_to_load));
|
||||
|
||||
auto loading_tree = PartLoadingTree::build(std::move(parts_to_load));
|
||||
|
||||
@ -1811,7 +1861,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
|
||||
}
|
||||
}
|
||||
|
||||
if (num_parts == 0)
|
||||
if (num_parts == 0 && unexpected_parts_to_load.empty())
|
||||
{
|
||||
resetObjectColumnsFromActiveParts(part_lock);
|
||||
LOG_DEBUG(log, "There are no data parts");
|
||||
@ -1864,6 +1914,36 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
|
||||
calculateColumnAndSecondaryIndexSizesImpl();
|
||||
|
||||
PartLoadingTreeNodes unloaded_parts;
|
||||
|
||||
std::vector<UnexpectedPartLoadState> unexpected_unloaded_data_parts;
|
||||
for (const auto & [info, name, disk] : unexpected_parts_to_load)
|
||||
{
|
||||
bool uncovered = true;
|
||||
for (const auto & part : unexpected_parts_to_load)
|
||||
{
|
||||
if (name != part.name && part.info.contains(info))
|
||||
{
|
||||
uncovered = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
unexpected_unloaded_data_parts.push_back({std::make_shared<PartLoadingTree::Node>(info, name, disk), uncovered, /*is_broken*/ false, /*part*/ nullptr});
|
||||
}
|
||||
|
||||
if (!unexpected_unloaded_data_parts.empty())
|
||||
{
|
||||
LOG_DEBUG(log, "Found {} unexpected data parts. They will be loaded asynchronously", unexpected_unloaded_data_parts.size());
|
||||
{
|
||||
std::lock_guard lock(unexpected_data_parts_mutex);
|
||||
unexpected_data_parts = std::move(unexpected_unloaded_data_parts);
|
||||
unexpected_data_parts_loading_finished = false;
|
||||
}
|
||||
|
||||
unexpected_data_parts_loading_task = getContext()->getSchedulePool().createTask(
|
||||
"MergeTreeData::loadUnexpectedDataParts",
|
||||
[this] { loadUnexpectedDataParts(); });
|
||||
}
|
||||
|
||||
loading_tree.traverse(/*recursive=*/ true, [&](const auto & node)
|
||||
{
|
||||
if (!node->is_loaded)
|
||||
@ -1889,6 +1969,54 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
|
||||
data_parts_loading_finished = true;
|
||||
}
|
||||
|
||||
void MergeTreeData::loadUnexpectedDataParts()
|
||||
{
|
||||
{
|
||||
std::lock_guard lock(unexpected_data_parts_mutex);
|
||||
if (unexpected_data_parts.empty())
|
||||
{
|
||||
unexpected_data_parts_loading_finished = true;
|
||||
unexpected_data_parts_cv.notify_all();
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_DEBUG(log, "Loading {} unexpected data parts",
|
||||
unexpected_data_parts.size());
|
||||
}
|
||||
|
||||
ThreadFuzzer::maybeInjectSleep();
|
||||
ThreadPoolCallbackRunnerLocal<void> runner(getUnexpectedPartsLoadingThreadPool().get(), "UnexpectedParts");
|
||||
|
||||
for (auto & load_state : unexpected_data_parts)
|
||||
{
|
||||
std::lock_guard lock(unexpected_data_parts_mutex);
|
||||
chassert(!load_state.part);
|
||||
if (unexpected_data_parts_loading_canceled)
|
||||
{
|
||||
runner.waitForAllToFinishAndRethrowFirstError();
|
||||
return;
|
||||
}
|
||||
runner([&]()
|
||||
{
|
||||
loadUnexpectedDataPart(load_state);
|
||||
|
||||
chassert(load_state.part);
|
||||
if (load_state.is_broken)
|
||||
{
|
||||
load_state.part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes
|
||||
}
|
||||
}, Priority{});
|
||||
}
|
||||
runner.waitForAllToFinishAndRethrowFirstError();
|
||||
LOG_DEBUG(log, "Loaded {} unexpected data parts", unexpected_data_parts.size());
|
||||
|
||||
{
|
||||
std::lock_guard lock(unexpected_data_parts_mutex);
|
||||
unexpected_data_parts_loading_finished = true;
|
||||
unexpected_data_parts_cv.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
void MergeTreeData::loadOutdatedDataParts(bool is_async)
|
||||
try
|
||||
{
|
||||
@ -2024,17 +2152,55 @@ void MergeTreeData::waitForOutdatedPartsToBeLoaded() const TSA_NO_THREAD_SAFETY_
|
||||
LOG_TRACE(log, "Finished waiting for outdated data parts to be loaded");
|
||||
}
|
||||
|
||||
void MergeTreeData::startOutdatedDataPartsLoadingTask()
|
||||
void MergeTreeData::waitForUnexpectedPartsToBeLoaded() const TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
{
|
||||
/// Background tasks are not run if storage is static.
|
||||
if (isStaticStorage())
|
||||
return;
|
||||
|
||||
/// If waiting is not required, do NOT log and do NOT enable/disable turbo mode to make `waitForUnexpectedPartsToBeLoaded` a lightweight check
|
||||
{
|
||||
std::unique_lock lock(unexpected_data_parts_mutex);
|
||||
if (unexpected_data_parts_loading_canceled)
|
||||
throw Exception(ErrorCodes::NOT_INITIALIZED, "Loading of unexpected data parts was already canceled");
|
||||
if (unexpected_data_parts_loading_finished)
|
||||
return;
|
||||
}
|
||||
|
||||
/// We need to load parts as fast as possible
|
||||
getUnexpectedPartsLoadingThreadPool().enableTurboMode();
|
||||
SCOPE_EXIT({
|
||||
/// Let's lower the number of threads e.g. for later ATTACH queries to behave as usual
|
||||
getUnexpectedPartsLoadingThreadPool().disableTurboMode();
|
||||
});
|
||||
|
||||
LOG_TRACE(log, "Will wait for unexpected data parts to be loaded");
|
||||
|
||||
std::unique_lock lock(unexpected_data_parts_mutex);
|
||||
|
||||
unexpected_data_parts_cv.wait(lock, [this]() TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
{
|
||||
return unexpected_data_parts_loading_finished || unexpected_data_parts_loading_canceled;
|
||||
});
|
||||
|
||||
if (unexpected_data_parts_loading_canceled)
|
||||
throw Exception(ErrorCodes::NOT_INITIALIZED, "Loading of unexpected data parts was canceled");
|
||||
|
||||
LOG_TRACE(log, "Finished waiting for unexpected data parts to be loaded");
|
||||
}
|
||||
|
||||
void MergeTreeData::startOutdatedAndUnexpectedDataPartsLoadingTask()
|
||||
{
|
||||
if (outdated_data_parts_loading_task)
|
||||
outdated_data_parts_loading_task->activateAndSchedule();
|
||||
if (unexpected_data_parts_loading_task)
|
||||
unexpected_data_parts_loading_task->activateAndSchedule();
|
||||
}
|
||||
|
||||
void MergeTreeData::stopOutdatedDataPartsLoadingTask()
|
||||
void MergeTreeData::stopOutdatedAndUnexpectedDataPartsLoadingTask()
|
||||
{
|
||||
if (outdated_data_parts_loading_task)
|
||||
{
|
||||
if (!outdated_data_parts_loading_task)
|
||||
return;
|
||||
|
||||
{
|
||||
std::lock_guard lock(outdated_data_parts_mutex);
|
||||
outdated_data_parts_loading_canceled = true;
|
||||
@ -2044,6 +2210,18 @@ void MergeTreeData::stopOutdatedDataPartsLoadingTask()
|
||||
outdated_data_parts_cv.notify_all();
|
||||
}
|
||||
|
||||
if (unexpected_data_parts_loading_task)
|
||||
{
|
||||
{
|
||||
std::lock_guard lock(unexpected_data_parts_mutex);
|
||||
unexpected_data_parts_loading_canceled = true;
|
||||
}
|
||||
|
||||
unexpected_data_parts_loading_task->deactivate();
|
||||
unexpected_data_parts_cv.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
/// Is the part directory old.
|
||||
/// True if its modification time and the modification time of all files inside it is less then threshold.
|
||||
/// (Only files on the first level of nesting are considered).
|
||||
@ -3013,8 +3191,11 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
|
||||
"Experimental full-text index feature is not enabled (turn on setting 'allow_experimental_inverted_index')");
|
||||
|
||||
for (const auto & disk : getDisks())
|
||||
if (!disk->supportsHardLinks())
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ALTER TABLE is not supported for immutable disk '{}'", disk->getName());
|
||||
if (!disk->supportsHardLinks() && !commands.isSettingsAlter() && !commands.isCommentAlter())
|
||||
throw Exception(
|
||||
ErrorCodes::SUPPORT_IS_DISABLED,
|
||||
"ALTER TABLE commands are not supported on immutable disk '{}', except for setting and comment alteration",
|
||||
disk->getName());
|
||||
|
||||
/// Set of columns that shouldn't be altered.
|
||||
NameSet columns_alter_type_forbidden;
|
||||
@ -4101,16 +4282,13 @@ void MergeTreeData::outdateUnexpectedPartAndCloneToDetached(const DataPartPtr &
|
||||
removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part_to_detach}, true, &lock);
|
||||
}
|
||||
|
||||
void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix, bool restore_covered)
|
||||
void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix)
|
||||
{
|
||||
if (prefix.empty())
|
||||
LOG_INFO(log, "Renaming {} to {} and forgetting it.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name);
|
||||
else
|
||||
LOG_INFO(log, "Renaming {} to {}_{} and forgetting it.", part_to_detach->getDataPartStorage().getPartDirectory(), prefix, part_to_detach->name);
|
||||
|
||||
if (restore_covered)
|
||||
waitForOutdatedPartsToBeLoaded();
|
||||
|
||||
auto lock = lockParts();
|
||||
bool removed_active_part = false;
|
||||
bool restored_active_part = false;
|
||||
@ -4136,132 +4314,6 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT
|
||||
LOG_TEST(log, "forcefullyMovePartToDetachedAndRemoveFromMemory: removing {} from data_parts_indexes", part->getNameWithState());
|
||||
data_parts_indexes.erase(it_part);
|
||||
|
||||
if (restore_covered && part->info.level == 0 && part->info.mutation == 0)
|
||||
{
|
||||
LOG_WARNING(log, "Will not recover parts covered by zero-level part {}", part->name);
|
||||
return;
|
||||
}
|
||||
|
||||
/// Let's restore some parts covered by unexpected to avoid partial data
|
||||
if (restore_covered)
|
||||
{
|
||||
Strings restored;
|
||||
Strings error_parts;
|
||||
|
||||
auto is_appropriate_state = [] (const DataPartPtr & part_)
|
||||
{
|
||||
/// In rare cases, we may have a chain of unexpected parts that cover common source parts, e.g. all_1_2_3, all_1_3_4
|
||||
/// It may happen as a result of interrupted cloneReplica
|
||||
bool already_active = part_->getState() == DataPartState::Active;
|
||||
if (!already_active && part_->getState() != DataPartState::Outdated)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to restore a part {} from unexpected state: {}", part_->name, part_->getState());
|
||||
return !already_active;
|
||||
};
|
||||
|
||||
auto activate_part = [this, &restored_active_part](auto it)
|
||||
{
|
||||
/// It's not clear what to do if we try to activate part that was removed in transaction.
|
||||
/// It may happen only in ReplicatedMergeTree, so let's simply throw LOGICAL_ERROR for now.
|
||||
chassert((*it)->version.isRemovalTIDLocked());
|
||||
if ((*it)->version.removal_tid_lock == Tx::PrehistoricTID.getHash())
|
||||
(*it)->version.unlockRemovalTID(Tx::PrehistoricTID, TransactionInfoContext{getStorageID(), (*it)->name});
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot activate part {} that was removed by transaction ({})",
|
||||
(*it)->name, (*it)->version.removal_tid_lock);
|
||||
|
||||
addPartContributionToColumnAndSecondaryIndexSizes(*it);
|
||||
addPartContributionToDataVolume(*it);
|
||||
modifyPartState(it, DataPartState::Active); /// iterator is not invalidated here
|
||||
restored_active_part = true;
|
||||
};
|
||||
|
||||
/// ActiveDataPartSet allows to restore most top-level parts instead of unexpected.
|
||||
/// It can be important in case of assigned merges. If unexpected part is result of some
|
||||
/// finished, but not committed merge then we should restore (at least try to restore)
|
||||
/// closest ancestors for the unexpected part to be able to execute it.
|
||||
/// However it's not guaranteed because outdated parts can intersect
|
||||
ActiveDataPartSet parts_for_replacement(format_version);
|
||||
auto range = getDataPartsPartitionRange(part->info.partition_id);
|
||||
DataPartsVector parts_candidates(range.begin(), range.end());
|
||||
|
||||
/// In case of intersecting outdated parts we want to add bigger parts (with higher level) first
|
||||
auto comparator = [] (const DataPartPtr left, const DataPartPtr right) -> bool
|
||||
{
|
||||
if (left->info.level < right->info.level)
|
||||
return true;
|
||||
else if (left->info.level > right->info.level)
|
||||
return false;
|
||||
else
|
||||
return left->info.mutation < right->info.mutation;
|
||||
};
|
||||
std::sort(parts_candidates.begin(), parts_candidates.end(), comparator);
|
||||
/// From larger to smaller parts
|
||||
for (const auto & part_candidate_in_partition : parts_candidates | std::views::reverse)
|
||||
{
|
||||
if (part->info.contains(part_candidate_in_partition->info)
|
||||
&& is_appropriate_state(part_candidate_in_partition))
|
||||
{
|
||||
String out_reason;
|
||||
/// Outdated parts can itersect legally (because of DROP_PART) here it's okay, we
|
||||
/// are trying to do out best to restore covered parts.
|
||||
auto outcome = parts_for_replacement.tryAddPart(part_candidate_in_partition->info, &out_reason);
|
||||
if (outcome == ActiveDataPartSet::AddPartOutcome::HasIntersectingPart)
|
||||
{
|
||||
error_parts.push_back(part->name);
|
||||
LOG_ERROR(log, "Failed to restore part {}, because of intersection reason '{}'", part->name, out_reason);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (parts_for_replacement.size() > 0)
|
||||
{
|
||||
std::vector<std::pair<uint64_t, uint64_t>> holes_list;
|
||||
/// Most part of the code below is just to write pretty message
|
||||
auto part_infos = parts_for_replacement.getPartInfos();
|
||||
int64_t current_right_block = part_infos[0].min_block;
|
||||
for (const auto & top_level_part_to_replace : part_infos)
|
||||
{
|
||||
auto data_part_it = data_parts_by_info.find(top_level_part_to_replace);
|
||||
if (data_part_it == data_parts_by_info.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find part {} in own set", top_level_part_to_replace.getPartNameForLogs());
|
||||
activate_part(data_part_it);
|
||||
restored.push_back((*data_part_it)->name);
|
||||
if (top_level_part_to_replace.min_block - current_right_block > 1)
|
||||
holes_list.emplace_back(current_right_block, top_level_part_to_replace.min_block);
|
||||
current_right_block = top_level_part_to_replace.max_block;
|
||||
}
|
||||
if (part->info.max_block != current_right_block)
|
||||
holes_list.emplace_back(current_right_block, part->info.max_block);
|
||||
|
||||
for (const String & name : restored)
|
||||
LOG_INFO(log, "Activated part {} in place of unexpected {}", name, part->name);
|
||||
|
||||
if (!error_parts.empty() || !holes_list.empty())
|
||||
{
|
||||
std::string error_parts_message, holes_list_message;
|
||||
if (!error_parts.empty())
|
||||
error_parts_message = fmt::format(" Parts failed to restore because of intersection: [{}]", fmt::join(error_parts, ", "));
|
||||
if (!holes_list.empty())
|
||||
{
|
||||
if (!error_parts.empty())
|
||||
holes_list_message = ".";
|
||||
|
||||
Strings holes_list_pairs;
|
||||
for (const auto & [left_side, right_side] : holes_list)
|
||||
holes_list_pairs.push_back(fmt::format("({}, {})", left_side + 1, right_side - 1));
|
||||
holes_list_message += fmt::format(" Block ranges failed to restore: [{}]", fmt::join(holes_list_pairs, ", "));
|
||||
}
|
||||
LOG_WARNING(log, "The set of parts restored in place of {} looks incomplete. "
|
||||
"SELECT queries may observe gaps in data until this replica is synchronized with other replicas.{}{}",
|
||||
part->name, error_parts_message, holes_list_message);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_INFO(log, "Don't find any parts for replacement instead of unexpected {}", part->name);
|
||||
}
|
||||
}
|
||||
|
||||
if (removed_active_part || restored_active_part)
|
||||
resetObjectColumnsFromActiveParts(lock);
|
||||
}
|
||||
@ -5074,7 +5126,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String
|
||||
|
||||
auto volume = getStoragePolicy()->getVolumeByName(name);
|
||||
if (!volume)
|
||||
throw Exception(ErrorCodes::UNKNOWN_DISK, "Volume {} does not exists on policy {}", name, getStoragePolicy()->getName());
|
||||
throw Exception(ErrorCodes::UNKNOWN_DISK, "Volume {} does not exist on policy {}", name, getStoragePolicy()->getName());
|
||||
|
||||
if (parts.empty())
|
||||
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Nothing to move (check that the partition exists).");
|
||||
|
@ -652,10 +652,9 @@ public:
|
||||
|
||||
/// Renames the part to detached/<prefix>_<part> and removes it from data_parts,
|
||||
//// so it will not be deleted in clearOldParts.
|
||||
/// If restore_covered is true, adds to the working set inactive parts, which were merged into the deleted part.
|
||||
/// NOTE: This method is safe to use only for parts which nobody else holds (like on server start or for parts which was not committed).
|
||||
/// For active parts it's unsafe because this method modifies fields of part (rename) while some other thread can try to read it.
|
||||
void forcefullyMovePartToDetachedAndRemoveFromMemory(const DataPartPtr & part, const String & prefix = "", bool restore_covered = false);
|
||||
void forcefullyMovePartToDetachedAndRemoveFromMemory(const DataPartPtr & part, const String & prefix = "");
|
||||
|
||||
/// This method should not be here, but async loading of Outdated parts is implemented in MergeTreeData
|
||||
virtual void forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & /*part_name*/) {}
|
||||
@ -1072,6 +1071,7 @@ public:
|
||||
scope_guard getTemporaryPartDirectoryHolder(const String & part_dir_name) const;
|
||||
|
||||
void waitForOutdatedPartsToBeLoaded() const;
|
||||
void waitForUnexpectedPartsToBeLoaded() const;
|
||||
bool canUsePolymorphicParts() const;
|
||||
|
||||
/// TODO: make enabled by default in the next release if no problems found.
|
||||
@ -1551,13 +1551,33 @@ protected:
|
||||
PartLoadingTreeNodes outdated_unloaded_data_parts TSA_GUARDED_BY(outdated_data_parts_mutex);
|
||||
bool outdated_data_parts_loading_canceled TSA_GUARDED_BY(outdated_data_parts_mutex) = false;
|
||||
|
||||
mutable std::mutex unexpected_data_parts_mutex;
|
||||
mutable std::condition_variable unexpected_data_parts_cv;
|
||||
|
||||
struct UnexpectedPartLoadState
|
||||
{
|
||||
PartLoadingTree::NodePtr loading_info;
|
||||
/// if it is covered by any unexpected part
|
||||
bool uncovered = true;
|
||||
bool is_broken = false;
|
||||
MutableDataPartPtr part;
|
||||
};
|
||||
|
||||
BackgroundSchedulePool::TaskHolder unexpected_data_parts_loading_task;
|
||||
std::vector<UnexpectedPartLoadState> unexpected_data_parts;
|
||||
bool unexpected_data_parts_loading_canceled TSA_GUARDED_BY(unexpected_data_parts_mutex) = false;
|
||||
|
||||
void loadUnexpectedDataParts();
|
||||
void loadUnexpectedDataPart(UnexpectedPartLoadState & state);
|
||||
|
||||
/// This has to be "true" by default, because in case of empty table or absence of Outdated parts
|
||||
/// it is automatically finished.
|
||||
std::atomic_bool outdated_data_parts_loading_finished = true;
|
||||
std::atomic_bool unexpected_data_parts_loading_finished = true;
|
||||
|
||||
void loadOutdatedDataParts(bool is_async);
|
||||
void startOutdatedDataPartsLoadingTask();
|
||||
void stopOutdatedDataPartsLoadingTask();
|
||||
void startOutdatedAndUnexpectedDataPartsLoadingTask();
|
||||
void stopOutdatedAndUnexpectedDataPartsLoadingTask();
|
||||
|
||||
static void incrementInsertedPartsProfileEvent(MergeTreeDataPartType type);
|
||||
static void incrementMergedPartsProfileEvent(MergeTreeDataPartType type);
|
||||
|
@ -490,11 +490,11 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeIn(
|
||||
if (key_node_function_name == "arrayElement")
|
||||
{
|
||||
/** Try to parse arrayElement for mapKeys index.
|
||||
* It is important to ignore keys like column_map['Key'] IN ('') because if key does not exists in map
|
||||
* we return default value for arrayElement.
|
||||
* It is important to ignore keys like column_map['Key'] IN ('') because if the key does not exist in the map
|
||||
* we return the default value for arrayElement.
|
||||
*
|
||||
* We cannot skip keys that does not exist in map if comparison is with default type value because
|
||||
* that way we skip necessary granules where map key does not exists.
|
||||
* that way we skip necessary granules where the map key does not exist.
|
||||
*/
|
||||
if (!prepared_set)
|
||||
return false;
|
||||
@ -781,11 +781,11 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals(
|
||||
if (key_node_function_name == "arrayElement" && (function_name == "equals" || function_name == "notEquals"))
|
||||
{
|
||||
/** Try to parse arrayElement for mapKeys index.
|
||||
* It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map
|
||||
* It is important to ignore keys like column_map['Key'] = '' because if key does not exist in map
|
||||
* we return default value for arrayElement.
|
||||
*
|
||||
* We cannot skip keys that does not exist in map if comparison is with default type value because
|
||||
* that way we skip necessary granules where map key does not exists.
|
||||
* that way we skip necessary granules where map key does not exist.
|
||||
*/
|
||||
if (value_field == value_type->getDefault())
|
||||
return false;
|
||||
|
@ -444,11 +444,11 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals(
|
||||
if (key_function_node_function_name == "arrayElement")
|
||||
{
|
||||
/** Try to parse arrayElement for mapKeys index.
|
||||
* It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map
|
||||
* It is important to ignore keys like column_map['Key'] = '' because if key does not exist in map
|
||||
* we return default value for arrayElement.
|
||||
*
|
||||
* We cannot skip keys that does not exist in map if comparison is with default type value because
|
||||
* that way we skip necessary granules where map key does not exists.
|
||||
* that way we skip necessary granules where map key does not exist.
|
||||
*/
|
||||
if (value_field == value_type->getDefault())
|
||||
return false;
|
||||
|
@ -477,11 +477,11 @@ bool MergeTreeConditionFullText::traverseASTEquals(
|
||||
if (function.getFunctionName() == "arrayElement")
|
||||
{
|
||||
/** Try to parse arrayElement for mapKeys index.
|
||||
* It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map
|
||||
* It is important to ignore keys like column_map['Key'] = '' because if key does not exist in map
|
||||
* we return default value for arrayElement.
|
||||
*
|
||||
* We cannot skip keys that does not exist in map if comparison is with default type value because
|
||||
* that way we skip necessary granules where map key does not exists.
|
||||
* that way we skip necessary granules where map key does not exist.
|
||||
*/
|
||||
if (value_field == value_type->getDefault())
|
||||
return false;
|
||||
|
@ -232,7 +232,7 @@ bool MergeTreeReaderCompact::needSkipStream(size_t column_pos, const ISerializat
|
||||
///
|
||||
/// Consider the following columns in nested "root":
|
||||
/// - root.array Array(UInt8) - exists
|
||||
/// - root.nested_array Array(Array(UInt8)) - does not exists (only_offsets_level=1)
|
||||
/// - root.nested_array Array(Array(UInt8)) - does not exist (only_offsets_level=1)
|
||||
///
|
||||
/// For root.nested_array it will try to read multiple streams:
|
||||
/// - offsets (substream_path = {ArraySizes})
|
||||
|
@ -181,7 +181,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync(
|
||||
|
||||
new_part->rows_count = rows_count;
|
||||
new_part->modification_time = time(nullptr);
|
||||
new_part->setIndex(std::make_shared<Columns>(writer->releaseIndexColumns()));
|
||||
new_part->setIndex(writer->releaseIndexColumns());
|
||||
new_part->checksums = checksums;
|
||||
new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk());
|
||||
new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk());
|
||||
|
@ -935,7 +935,7 @@ void finalizeMutatedPart(
|
||||
|
||||
new_data_part->rows_count = source_part->rows_count;
|
||||
new_data_part->index_granularity = source_part->index_granularity;
|
||||
new_data_part->setIndex(source_part->getIndex());
|
||||
new_data_part->setIndex(*source_part->getIndex());
|
||||
new_data_part->minmax_idx = source_part->minmax_idx;
|
||||
new_data_part->modification_time = time(nullptr);
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <optional>
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AWS_S3
|
||||
@ -365,7 +366,11 @@ std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
|
||||
auto configuration_snapshot = updateConfigurationAndGetCopy(local_context);
|
||||
|
||||
auto internal_source = std::make_unique<StorageS3Source>(
|
||||
info, configuration.format, getName(), local_context, format_settings,
|
||||
info,
|
||||
configuration.format,
|
||||
getName(),
|
||||
local_context,
|
||||
format_settings,
|
||||
max_block_size,
|
||||
configuration_snapshot.request_settings,
|
||||
configuration_snapshot.compression_method,
|
||||
@ -373,7 +378,9 @@ std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
|
||||
configuration_snapshot.url.bucket,
|
||||
configuration_snapshot.url.version_id,
|
||||
configuration_snapshot.url.uri.getHost() + std::to_string(configuration_snapshot.url.uri.getPort()),
|
||||
file_iterator, local_context->getSettingsRef().max_download_threads, false);
|
||||
file_iterator,
|
||||
local_context->getSettingsRef().max_download_threads,
|
||||
false);
|
||||
|
||||
auto file_deleter = [this, bucket = configuration_snapshot.url.bucket, client = configuration_snapshot.client, blob_storage_log = BlobStorageLogWriter::create()](const std::string & path) mutable
|
||||
{
|
||||
@ -608,8 +615,13 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const
|
||||
std::shared_ptr<StorageS3Queue::FileIterator> StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate)
|
||||
{
|
||||
auto glob_iterator = std::make_unique<StorageS3QueueSource::GlobIterator>(
|
||||
*configuration.client, configuration.url, predicate, getVirtualsList(), local_context,
|
||||
/* read_keys */nullptr, configuration.request_settings);
|
||||
*configuration.client,
|
||||
configuration.url,
|
||||
predicate,
|
||||
getVirtualsList(),
|
||||
local_context,
|
||||
/* read_keys */ nullptr,
|
||||
configuration.request_settings);
|
||||
|
||||
return std::make_shared<FileIterator>(
|
||||
files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called, log);
|
||||
|
@ -58,7 +58,7 @@ static inline String generateInnerTableName(const StorageID & view_id)
|
||||
return ".inner." + view_id.getTableName();
|
||||
}
|
||||
|
||||
/// Remove columns from target_header that does not exists in src_header
|
||||
/// Remove columns from target_header that does not exist in src_header
|
||||
static void removeNonCommonColumns(const Block & src_header, Block & target_header)
|
||||
{
|
||||
std::set<size_t> target_only_positions;
|
||||
@ -233,10 +233,10 @@ void StorageMaterializedView::read(
|
||||
auto mv_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, context, processed_stage);
|
||||
auto target_header = query_plan.getCurrentDataStream().header;
|
||||
|
||||
/// No need to convert columns that does not exists in MV
|
||||
/// No need to convert columns that does not exist in MV
|
||||
removeNonCommonColumns(mv_header, target_header);
|
||||
|
||||
/// No need to convert columns that does not exists in the result header.
|
||||
/// No need to convert columns that does not exist in the result header.
|
||||
///
|
||||
/// Distributed storage may process query up to the specific stage, and
|
||||
/// so the result header may not include all the columns from the
|
||||
|
@ -153,7 +153,7 @@ void StorageMergeTree::startup()
|
||||
{
|
||||
background_operations_assignee.start();
|
||||
startBackgroundMovesIfNeeded();
|
||||
startOutdatedDataPartsLoadingTask();
|
||||
startOutdatedAndUnexpectedDataPartsLoadingTask();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -179,7 +179,7 @@ void StorageMergeTree::shutdown(bool)
|
||||
if (shutdown_called.exchange(true))
|
||||
return;
|
||||
|
||||
stopOutdatedDataPartsLoadingTask();
|
||||
stopOutdatedAndUnexpectedDataPartsLoadingTask();
|
||||
|
||||
/// Unlock all waiting mutations
|
||||
{
|
||||
|
@ -1575,18 +1575,12 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks)
|
||||
* But actually we can't precisely determine that ALL missing parts
|
||||
* covered by this unexpected part. So missing parts will be downloaded.
|
||||
*/
|
||||
DataParts unexpected_parts;
|
||||
|
||||
/// Intersection of local parts and expected parts
|
||||
ActiveDataPartSet local_expected_parts_set(format_version);
|
||||
|
||||
/// Collect unexpected parts
|
||||
for (const auto & part : parts)
|
||||
{
|
||||
if (expected_parts.contains(part->name))
|
||||
local_expected_parts_set.add(part->name);
|
||||
else
|
||||
unexpected_parts.insert(part); /// this parts we will place to detached with ignored_ prefix
|
||||
}
|
||||
|
||||
/// Which parts should be taken from other replicas.
|
||||
@ -1598,18 +1592,15 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks)
|
||||
|
||||
paranoidCheckForCoveredPartsInZooKeeperOnStart(expected_parts_vec, parts_to_fetch);
|
||||
|
||||
waitForUnexpectedPartsToBeLoaded();
|
||||
|
||||
ActiveDataPartSet set_of_empty_unexpected_parts(format_version);
|
||||
for (const auto & part : parts)
|
||||
for (const auto & load_state : unexpected_data_parts)
|
||||
{
|
||||
if (part->rows_count || part->getState() != MergeTreeDataPartState::Active || expected_parts.contains(part->name))
|
||||
if (load_state.is_broken || load_state.part->rows_count || !load_state.uncovered)
|
||||
continue;
|
||||
|
||||
if (incomplete_list_of_outdated_parts)
|
||||
{
|
||||
LOG_INFO(log, "Outdated parts are not loaded yet, but we may need them to handle dropped parts. Need retry.");
|
||||
return false;
|
||||
}
|
||||
set_of_empty_unexpected_parts.add(part->name);
|
||||
set_of_empty_unexpected_parts.add(load_state.part->name);
|
||||
}
|
||||
if (auto empty_count = set_of_empty_unexpected_parts.size())
|
||||
LOG_WARNING(log, "Found {} empty unexpected parts (probably some dropped parts were not cleaned up before restart): [{}]",
|
||||
@ -1628,33 +1619,35 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks)
|
||||
std::unordered_set<String> restorable_unexpected_parts;
|
||||
UInt64 uncovered_unexpected_parts_rows = 0;
|
||||
|
||||
for (const auto & part : unexpected_parts)
|
||||
for (const auto & load_state : unexpected_data_parts)
|
||||
{
|
||||
unexpected_parts_rows += part->rows_count;
|
||||
if (load_state.is_broken)
|
||||
continue;
|
||||
unexpected_parts_rows += load_state.part->rows_count;
|
||||
|
||||
/// This part may be covered by some expected part that is active and present locally
|
||||
/// Probably we just did not remove this part from disk before restart (but removed from ZooKeeper)
|
||||
String covering_local_part = local_expected_parts_set.getContainingPart(part->name);
|
||||
String covering_local_part = local_expected_parts_set.getContainingPart(load_state.part->name);
|
||||
if (!covering_local_part.empty())
|
||||
{
|
||||
covered_unexpected_parts.push_back(part->name);
|
||||
covered_unexpected_parts.push_back(load_state.part->name);
|
||||
continue;
|
||||
}
|
||||
|
||||
String covering_empty_part = set_of_empty_unexpected_parts.getContainingPart(part->name);
|
||||
String covering_empty_part = set_of_empty_unexpected_parts.getContainingPart(load_state.part->name);
|
||||
if (!covering_empty_part.empty())
|
||||
{
|
||||
LOG_INFO(log, "Unexpected part {} is covered by empty part {}, assuming it has been dropped just before restart",
|
||||
part->name, covering_empty_part);
|
||||
covered_unexpected_parts.push_back(part->name);
|
||||
load_state.part->name, covering_empty_part);
|
||||
covered_unexpected_parts.push_back(load_state.part->name);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto covered_parts = local_expected_parts_set.getPartInfosCoveredBy(part->info);
|
||||
auto covered_parts = local_expected_parts_set.getPartInfosCoveredBy(load_state.part->info);
|
||||
|
||||
if (MergeTreePartInfo::areAllBlockNumbersCovered(part->info, covered_parts))
|
||||
if (MergeTreePartInfo::areAllBlockNumbersCovered(load_state.part->info, covered_parts))
|
||||
{
|
||||
restorable_unexpected_parts.insert(part->name);
|
||||
restorable_unexpected_parts.insert(load_state.part->name);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1668,13 +1661,13 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks)
|
||||
}
|
||||
|
||||
/// Part is unexpected and we don't have covering part: it's suspicious
|
||||
uncovered_unexpected_parts.insert(part->name);
|
||||
uncovered_unexpected_parts_rows += part->rows_count;
|
||||
uncovered_unexpected_parts.insert(load_state.part->name);
|
||||
uncovered_unexpected_parts_rows += load_state.part->rows_count;
|
||||
|
||||
if (part->info.level > 0)
|
||||
if (load_state.part->info.level > 0)
|
||||
{
|
||||
++unexpected_parts_nonnew;
|
||||
unexpected_parts_nonnew_rows += part->rows_count;
|
||||
unexpected_parts_nonnew_rows += load_state.part->rows_count;
|
||||
}
|
||||
}
|
||||
|
||||
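The loop above walks unexpected_data_parts and sorts every part into one of a few buckets: broken parts are ignored, parts covered by an expected local part or by an empty unexpected part are treated as already handled, parts whose block numbers are fully covered are restorable, and everything else is uncovered and counts toward the sanity threshold. A condensed sketch of that decision order, with the ActiveDataPartSet lookups replaced by predicates (illustrative, not the ClickHouse API):

#include <functional>
#include <string>

enum class UnexpectedPartKind
{
    Broken,      // failed to load, ignored by the statistics
    Covered,     // an expected active part or an empty unexpected part covers it
    Restorable,  // all of its block numbers are covered by expected parts
    Uncovered,   // nothing covers it: counts toward the "wrong parts" ratio
};

struct UnexpectedPart
{
    std::string name;
    bool is_broken = false;
};

UnexpectedPartKind classifyUnexpectedPart(
    const UnexpectedPart & part,
    const std::function<bool(const std::string &)> & covered_by_expected,
    const std::function<bool(const std::string &)> & covered_by_empty,
    const std::function<bool(const std::string &)> & all_blocks_covered)
{
    if (part.is_broken)
        return UnexpectedPartKind::Broken;
    if (covered_by_expected(part.name) || covered_by_empty(part.name))
        return UnexpectedPartKind::Covered;
    if (all_blocks_covered(part.name))
        return UnexpectedPartKind::Restorable;
    return UnexpectedPartKind::Uncovered;
}

Only the Uncovered bucket feeds the replicated_max_ratio_of_wrong_parts check further down; the other buckets are either renamed to detached or restored.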
@ -1700,6 +1693,9 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks)
|
||||
UInt64 total_rows_on_filesystem = 0;
|
||||
for (const auto & part : parts)
|
||||
total_rows_on_filesystem += part->rows_count;
|
||||
/// We also need to add the row counts of all unexpected data parts.
|
||||
for (const auto & part : unexpected_data_parts)
|
||||
total_rows_on_filesystem += part.part->rows_count;
|
||||
|
||||
const auto storage_settings_ptr = getSettings();
|
||||
bool insane = uncovered_unexpected_parts_rows > total_rows_on_filesystem * storage_settings_ptr->replicated_max_ratio_of_wrong_parts;
|
||||
@ -1741,13 +1737,12 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks)
|
||||
/// Add to the queue jobs to pick up the missing parts from other replicas and remove from ZK the information that we have them.
|
||||
queue.setBrokenPartsToEnqueueFetchesOnLoading(std::move(parts_to_fetch));
|
||||
|
||||
/// Remove extra local parts.
|
||||
for (const DataPartPtr & part : unexpected_parts)
|
||||
/// Detach all unexpected data parts after the sanity check.
|
||||
for (auto & part_state : unexpected_data_parts)
|
||||
{
|
||||
bool restore_covered = restorable_unexpected_parts.contains(part->name) || uncovered_unexpected_parts.contains(part->name);
|
||||
LOG_ERROR(log, "Renaming unexpected part {} to ignored_{}{}", part->name, part->name, restore_covered ? ", restoring covered parts" : "");
|
||||
forcefullyMovePartToDetachedAndRemoveFromMemory(part, "ignored", restore_covered);
|
||||
part_state.part->renameToDetached("ignored");
|
||||
}
|
||||
unexpected_data_parts.clear();
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -5133,7 +5128,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::fetchExistsPart(
|
||||
void StorageReplicatedMergeTree::startup()
|
||||
{
|
||||
LOG_TRACE(log, "Starting up table");
|
||||
startOutdatedDataPartsLoadingTask();
|
||||
startOutdatedAndUnexpectedDataPartsLoadingTask();
|
||||
if (attach_thread)
|
||||
{
|
||||
attach_thread->start();
|
||||
@ -5336,7 +5331,7 @@ void StorageReplicatedMergeTree::shutdown(bool)
|
||||
}
|
||||
|
||||
session_expired_callback_handler.reset();
|
||||
stopOutdatedDataPartsLoadingTask();
|
||||
stopOutdatedAndUnexpectedDataPartsLoadingTask();
|
||||
|
||||
partialShutdown();
|
||||
|
||||
|
@ -1,4 +1,14 @@
|
||||
#include "config.h"
|
||||
#include <cstddef>
|
||||
#include <iterator>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <Poco/Logger.h>
|
||||
#include "Common/logger_useful.h"
|
||||
#include "IO/CompressionMethod.h"
|
||||
#include "IO/ReadBuffer.h"
|
||||
#include "Interpreters/Context_fwd.h"
|
||||
#include "Storages/MergeTree/ReplicatedMergeTreePartHeader.h"
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
@ -158,10 +168,11 @@ public:
|
||||
, storage(storage_)
|
||||
, read_from_format_info(std::move(read_from_format_info_))
|
||||
, need_only_count(need_only_count_)
|
||||
, query_configuration(storage.getConfigurationCopy())
|
||||
, max_block_size(max_block_size_)
|
||||
, num_streams(num_streams_)
|
||||
{
|
||||
query_configuration = storage.updateConfigurationAndGetCopy(context);
|
||||
query_configuration.update(context);
|
||||
virtual_columns = storage.getVirtualsList();
|
||||
}
|
||||
|
||||
@ -204,7 +215,8 @@ public:
|
||||
, virtual_columns(virtual_columns_)
|
||||
, read_keys(read_keys_)
|
||||
, request_settings(request_settings_)
|
||||
, list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1)
|
||||
, list_objects_pool(
|
||||
CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1)
|
||||
, list_objects_scheduler(threadPoolCallbackRunnerUnsafe<ListObjectsOutcome>(list_objects_pool, "ListObjects"))
|
||||
, file_progress_callback(file_progress_callback_)
|
||||
{
|
||||
@ -474,7 +486,8 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
|
||||
KeysWithInfo * read_keys_,
|
||||
const S3Settings::RequestSettings & request_settings_,
|
||||
std::function<void(FileProgress)> file_progress_callback_)
|
||||
: pimpl(std::make_shared<StorageS3Source::DisclosedGlobIterator::Impl>(client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_))
|
||||
: pimpl(std::make_shared<StorageS3Source::DisclosedGlobIterator::Impl>(
|
||||
client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_))
|
||||
{
|
||||
}
|
||||
|
||||
@ -562,8 +575,7 @@ StorageS3Source::KeysIterator::KeysIterator(
|
||||
KeysWithInfo * read_keys,
|
||||
std::function<void(FileProgress)> file_progress_callback_)
|
||||
: pimpl(std::make_shared<StorageS3Source::KeysIterator::Impl>(
|
||||
client_, version_id_, keys_, bucket_, request_settings_,
|
||||
read_keys, file_progress_callback_))
|
||||
client_, version_id_, keys_, bucket_, request_settings_, read_keys, file_progress_callback_))
|
||||
{
|
||||
}
|
||||
|
||||
@ -593,7 +605,7 @@ StorageS3Source::ReadTaskIterator::ReadTaskIterator(
|
||||
pool.wait();
|
||||
buffer.reserve(max_threads_count);
|
||||
for (auto & key_future : keys)
|
||||
buffer.emplace_back(std::make_shared<KeyWithInfo>(key_future.get(), std::nullopt));
|
||||
buffer.emplace_back(std::make_shared<KeyWithInfo>(key_future.get()));
|
||||
}
|
||||
|
||||
StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next(size_t) /// NOLINT
|
||||
@ -618,6 +630,124 @@ size_t StorageS3Source::ReadTaskIterator::estimatedKeysCount()
|
||||
return buffer.size();
|
||||
}
|
||||
|
||||
|
||||
StorageS3Source::ArchiveIterator::ArchiveIterator(
|
||||
std::unique_ptr<IIterator> basic_iterator_,
|
||||
const std::string & archive_pattern_,
|
||||
std::shared_ptr<const S3::Client> client_,
|
||||
const String & bucket_,
|
||||
const String & version_id_,
|
||||
const S3Settings::RequestSettings & request_settings_,
|
||||
ContextPtr context_,
|
||||
KeysWithInfo * read_keys_)
|
||||
: WithContext(context_)
|
||||
, basic_iterator(std::move(basic_iterator_))
|
||||
, basic_key_with_info_ptr(nullptr)
|
||||
, client(client_)
|
||||
, bucket(bucket_)
|
||||
, version_id(version_id_)
|
||||
, request_settings(request_settings_)
|
||||
, read_keys(read_keys_)
|
||||
{
|
||||
if (archive_pattern_.find_first_of("*?{") != std::string::npos)
|
||||
{
|
||||
auto matcher = std::make_shared<re2::RE2>(makeRegexpPatternFromGlobs(archive_pattern_));
|
||||
if (!matcher->ok())
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", archive_pattern_, matcher->error());
|
||||
filter = IArchiveReader::NameFilter{[matcher](const std::string & p) mutable { return re2::RE2::FullMatch(p, *matcher); }};
|
||||
}
|
||||
else
|
||||
{
|
||||
path_in_archive = archive_pattern_;
|
||||
}
|
||||
}
|
||||
|
||||
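The constructor above compiles the archive pattern into a regular expression once when it contains glob metacharacters (*, ?, {) and otherwise keeps the literal member path. A standalone sketch of that filter construction; globToRegexp below is a simplified stand-in for the internal makeRegexpPatternFromGlobs helper and, unlike the real one, does not expand {a,b} alternatives:

#include <functional>
#include <memory>
#include <stdexcept>
#include <string>
#include <re2/re2.h>

using NameFilter = std::function<bool(const std::string &)>;

/// Very simplified glob -> regexp translation: '*' -> ".*", '?' -> '.',
/// everything else is quoted literally.
std::string globToRegexp(const std::string & glob)
{
    std::string out;
    for (char c : glob)
    {
        if (c == '*')
            out += ".*";
        else if (c == '?')
            out += '.';
        else
            out += re2::RE2::QuoteMeta(std::string(1, c));
    }
    return out;
}

NameFilter makeArchiveNameFilter(const std::string & pattern)
{
    /// No metacharacters: match the literal path inside the archive.
    if (pattern.find_first_of("*?{") == std::string::npos)
        return [pattern](const std::string & path) { return path == pattern; };

    auto matcher = std::make_shared<re2::RE2>(globToRegexp(pattern));
    if (!matcher->ok())
        throw std::runtime_error("Cannot compile regexp from glob: " + pattern);
    return [matcher](const std::string & path) { return re2::RE2::FullMatch(path, *matcher); };
}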
StorageS3Source::KeyWithInfoPtr StorageS3Source::ArchiveIterator::next(size_t)
|
||||
{
|
||||
if (!path_in_archive.empty())
|
||||
{
|
||||
std::unique_lock lock{take_next_mutex};
|
||||
while (true)
|
||||
{
|
||||
basic_key_with_info_ptr = basic_iterator->next();
|
||||
if (!basic_key_with_info_ptr)
|
||||
return {};
|
||||
refreshArchiveReader();
|
||||
bool file_exists = archive_reader->fileExists(path_in_archive);
|
||||
if (file_exists)
|
||||
{
|
||||
KeyWithInfoPtr archive_key_with_info
|
||||
= std::make_shared<KeyWithInfo>(basic_key_with_info_ptr->key, std::nullopt, path_in_archive, archive_reader);
|
||||
if (read_keys != nullptr)
|
||||
read_keys->push_back(archive_key_with_info);
|
||||
return archive_key_with_info;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::unique_lock lock{take_next_mutex};
|
||||
while (true)
|
||||
{
|
||||
if (!file_enumerator)
|
||||
{
|
||||
basic_key_with_info_ptr = basic_iterator->next();
|
||||
if (!basic_key_with_info_ptr)
|
||||
return {};
|
||||
refreshArchiveReader();
|
||||
file_enumerator = archive_reader->firstFile();
|
||||
if (!file_enumerator)
|
||||
{
|
||||
file_enumerator.reset();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (!file_enumerator->nextFile())
|
||||
{
|
||||
file_enumerator.reset();
|
||||
continue;
|
||||
}
|
||||
|
||||
String current_filename = file_enumerator->getFileName();
|
||||
bool satisfies = filter(current_filename);
|
||||
if (satisfies)
|
||||
{
|
||||
KeyWithInfoPtr archive_key_with_info
|
||||
= std::make_shared<KeyWithInfo>(basic_key_with_info_ptr->key, std::nullopt, current_filename, archive_reader);
|
||||
if (read_keys != nullptr)
|
||||
read_keys->push_back(archive_key_with_info);
|
||||
return archive_key_with_info;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t StorageS3Source::ArchiveIterator::estimatedKeysCount()
|
||||
{
|
||||
return basic_iterator->estimatedKeysCount();
|
||||
}
|
||||
|
||||
void StorageS3Source::ArchiveIterator::refreshArchiveReader()
|
||||
{
|
||||
if (basic_key_with_info_ptr)
|
||||
{
|
||||
if (!basic_key_with_info_ptr->info)
|
||||
{
|
||||
basic_key_with_info_ptr->info = S3::getObjectInfo(*client, bucket, basic_key_with_info_ptr->key, version_id, request_settings);
|
||||
}
|
||||
archive_reader = createArchiveReader(
|
||||
basic_key_with_info_ptr->key,
|
||||
[key = basic_key_with_info_ptr->key, archive_size = basic_key_with_info_ptr->info.value().size, context = getContext(), this]()
|
||||
{ return createS3ReadBuffer(key, archive_size, context, client, bucket, version_id, request_settings); },
|
||||
basic_key_with_info_ptr->info.value().size);
|
||||
}
|
||||
else
|
||||
{
|
||||
archive_reader = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
StorageS3Source::StorageS3Source(
|
||||
const ReadFromFormatInfo & info,
|
||||
const String & format_,
|
||||
@ -653,7 +783,8 @@ StorageS3Source::StorageS3Source(
|
||||
, file_iterator(file_iterator_)
|
||||
, max_parsing_threads(max_parsing_threads_)
|
||||
, need_only_count(need_only_count_)
|
||||
, create_reader_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1)
|
||||
, create_reader_pool(
|
||||
CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1)
|
||||
, create_reader_scheduler(threadPoolCallbackRunnerUnsafe<ReaderHolder>(create_reader_pool, "CreateS3Reader"))
|
||||
{
|
||||
}
|
||||
@ -699,9 +830,18 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader(size_t idx)
|
||||
}
|
||||
else
|
||||
{
|
||||
auto compression_method = chooseCompressionMethod(key_with_info->key, compression_hint);
|
||||
read_buf = createS3ReadBuffer(key_with_info->key, key_with_info->info->size);
|
||||
|
||||
auto compression_method = CompressionMethod::None;
|
||||
if (!key_with_info->path_in_archive.has_value())
|
||||
{
|
||||
compression_method = chooseCompressionMethod(key_with_info->key, compression_hint);
|
||||
read_buf = createS3ReadBuffer(
|
||||
key_with_info->key, key_with_info->info->size, getContext(), client, bucket, version_id, request_settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
compression_method = chooseCompressionMethod(key_with_info->path_in_archive.value(), compression_hint);
|
||||
read_buf = key_with_info->archive_reader->readFile(key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true);
|
||||
}
|
||||
auto input_format = FormatFactory::instance().getInput(
|
||||
format,
|
||||
*read_buf,
|
||||
@ -753,12 +893,20 @@ std::future<StorageS3Source::ReaderHolder> StorageS3Source::createReaderAsync(si
|
||||
return create_reader_scheduler([=, this] { return createReader(idx); }, Priority{});
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> StorageS3Source::createS3ReadBuffer(const String & key, size_t object_size)
|
||||
std::unique_ptr<ReadBufferFromFileBase> createS3ReadBuffer(
|
||||
const String & key,
|
||||
size_t object_size,
|
||||
std::shared_ptr<const Context> context,
|
||||
std::shared_ptr<const S3::Client> client_ptr,
|
||||
const String & bucket,
|
||||
const String & version_id,
|
||||
const S3Settings::RequestSettings & request_settings)
|
||||
{
|
||||
auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size);
|
||||
auto read_settings = context->getReadSettings().adjustBufferSize(object_size);
|
||||
read_settings.enable_filesystem_cache = false;
|
||||
auto download_buffer_size = getContext()->getSettings().max_download_buffer_size;
|
||||
auto download_buffer_size = context->getSettings().max_download_buffer_size;
|
||||
const bool object_too_small = object_size <= 2 * download_buffer_size;
|
||||
static LoggerPtr log = getLogger("StorageS3Source");
|
||||
|
||||
// Create a read buffer that will prefetch the first ~1 MB of the file.
|
||||
// When reading lots of tiny files, this prefetching almost doubles the throughput.
|
||||
@ -766,25 +914,38 @@ std::unique_ptr<ReadBuffer> StorageS3Source::createS3ReadBuffer(const String & k
|
||||
if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool)
|
||||
{
|
||||
LOG_TRACE(log, "Downloading object of size {} from S3 with initial prefetch", object_size);
|
||||
return createAsyncS3ReadBuffer(key, read_settings, object_size);
|
||||
return createAsyncS3ReadBuffer(key, read_settings, object_size, context, client_ptr, bucket, version_id, request_settings);
|
||||
}
|
||||
|
||||
|
||||
return std::make_unique<ReadBufferFromS3>(
|
||||
client, bucket, key, version_id, request_settings, read_settings,
|
||||
/*use_external_buffer*/ false, /*offset_*/ 0, /*read_until_position_*/ 0,
|
||||
/*restricted_seek_*/ false, object_size);
|
||||
client_ptr,
|
||||
bucket,
|
||||
key,
|
||||
version_id,
|
||||
request_settings,
|
||||
read_settings,
|
||||
/*use_external_buffer*/ false,
|
||||
/*offset_*/ 0,
|
||||
/*read_until_position_*/ 0,
|
||||
/*restricted_seek_*/ false,
|
||||
object_size);
|
||||
}
|
||||
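createS3ReadBuffer keeps the original policy: an object no larger than twice the download buffer size, read through the threadpool reader, gets the asynchronous buffer that prefetches roughly the first megabyte; everything else falls back to a plain ReadBufferFromS3. A minimal sketch of that size-based dispatch, with the types reduced to an enum and plain sizes (not the real signatures):

#include <cstddef>

enum class S3ReadStrategy
{
    AsyncWithPrefetch,  // tiny object + threadpool reader: prefetching nearly doubles throughput
    PlainSyncRead,      // everything else: a regular ranged GET is enough
};

S3ReadStrategy chooseS3ReadStrategy(size_t object_size, size_t download_buffer_size, bool threadpool_reader)
{
    const bool object_too_small = object_size <= 2 * download_buffer_size;
    return (object_too_small && threadpool_reader) ? S3ReadStrategy::AsyncWithPrefetch
                                                   : S3ReadStrategy::PlainSyncRead;
}

// E.g. with a 1 MiB download buffer, a 1.5 MiB object read via the threadpool
// reader gets the prefetching buffer; a 10 MiB object does not.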
|
||||
std::unique_ptr<ReadBuffer> StorageS3Source::createAsyncS3ReadBuffer(
|
||||
const String & key, const ReadSettings & read_settings, size_t object_size)
|
||||
std::unique_ptr<ReadBufferFromFileBase> createAsyncS3ReadBuffer(
|
||||
const String & key,
|
||||
const ReadSettings & read_settings,
|
||||
size_t object_size,
|
||||
std::shared_ptr<const Context> context,
|
||||
std::shared_ptr<const S3::Client> client_ptr,
|
||||
const String & bucket,
|
||||
const String & version_id,
|
||||
const S3Settings::RequestSettings & request_settings)
|
||||
{
|
||||
auto context = getContext();
|
||||
auto read_buffer_creator =
|
||||
[this, read_settings, object_size]
|
||||
(bool restricted_seek, const StoredObject & object) -> std::unique_ptr<ReadBufferFromFileBase>
|
||||
auto read_buffer_creator = [=](bool restricted_seek, const StoredObject & object) -> std::unique_ptr<ReadBufferFromFileBase>
|
||||
{
|
||||
return std::make_unique<ReadBufferFromS3>(
|
||||
client,
|
||||
client_ptr,
|
||||
bucket,
|
||||
object.remote_path,
|
||||
version_id,
|
||||
@ -809,12 +970,12 @@ std::unique_ptr<ReadBuffer> StorageS3Source::createAsyncS3ReadBuffer(
|
||||
StoredObjects{StoredObject{key, /* local_path */ "", object_size}},
|
||||
"",
|
||||
read_settings,
|
||||
/* cache_log */nullptr, /* use_external_buffer */true);
|
||||
/* cache_log */ nullptr,
|
||||
/* use_external_buffer */ true);
|
||||
|
||||
auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER);
|
||||
auto async_reader = std::make_unique<AsynchronousBoundedReadBuffer>(
|
||||
std::move(s3_impl), pool_reader, modified_settings,
|
||||
context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog());
|
||||
std::move(s3_impl), pool_reader, modified_settings, context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog());
|
||||
|
||||
async_reader->setReadUntilEnd();
|
||||
if (read_settings.remote_fs_prefetch)
|
||||
@ -855,12 +1016,14 @@ Chunk StorageS3Source::generate()
|
||||
if (const auto * input_format = reader.getInputFormat())
|
||||
chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk();
|
||||
progress(num_rows, chunk_size ? chunk_size : chunk.bytes());
|
||||
VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize());
|
||||
String file_name = reader.getFile();
|
||||
VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(
|
||||
chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize(), reader.isArchive() ? (&file_name) : nullptr);
|
||||
return chunk;
|
||||
}
|
||||
|
||||
if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files)
|
||||
addNumRowsToCache(reader.getFile(), total_rows_in_file);
|
||||
addNumRowsToCache(reader.getPath(), total_rows_in_file);
|
||||
|
||||
total_rows_in_file = 0;
|
||||
|
||||
@ -879,9 +1042,9 @@ Chunk StorageS3Source::generate()
|
||||
return {};
|
||||
}
|
||||
|
||||
void StorageS3Source::addNumRowsToCache(const String & key, size_t num_rows)
|
||||
void StorageS3Source::addNumRowsToCache(const String & bucket_with_key, size_t num_rows)
|
||||
{
|
||||
String source = fs::path(url_host_and_port) / bucket / key;
|
||||
String source = fs::path(url_host_and_port) / bucket_with_key;
|
||||
auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext());
|
||||
StorageS3::getSchemaCache(getContext()).addNumRows(cache_key, num_rows);
|
||||
}
|
||||
@ -890,10 +1053,7 @@ std::optional<size_t> StorageS3Source::tryGetNumRowsFromCache(const KeyWithInfo
|
||||
{
|
||||
String source = fs::path(url_host_and_port) / bucket / key_with_info.key;
|
||||
auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext());
|
||||
auto get_last_mod_time = [&]() -> std::optional<time_t>
|
||||
{
|
||||
return key_with_info.info->last_modification_time;
|
||||
};
|
||||
auto get_last_mod_time = [&]() -> std::optional<time_t> { return key_with_info.info->last_modification_time; };
|
||||
|
||||
return StorageS3::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time);
|
||||
}
|
||||
@ -910,9 +1070,7 @@ public:
|
||||
const StorageS3::Configuration & configuration_,
|
||||
const String & bucket,
|
||||
const String & key)
|
||||
: SinkToStorage(sample_block_)
|
||||
, sample_block(sample_block_)
|
||||
, format_settings(format_settings_)
|
||||
: SinkToStorage(sample_block_), sample_block(sample_block_), format_settings(format_settings_)
|
||||
{
|
||||
BlobStorageLogWriterPtr blob_log = nullptr;
|
||||
if (auto blob_storage_log = context->getBlobStorageLog())
|
||||
@ -1013,9 +1171,13 @@ private:
|
||||
|
||||
namespace
|
||||
{
|
||||
std::optional<String> checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, const StorageS3::Configuration & configuration, const String & key, size_t sequence_number)
|
||||
|
||||
std::optional<String> checkAndGetNewFileOnInsertIfNeeded(
|
||||
const ContextPtr & context, const StorageS3::Configuration & configuration, const String & key, size_t sequence_number)
|
||||
{
|
||||
if (context->getSettingsRef().s3_truncate_on_insert || !S3::objectExists(*configuration.client, configuration.url.bucket, key, configuration.url.version_id, configuration.request_settings))
|
||||
if (context->getSettingsRef().s3_truncate_on_insert
|
||||
|| !S3::objectExists(
|
||||
*configuration.client, configuration.url.bucket, key, configuration.url.version_id, configuration.request_settings))
|
||||
return std::nullopt;
|
||||
|
||||
if (context->getSettingsRef().s3_create_new_file_on_insert)
|
||||
@ -1026,8 +1188,8 @@ namespace
|
||||
{
|
||||
new_key = key.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos));
|
||||
++sequence_number;
|
||||
}
|
||||
while (S3::objectExists(*configuration.client, configuration.url.bucket, new_key, configuration.url.version_id, configuration.request_settings));
|
||||
} while (S3::objectExists(
|
||||
*configuration.client, configuration.url.bucket, new_key, configuration.url.version_id, configuration.request_settings));
|
||||
|
||||
return new_key;
|
||||
}
|
||||
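checkAndGetNewFileOnInsertIfNeeded keeps the existing insert semantics: with s3_truncate_on_insert the original key may be overwritten, otherwise with s3_create_new_file_on_insert it appends ".N" before the extension until it finds a key that does not exist yet. A sketch of that key-generation loop, with the S3::objectExists call injected as a callback and a simplified starting sequence number (both are assumptions of this sketch):

#include <functional>
#include <optional>
#include <string>

/// Returns std::nullopt when the original key may be (over)written,
/// otherwise the first "stem.N.ext" key that does not exist yet.
std::optional<std::string> nextKeyForInsert(
    const std::string & key,
    bool truncate_on_insert,
    bool create_new_file_on_insert,
    const std::function<bool(const std::string &)> & object_exists)
{
    if (truncate_on_insert || !object_exists(key))
        return std::nullopt;

    if (!create_new_file_on_insert)
        return std::nullopt;  // in that case the caller reports "file already exists" instead

    const auto pos = key.find_last_of('.');
    size_t sequence_number = 1;
    std::string new_key;
    do
    {
        new_key = key.substr(0, pos) + "." + std::to_string(sequence_number)
            + (pos == std::string::npos ? "" : key.substr(pos));
        ++sequence_number;
    } while (object_exists(new_key));

    return new_key;
}

// "data.csv" -> tries "data.1.csv", "data.2.csv", ... until a free key is found.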
@ -1055,7 +1217,8 @@ public:
|
||||
const StorageS3::Configuration & configuration_,
|
||||
const String & bucket_,
|
||||
const String & key_)
|
||||
: PartitionedSink(partition_by, context_, sample_block_), WithContext(context_)
|
||||
: PartitionedSink(partition_by, context_, sample_block_)
|
||||
, WithContext(context_)
|
||||
, format(format_)
|
||||
, sample_block(sample_block_)
|
||||
, compression_method(compression_method_)
|
||||
@ -1078,15 +1241,7 @@ public:
|
||||
partition_key = *new_key;
|
||||
|
||||
return std::make_shared<StorageS3Sink>(
|
||||
format,
|
||||
sample_block,
|
||||
getContext(),
|
||||
format_settings,
|
||||
compression_method,
|
||||
configuration,
|
||||
partition_bucket,
|
||||
partition_key
|
||||
);
|
||||
format, sample_block, getContext(), format_settings, compression_method, configuration, partition_bucket, partition_key);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -1167,7 +1322,8 @@ StorageS3::StorageS3(
|
||||
|
||||
/// We don't allow special columns in S3 storage.
|
||||
if (!columns_.hasOnlyOrdinary())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL");
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL");
|
||||
storage_metadata.setColumns(columns_);
|
||||
}
|
||||
|
||||
@ -1178,24 +1334,36 @@ StorageS3::StorageS3(
|
||||
}
|
||||
|
||||
static std::shared_ptr<StorageS3Source::IIterator> createFileIterator(
|
||||
const StorageS3::Configuration & configuration,
|
||||
StorageS3::Configuration configuration,
|
||||
bool distributed_processing,
|
||||
ContextPtr local_context,
|
||||
const ActionsDAG::Node * predicate,
|
||||
const NamesAndTypesList & virtual_columns,
|
||||
StorageS3::KeysWithInfo * read_keys = nullptr,
|
||||
StorageS3Source::KeysWithInfo * read_keys = nullptr,
|
||||
std::function<void(FileProgress)> file_progress_callback = {})
|
||||
{
|
||||
if (distributed_processing)
|
||||
{
|
||||
return std::make_shared<StorageS3Source::ReadTaskIterator>(local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads);
|
||||
return std::make_shared<StorageS3Source::ReadTaskIterator>(
|
||||
local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads);
|
||||
}
|
||||
else if (configuration.withGlobs())
|
||||
else
|
||||
{
|
||||
auto basic_iterator = [&]() -> std::unique_ptr<StorageS3Source::IIterator>
|
||||
{
|
||||
StorageS3Source::KeysWithInfo * local_read_keys = configuration.url.archive_pattern.has_value() ? nullptr : read_keys;
|
||||
if (configuration.withGlobs())
|
||||
{
|
||||
/// Iterate through disclosed globs and make a source for each file
|
||||
return std::make_shared<StorageS3Source::DisclosedGlobIterator>(
|
||||
*configuration.client, configuration.url, predicate, virtual_columns,
|
||||
local_context, read_keys, configuration.request_settings, file_progress_callback);
|
||||
return std::make_unique<StorageS3Source::DisclosedGlobIterator>(
|
||||
*configuration.client,
|
||||
configuration.url,
|
||||
predicate,
|
||||
virtual_columns,
|
||||
local_context,
|
||||
local_read_keys,
|
||||
configuration.request_settings,
|
||||
file_progress_callback);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1209,26 +1377,48 @@ static std::shared_ptr<StorageS3Source::IIterator> createFileIterator(
|
||||
paths.push_back(fs::path(configuration.url.bucket) / key);
|
||||
VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context);
|
||||
}
|
||||
|
||||
return std::make_shared<StorageS3Source::KeysIterator>(
|
||||
*configuration.client, configuration.url.version_id, keys,
|
||||
configuration.url.bucket, configuration.request_settings, read_keys, file_progress_callback);
|
||||
return std::make_unique<StorageS3Source::KeysIterator>(
|
||||
*configuration.client,
|
||||
configuration.url.version_id,
|
||||
keys,
|
||||
configuration.url.bucket,
|
||||
configuration.request_settings,
|
||||
local_read_keys,
|
||||
file_progress_callback);
|
||||
}
|
||||
}();
|
||||
if (configuration.url.archive_pattern.has_value())
|
||||
{
|
||||
return std::make_shared<StorageS3Source::ArchiveIterator>(
|
||||
std::move(basic_iterator),
|
||||
configuration.url.archive_pattern.value(),
|
||||
configuration.client,
|
||||
configuration.url.bucket,
|
||||
configuration.url.version_id,
|
||||
configuration.request_settings,
|
||||
local_context,
|
||||
read_keys);
|
||||
}
|
||||
else
|
||||
{
|
||||
return basic_iterator;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
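createFileIterator now builds the glob- or keys-based iterator first and, only when the URL carries an archive pattern, wraps it in an ArchiveIterator; otherwise the basic iterator is returned as-is. The same decorator shape in isolation (the tiny interface and class names below are illustrative, not the ClickHouse types):

#include <memory>
#include <optional>
#include <string>

struct IKeyIterator
{
    virtual ~IKeyIterator() = default;
    virtual std::optional<std::string> next() = 0;
};

/// Decorator: takes keys from the wrapped iterator and narrows them down to
/// entries inside an archive that match the given pattern.
class ArchiveKeyIterator : public IKeyIterator
{
public:
    ArchiveKeyIterator(std::unique_ptr<IKeyIterator> basic_, std::string pattern_)
        : basic(std::move(basic_)), pattern(std::move(pattern_)) {}

    std::optional<std::string> next() override
    {
        // A real implementation would open the archive behind each key and
        // enumerate its members; this sketch only forwards the key.
        return basic->next();
    }

private:
    std::unique_ptr<IKeyIterator> basic;
    std::string pattern;
};

std::shared_ptr<IKeyIterator> makeFileIterator(
    std::unique_ptr<IKeyIterator> basic, const std::optional<std::string> & archive_pattern)
{
    if (archive_pattern)
        return std::make_shared<ArchiveKeyIterator>(std::move(basic), *archive_pattern);
    return std::shared_ptr<IKeyIterator>(std::move(basic));
}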
bool StorageS3::supportsSubsetOfColumns(const ContextPtr & context) const
|
||||
{
|
||||
return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings);
|
||||
return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getFormatCopy(), context, format_settings);
|
||||
}
|
||||
|
||||
bool StorageS3::prefersLargeBlocks() const
|
||||
{
|
||||
return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format);
|
||||
return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(getFormatCopy());
|
||||
}
|
||||
|
||||
bool StorageS3::parallelizeOutputAfterReading(ContextPtr context) const
|
||||
{
|
||||
return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context);
|
||||
return FormatFactory::instance().checkParallelizeOutputAfterReading(getFormatCopy(), context);
|
||||
}
|
||||
|
||||
void StorageS3::read(
|
||||
@ -1241,6 +1431,7 @@ void StorageS3::read(
|
||||
size_t max_block_size,
|
||||
size_t num_streams)
|
||||
{
|
||||
updateConfiguration(local_context);
|
||||
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context));
|
||||
|
||||
bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
|
||||
@ -1267,7 +1458,6 @@ void ReadFromStorageS3Step::applyFilters(ActionDAGNodes added_filter_nodes)
|
||||
const ActionsDAG::Node * predicate = nullptr;
|
||||
if (filter_actions_dag)
|
||||
predicate = filter_actions_dag->getOutputs().at(0);
|
||||
|
||||
createIterator(predicate);
|
||||
}
|
||||
|
||||
@ -1277,8 +1467,13 @@ void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate)
|
||||
return;
|
||||
|
||||
iterator_wrapper = createFileIterator(
|
||||
query_configuration, storage.distributed_processing, context, predicate,
|
||||
virtual_columns, nullptr, context->getFileProgressCallback());
|
||||
storage.getConfigurationCopy(),
|
||||
storage.distributed_processing,
|
||||
context,
|
||||
predicate,
|
||||
virtual_columns,
|
||||
nullptr,
|
||||
context->getFileProgressCallback());
|
||||
}
|
||||
|
||||
void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
|
||||
@ -1287,7 +1482,6 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet");
|
||||
|
||||
createIterator(nullptr);
|
||||
|
||||
size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount();
|
||||
if (estimated_keys_count > 1)
|
||||
num_streams = std::min(num_streams, estimated_keys_count);
|
||||
@ -1297,9 +1491,8 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
|
||||
num_streams = 1;
|
||||
}
|
||||
|
||||
const auto & settings = context->getSettingsRef();
|
||||
const size_t max_parsing_threads = num_streams >= settings.max_parsing_threads ? 1 : (settings.max_parsing_threads / std::max(num_streams, 1ul));
|
||||
LOG_DEBUG(getLogger("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads);
|
||||
const size_t max_threads = context->getSettingsRef().max_threads;
|
||||
const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul));
|
||||
|
||||
Pipes pipes;
|
||||
pipes.reserve(num_streams);
|
||||
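The parsing-thread budget is now derived from max_threads rather than max_parsing_threads: each stream gets max_threads / num_streams parsing threads, and once there are at least as many streams as threads every stream parses single-threaded. A tiny sketch of the same computation with a worked example:

#include <algorithm>
#include <cstddef>

size_t parsingThreadsPerStream(size_t num_streams, size_t max_threads)
{
    return num_streams >= max_threads ? 1 : (max_threads / std::max<size_t>(num_streams, 1));
}

// max_threads = 16, num_streams = 4  -> 4 parsing threads per stream
// max_threads = 16, num_streams = 32 -> 1 parsing thread per stream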
@ -1336,7 +1529,8 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
|
||||
pipeline.init(std::move(pipe));
|
||||
}
|
||||
|
||||
SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
|
||||
SinkToStoragePtr StorageS3::write(
|
||||
const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
|
||||
{
|
||||
auto query_configuration = updateConfigurationAndGetCopy(local_context);
|
||||
auto key = query_configuration.keys.front();
|
||||
@ -1367,8 +1561,9 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr
|
||||
}
|
||||
else
|
||||
{
|
||||
if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(local_context, configuration, query_configuration.keys.front(), query_configuration.keys.size()))
|
||||
if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(local_context, query_configuration, query_configuration.keys.front(), query_configuration.keys.size()))
|
||||
{
|
||||
std::lock_guard lock{configuration_update_mutex};
|
||||
query_configuration.keys.push_back(*new_key);
|
||||
configuration.keys.push_back(*new_key);
|
||||
key = *new_key;
|
||||
@ -1417,10 +1612,9 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &,
|
||||
const auto * response_error = response.IsSuccess() ? nullptr : &response.GetError();
|
||||
auto time_now = std::chrono::system_clock::now();
|
||||
if (auto blob_storage_log = BlobStorageLogWriter::create())
|
||||
{
|
||||
for (const auto & key : query_configuration.keys)
|
||||
blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete, query_configuration.url.bucket, key, {}, 0, response_error, time_now);
|
||||
}
|
||||
blob_storage_log->addEvent(
|
||||
BlobStorageLogElement::EventType::Delete, query_configuration.url.bucket, key, {}, 0, response_error, time_now);
|
||||
|
||||
if (!response.IsSuccess())
|
||||
{
|
||||
@ -1445,18 +1639,24 @@ void StorageS3::updateConfiguration(const ContextPtr & local_context)
|
||||
configuration.update(local_context);
|
||||
}
|
||||
|
||||
void StorageS3::useConfiguration(const Configuration & new_configuration)
|
||||
void StorageS3::useConfiguration(const StorageS3::Configuration & new_configuration)
|
||||
{
|
||||
std::lock_guard lock(configuration_update_mutex);
|
||||
configuration = new_configuration;
|
||||
}
|
||||
|
||||
const StorageS3::Configuration & StorageS3::getConfiguration()
|
||||
StorageS3::Configuration StorageS3::getConfigurationCopy() const
|
||||
{
|
||||
std::lock_guard lock(configuration_update_mutex);
|
||||
return configuration;
|
||||
}
|
||||
|
||||
String StorageS3::getFormatCopy() const
|
||||
{
|
||||
std::lock_guard lock(configuration_update_mutex);
|
||||
return configuration.format;
|
||||
}
|
||||
|
||||
bool StorageS3::Configuration::update(const ContextPtr & context)
|
||||
{
|
||||
auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName());
|
||||
@ -1511,7 +1711,8 @@ void StorageS3::Configuration::connect(const ContextPtr & context)
|
||||
.is_s3express_bucket = S3::isS3ExpressEndpoint(url.endpoint),
|
||||
};
|
||||
|
||||
auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token);
|
||||
auto credentials
|
||||
= Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token);
|
||||
client = S3::ClientFactory::instance().create(
|
||||
client_configuration,
|
||||
client_settings,
|
||||
@ -1552,10 +1753,12 @@ void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configur
|
||||
configuration.auth_settings.secret_access_key = collection.getOrDefault<String>("secret_access_key", "");
|
||||
configuration.auth_settings.use_environment_credentials = collection.getOrDefault<UInt64>("use_environment_credentials", 1);
|
||||
configuration.auth_settings.no_sign_request = collection.getOrDefault<bool>("no_sign_request", false);
|
||||
configuration.auth_settings.expiration_window_seconds = collection.getOrDefault<UInt64>("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS);
|
||||
configuration.auth_settings.expiration_window_seconds
|
||||
= collection.getOrDefault<UInt64>("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS);
|
||||
|
||||
configuration.format = collection.getOrDefault<String>("format", configuration.format);
|
||||
configuration.compression_method = collection.getOrDefault<String>("compression_method", collection.getOrDefault<String>("compression", "auto"));
|
||||
configuration.compression_method
|
||||
= collection.getOrDefault<String>("compression_method", collection.getOrDefault<String>("compression", "auto"));
|
||||
configuration.structure = collection.getOrDefault<String>("structure", "auto");
|
||||
|
||||
configuration.request_settings = S3Settings::RequestSettings(collection);
|
||||
@ -1591,8 +1794,8 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C
|
||||
|
||||
if (count == 0 || count > 6)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Storage S3 requires 1 to 5 arguments: "
|
||||
"url, [NOSIGN | access_key_id, secret_access_key], name of used format and [compression_method]");
|
||||
"Storage S3 requires 1 to 6 positional arguments: "
|
||||
"url, [NOSIGN | access_key_id, secret_access_key], [session_token], [name of used format], [compression_method], [headers], [extra_credentials]");
|
||||
|
||||
std::unordered_map<std::string_view, size_t> engine_args_to_idx;
|
||||
bool no_sign_request = false;
|
||||
@ -1644,15 +1847,11 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C
|
||||
{
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "session_token/format");
|
||||
if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg))
|
||||
{
|
||||
engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}};
|
||||
}
|
||||
else
|
||||
{
|
||||
engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}};
|
||||
}
|
||||
}
|
||||
}
|
||||
/// For 5 arguments we support 2 possible variants:
|
||||
/// - s3(source, access_key_id, secret_access_key, session_token, format)
|
||||
/// - s3(source, access_key_id, secret_access_key, format, compression)
|
||||
@ -1660,17 +1859,14 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C
|
||||
{
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "session_token/format");
|
||||
if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg))
|
||||
{
|
||||
engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}};
|
||||
}
|
||||
else
|
||||
{
|
||||
engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}};
|
||||
}
|
||||
}
|
||||
else if (count == 6)
|
||||
{
|
||||
engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}};
|
||||
engine_args_to_idx
|
||||
= {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}};
|
||||
}
|
||||
|
||||
/// This argument is always the first
|
||||
@ -1680,49 +1876,60 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C
|
||||
configuration.format = checkAndGetLiteralArgument<String>(engine_args[engine_args_to_idx["format"]], "format");
|
||||
|
||||
if (engine_args_to_idx.contains("compression_method"))
|
||||
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[engine_args_to_idx["compression_method"]], "compression_method");
|
||||
configuration.compression_method
|
||||
= checkAndGetLiteralArgument<String>(engine_args[engine_args_to_idx["compression_method"]], "compression_method");
|
||||
|
||||
if (engine_args_to_idx.contains("access_key_id"))
|
||||
configuration.auth_settings.access_key_id = checkAndGetLiteralArgument<String>(engine_args[engine_args_to_idx["access_key_id"]], "access_key_id");
|
||||
configuration.auth_settings.access_key_id
|
||||
= checkAndGetLiteralArgument<String>(engine_args[engine_args_to_idx["access_key_id"]], "access_key_id");
|
||||
|
||||
if (engine_args_to_idx.contains("secret_access_key"))
|
||||
configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument<String>(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key");
|
||||
configuration.auth_settings.secret_access_key
|
||||
= checkAndGetLiteralArgument<String>(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key");
|
||||
|
||||
if (engine_args_to_idx.contains("session_token"))
|
||||
configuration.auth_settings.session_token = checkAndGetLiteralArgument<String>(engine_args[engine_args_to_idx["session_token"]], "session_token");
|
||||
configuration.auth_settings.session_token
|
||||
= checkAndGetLiteralArgument<String>(engine_args[engine_args_to_idx["session_token"]], "session_token");
|
||||
|
||||
if (no_sign_request)
|
||||
configuration.auth_settings.no_sign_request = no_sign_request;
|
||||
}
|
||||
|
||||
configuration.static_configuration = !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value();
|
||||
configuration.static_configuration
|
||||
= !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value();
|
||||
|
||||
configuration.keys = {configuration.url.key};
|
||||
|
||||
if (configuration.format == "auto" && get_format_from_file)
|
||||
configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.url.key).value_or("auto");
|
||||
{
|
||||
if (configuration.url.archive_pattern.has_value())
|
||||
{
|
||||
configuration.format = FormatFactory::instance()
|
||||
.tryGetFormatFromFileName(Poco::URI(configuration.url.archive_pattern.value()).getPath())
|
||||
.value_or("auto");
|
||||
}
|
||||
else
|
||||
{
|
||||
configuration.format
|
||||
= FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(configuration.url.uri_str).getPath()).value_or("auto");
|
||||
}
|
||||
}
|
||||
|
||||
return configuration;
|
||||
}
|
||||
|
||||
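When the format is left as "auto", it is now inferred from the archive pattern's path if one is present and from the URL path otherwise. A simplified extension-based sketch; the real FormatFactory lookup knows many more formats, and the extension-to-format mappings below are only illustrative:

#include <optional>
#include <string>

std::optional<std::string> formatFromFileName(const std::string & path)
{
    auto ends_with = [&](const std::string & suffix)
    { return path.size() >= suffix.size() && path.compare(path.size() - suffix.size(), suffix.size(), suffix) == 0; };

    if (ends_with(".csv"))     return "CSV";
    if (ends_with(".tsv"))     return "TSV";
    if (ends_with(".parquet")) return "Parquet";
    return std::nullopt;
}

std::string resolveFormat(const std::string & url_path, const std::optional<std::string> & archive_pattern)
{
    /// For archives the member path decides the format, not the archive name.
    const std::string & effective = archive_pattern ? *archive_pattern : url_path;
    return formatFromFileName(effective).value_or("auto");
}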
ColumnsDescription StorageS3::getTableStructureFromData(
|
||||
const StorageS3::Configuration & configuration,
|
||||
const std::optional<FormatSettings> & format_settings,
|
||||
const ContextPtr & ctx)
|
||||
const StorageS3::Configuration & configuration_, const std::optional<FormatSettings> & format_settings_, const ContextPtr & ctx)
|
||||
{
|
||||
return getTableStructureAndFormatFromDataImpl(configuration.format, configuration, format_settings, ctx).first;
|
||||
return getTableStructureAndFormatFromDataImpl(configuration_.format, configuration_, format_settings_, ctx).first;
|
||||
}
|
||||
|
||||
std::pair<ColumnsDescription, String> StorageS3::getTableStructureAndFormatFromData(
|
||||
const StorageS3::Configuration & configuration,
|
||||
const std::optional<FormatSettings> & format_settings,
|
||||
const ContextPtr & ctx)
|
||||
const StorageS3::Configuration & configuration, const std::optional<FormatSettings> & format_settings, const ContextPtr & ctx)
|
||||
{
|
||||
return getTableStructureAndFormatFromDataImpl(std::nullopt, configuration, format_settings, ctx);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class ReadBufferIterator : public IReadBufferIterator, WithContext
|
||||
{
|
||||
public:
|
||||
@ -1732,7 +1939,7 @@ namespace
|
||||
const StorageS3::Configuration & configuration_,
|
||||
std::optional<String> format_,
|
||||
const std::optional<FormatSettings> & format_settings_,
|
||||
const ContextPtr & context_)
|
||||
ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, file_iterator(file_iterator_)
|
||||
, read_keys(read_keys_)
|
||||
@ -1753,7 +1960,7 @@ namespace
|
||||
{
|
||||
for (const auto & key_with_info : read_keys)
|
||||
{
|
||||
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->key))
|
||||
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->getFileName()))
|
||||
{
|
||||
format = format_from_file_name;
|
||||
break;
|
||||
@ -1793,7 +2000,6 @@ namespace
|
||||
return {nullptr, std::nullopt, format};
|
||||
}
|
||||
|
||||
/// S3 file iterator could get new keys after new iteration
|
||||
if (read_keys.size() > prev_read_keys_size)
|
||||
{
|
||||
/// If format is unknown we can try to determine it by new file names.
|
||||
@ -1801,7 +2007,7 @@ namespace
|
||||
{
|
||||
for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it)
|
||||
{
|
||||
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->key))
|
||||
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName()))
|
||||
{
|
||||
format = format_from_file_name;
|
||||
break;
|
||||
@ -1826,7 +2032,7 @@ namespace
|
||||
/// In union mode, check cached columns only for current key.
|
||||
if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION)
|
||||
{
|
||||
StorageS3::KeysWithInfo keys = {current_key_with_info};
|
||||
StorageS3Source::KeysWithInfo keys = {current_key_with_info};
|
||||
if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end()))
|
||||
{
|
||||
first = false;
|
||||
@ -1835,11 +2041,36 @@ namespace
|
||||
}
|
||||
|
||||
int zstd_window_log_max = static_cast<int>(getContext()->getSettingsRef().zstd_window_log_max);
|
||||
auto impl = std::make_unique<ReadBufferFromS3>(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings());
|
||||
std::unique_ptr<ReadBufferFromFileBase> impl;
|
||||
|
||||
if (!current_key_with_info->path_in_archive.has_value())
|
||||
{
|
||||
impl = std::make_unique<ReadBufferFromS3>(
|
||||
configuration.client,
|
||||
configuration.url.bucket,
|
||||
current_key_with_info->key,
|
||||
configuration.url.version_id,
|
||||
configuration.request_settings,
|
||||
getContext()->getReadSettings());
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(current_key_with_info->archive_reader);
|
||||
impl = current_key_with_info->archive_reader->readFile(
|
||||
current_key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true);
|
||||
}
|
||||
if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof())
|
||||
{
|
||||
first = false;
|
||||
return {wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max), std::nullopt, format};
|
||||
return {
|
||||
wrapReadBufferWithCompressionMethod(
|
||||
std::move(impl),
|
||||
current_key_with_info->path_in_archive.has_value()
|
||||
? chooseCompressionMethod(current_key_with_info->path_in_archive.value(), configuration.compression_method)
|
||||
: chooseCompressionMethod(current_key_with_info->key, configuration.compression_method),
|
||||
zstd_window_log_max),
|
||||
std::nullopt,
|
||||
format};
|
||||
}
|
||||
}
|
||||
}
|
||||
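The schema-inference branch above opens either a plain ReadBufferFromS3 or the archive reader's stream, and the compression method is chosen from the innermost name: the path inside the archive when present, otherwise the object key. A small sketch of that selection (the extension table is an illustrative subset):

#include <optional>
#include <string>

enum class CompressionMethod { None, Gzip, Zstd };

CompressionMethod compressionFromFileName(const std::string & name)
{
    auto ends_with = [&](const std::string & suffix)
    { return name.size() >= suffix.size() && name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0; };

    if (ends_with(".gz"))  return CompressionMethod::Gzip;
    if (ends_with(".zst")) return CompressionMethod::Zstd;
    return CompressionMethod::None;
}

/// The decompressor must look at the innermost file: a data.csv.gz inside an
/// uncompressed archive is still gzip-compressed, while the archive's own
/// name says nothing about the member's compression.
CompressionMethod chooseCompression(const std::string & key, const std::optional<std::string> & path_in_archive)
{
    return compressionFromFileName(path_in_archive ? *path_in_archive : key);
}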
@ -1849,7 +2080,8 @@ namespace
if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3)
return;

String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key;
String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort()))
/ configuration.url.bucket / current_key_with_info->getPath();
auto key = getKeyForSchemaCache(source, *format, format_settings, getContext());
StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows);
}
@ -1860,7 +2092,8 @@ namespace
|| getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION)
return;

String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key;
String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort()))
/ configuration.url.bucket / current_key_with_info->getPath();
auto cache_key = getKeyForSchemaCache(source, *format, format_settings, getContext());
StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns);
}
@ -1874,7 +2107,11 @@ namespace
auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket;
Strings sources;
sources.reserve(read_keys.size());
std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem->key; });
std::transform(
read_keys.begin(),
read_keys.end(),
std::back_inserter(sources),
[&](const auto & elem) { return host_and_bucket / elem->getPath(); });
auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext());
StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns);
}
@ -1887,7 +2124,7 @@ namespace
String getLastFileName() const override
{
if (current_key_with_info)
return current_key_with_info->key;
return current_key_with_info->getPath();
return "";
}

@ -1903,8 +2140,7 @@ namespace

private:
std::optional<ColumnsDescription> tryGetColumnsFromCache(
const StorageS3::KeysWithInfo::const_iterator & begin,
const StorageS3::KeysWithInfo::const_iterator & end)
const StorageS3Source::KeysWithInfo::const_iterator & begin, const StorageS3Source::KeysWithInfo::const_iterator & end)
{
auto context = getContext();
if (!context->getSettingsRef().schema_inference_use_cache_for_s3)
@ -1937,8 +2173,8 @@ namespace

return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt;
};
String path = fs::path(configuration.url.bucket) / (*it)->getPath();

String path = fs::path(configuration.url.bucket) / (*it)->key;
String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path;

if (format)
@ -1951,7 +2187,7 @@ namespace
{
/// If format is unknown, we can iterate through all possible input formats
/// and check if we have an entry with this format and this file in schema cache.
/// If we have such entry for some format, we can use this format to read the file.
for (const auto & format_name : FormatFactory::instance().getAllInputFormats())
{
auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context);
@ -1978,11 +2214,9 @@ namespace
bool first = true;
};

}

std::pair<ColumnsDescription, String> StorageS3::getTableStructureAndFormatFromDataImpl(
std::optional<String> format,
const Configuration & configuration,
const StorageS3::Configuration & configuration,
const std::optional<FormatSettings> & format_settings,
const ContextPtr & ctx)
{
@ -2071,7 +2305,6 @@ SchemaCache & StorageS3::getSchemaCache(const ContextPtr & ctx)
static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_s3", DEFAULT_SCHEMA_CACHE_ELEMENTS));
return schema_cache;
}

}

#endif

@ -1,7 +1,10 @@
#pragma once

#include "config.h"

#include <memory>
#include <IO/ReadBufferFromS3.h>
#include "IO/Archives/IArchiveReader.h"
#include "IO/Archives/createArchiveReader.h"
#include "IO/ReadBuffer.h"
#if USE_AWS_S3

#include <Compression/CompressionInfo.h>
@ -23,36 +26,52 @@
#include <Poco/URI.h>
#include <Common/threadPoolCallbackRunner.h>

#include <filesystem>

namespace fs = std::filesystem;

namespace DB
{

namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}

class PullingPipelineExecutor;
class NamedCollection;

class StorageS3Source : public SourceWithKeyCondition, WithContext
{
public:

struct KeyWithInfo
{
KeyWithInfo() = default;

explicit KeyWithInfo(String key_, std::optional<S3::ObjectInfo> info_ = std::nullopt)
: key(std::move(key_)), info(std::move(info_)) {}
explicit KeyWithInfo(
String key_,
std::optional<S3::ObjectInfo> info_ = std::nullopt,
std::optional<String> path_in_archive_ = std::nullopt,
std::shared_ptr<IArchiveReader> archive_reader_ = nullptr)
: key(std::move(key_))
, info(std::move(info_))
, path_in_archive(std::move(path_in_archive_))
, archive_reader(std::move(archive_reader_))
{
if (path_in_archive.has_value() != (archive_reader != nullptr))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Archive reader and path in archive must exist simultaneously");
}

virtual ~KeyWithInfo() = default;

String key;
std::optional<S3::ObjectInfo> info;
std::optional<String> path_in_archive;
std::shared_ptr<IArchiveReader> archive_reader;

String getPath() const { return path_in_archive.has_value() ? (key + "::" + path_in_archive.value()) : key; }
String getFileName() const { return path_in_archive.has_value() ? path_in_archive.value() : key; }
};
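
The two accessors above are the heart of the archive support in this header: getPath() yields the combined path "key::path_in_archive" used for caching and display, while getFileName() yields the name used for format and compression detection. A small self-contained sketch of the same composition, with free functions standing in for the member functions and made-up example values:

#include <cassert>
#include <optional>
#include <string>

// Free-function versions of the KeyWithInfo accessors declared above; `key` and
// `path_in_archive` mirror the struct members, everything else is illustrative.
std::string getPath(const std::string & key, const std::optional<std::string> & path_in_archive)
{
    return path_in_archive ? key + "::" + *path_in_archive : key;
}

std::string getFileName(const std::string & key, const std::optional<std::string> & path_in_archive)
{
    return path_in_archive ? *path_in_archive : key;
}

int main()
{
    // Plain object: both accessors fall back to the S3 key.
    assert(getPath("dir/data.csv", std::nullopt) == "dir/data.csv");
    assert(getFileName("dir/data.csv", std::nullopt) == "dir/data.csv");

    // Archive member: the path is "key::inner", the file name is the inner path,
    // so format and compression detection look at the member, not the archive.
    assert(getPath("dir/batch.zip", std::string("part-1.parquet")) == "dir/batch.zip::part-1.parquet");
    assert(getFileName("dir/batch.zip", std::string("part-1.parquet")) == "part-1.parquet");
}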

using KeyWithInfoPtr = std::shared_ptr<KeyWithInfo>;

using KeysWithInfo = std::vector<KeyWithInfoPtr>;

class IIterator
{
public:
@ -126,6 +145,41 @@ public:
ReadTaskCallback callback;
};

class ArchiveIterator : public IIterator, public WithContext
{
public:
explicit ArchiveIterator(
std::unique_ptr<IIterator> basic_iterator_,
const std::string & archive_pattern_,
std::shared_ptr<const S3::Client> client_,
const String & bucket_,
const String & version_id_,
const S3Settings::RequestSettings & request_settings,
ContextPtr context_,
KeysWithInfo * read_keys_);

KeyWithInfoPtr next(size_t) override; /// NOLINT
size_t estimatedKeysCount() override;
void refreshArchiveReader();

private:
std::unique_ptr<IIterator> basic_iterator;
KeyWithInfoPtr basic_key_with_info_ptr;
std::unique_ptr<ReadBufferFromFileBase> basic_read_buffer;
std::shared_ptr<IArchiveReader> archive_reader{nullptr};
std::unique_ptr<IArchiveReader::FileEnumerator> file_enumerator = nullptr;
std::string path_in_archive = {}; // used when reading a single file from archive
IArchiveReader::NameFilter filter = {}; // used when files inside archive are defined with a glob
std::shared_ptr<const S3::Client> client;
const String bucket;
const String version_id;
S3Settings::RequestSettings request_settings;
std::mutex take_next_mutex;
KeysWithInfo * read_keys;
};
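
ArchiveIterator decorates a basic key iterator and flattens each archive it returns into the members that match a single path or a glob-derived filter. The declaration alone does not show the traversal, so here is a hedged, self-contained sketch of that flattening pattern with plain standard-library containers; FlatteningIterator and its in-memory archive list are illustrative stand-ins, not the real implementation:

#include <cstddef>
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <utility>
#include <vector>

// Yields one (archive key, inner path) pair per member accepted by the filter,
// walking archives in the order the "basic iterator" (here: a vector) produced them.
class FlatteningIterator
{
public:
    FlatteningIterator(std::vector<std::pair<std::string, std::vector<std::string>>> archives_,
                       std::function<bool(const std::string &)> filter_)
        : archives(std::move(archives_)), filter(std::move(filter_)) {}

    std::optional<std::pair<std::string, std::string>> next()
    {
        while (archive_idx < archives.size())
        {
            const auto & [key, members] = archives[archive_idx];
            while (member_idx < members.size())
            {
                const std::string & member = members[member_idx++];
                if (filter(member))
                    return std::make_pair(key, member);
            }
            ++archive_idx;      // current archive exhausted, move to the next one
            member_idx = 0;
        }
        return std::nullopt;    // underlying iterator exhausted
    }

private:
    std::vector<std::pair<std::string, std::vector<std::string>>> archives;
    std::function<bool(const std::string &)> filter;
    std::size_t archive_idx = 0;
    std::size_t member_idx = 0;
};

int main()
{
    FlatteningIterator it(
        {{"a.zip", {"x.csv", "x.json"}}, {"b.zip", {"y.csv"}}},
        [](const std::string & name) { return name.size() > 4 && name.substr(name.size() - 4) == ".csv"; });

    while (auto kv = it.next())
        std::cout << kv->first << "::" << kv->second << '\n'; // a.zip::x.csv, then b.zip::y.csv
}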

friend StorageS3Source::ArchiveIterator;

StorageS3Source(
const ReadFromFormatInfo & info,
const String & format,
@ -194,10 +248,7 @@ private:
ReaderHolder(const ReaderHolder & other) = delete;
ReaderHolder & operator=(const ReaderHolder & other) = delete;

ReaderHolder(ReaderHolder && other) noexcept
{
*this = std::move(other);
}
ReaderHolder(ReaderHolder && other) noexcept { *this = std::move(other); }

ReaderHolder & operator=(ReaderHolder && other) noexcept
{
@ -215,8 +266,9 @@ private:
explicit operator bool() const { return reader != nullptr; }
PullingPipelineExecutor * operator->() { return reader.get(); }
const PullingPipelineExecutor * operator->() const { return reader.get(); }
String getPath() const { return fs::path(bucket) / key_with_info->key; }
const String & getFile() const { return key_with_info->key; }
String getPath() const { return bucket + "/" + key_with_info->getPath(); }
String getFile() const { return key_with_info->getFileName(); }
bool isArchive() { return key_with_info->path_in_archive.has_value(); }
const KeyWithInfo & getKeyWithInfo() const { return *key_with_info; }
std::optional<size_t> getFileSize() const { return key_with_info->info ? std::optional(key_with_info->info->size) : std::nullopt; }

@ -255,10 +307,7 @@ private:
ReaderHolder createReader(size_t idx = 0);
std::future<ReaderHolder> createReaderAsync(size_t idx = 0);

std::unique_ptr<ReadBuffer> createS3ReadBuffer(const String & key, size_t object_size);
std::unique_ptr<ReadBuffer> createAsyncS3ReadBuffer(const String & key, const ReadSettings & read_settings, size_t object_size);

void addNumRowsToCache(const String & key, size_t num_rows);
void addNumRowsToCache(const String & bucket_with_key, size_t num_rows);
std::optional<size_t> tryGetNumRowsFromCache(const KeyWithInfo & key_with_info);
};

@ -285,8 +334,7 @@ public:
bool withPartitionWildcard() const
{
static const String PARTITION_ID_WILDCARD = "{_partition_id}";
return url.bucket.find(PARTITION_ID_WILDCARD) != String::npos
|| keys.back().find(PARTITION_ID_WILDCARD) != String::npos;
return url.bucket.find(PARTITION_ID_WILDCARD) != String::npos || keys.back().find(PARTITION_ID_WILDCARD) != String::npos;
}

bool withGlobsIgnorePartitionWildcard() const;
@ -315,10 +363,7 @@ public:
bool distributed_processing_ = false,
ASTPtr partition_by_ = nullptr);

String getName() const override
{
return name;
}
String getName() const override { return name; }

void read(
QueryPlan & query_plan,
@ -330,27 +375,25 @@ public:
size_t max_block_size,
size_t num_streams) override;

SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override;
SinkToStoragePtr
write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override;

void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override;
void truncate(
const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override;

bool supportsPartitionBy() const override;

static void processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection);
static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection);

static SchemaCache & getSchemaCache(const ContextPtr & ctx);

static StorageS3::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file = true);
static Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file = true);

static ColumnsDescription getTableStructureFromData(
const StorageS3::Configuration & configuration,
const std::optional<FormatSettings> & format_settings,
const ContextPtr & ctx);
const Configuration & configuration_, const std::optional<FormatSettings> & format_settings_, const ContextPtr & ctx);

static std::pair<ColumnsDescription, String> getTableStructureAndFormatFromData(
const StorageS3::Configuration & configuration,
const std::optional<FormatSettings> & format_settings,
const ContextPtr & ctx);
const Configuration & configuration, const std::optional<FormatSettings> & format_settings, const ContextPtr & ctx);

using KeysWithInfo = StorageS3Source::KeysWithInfo;

@ -363,7 +406,9 @@ protected:

void useConfiguration(const Configuration & new_configuration);

const Configuration & getConfiguration();
Configuration getConfigurationCopy() const;

String getFormatCopy() const;

private:
friend class StorageS3Cluster;
@ -372,7 +417,7 @@ private:
friend class ReadFromStorageS3Step;

Configuration configuration;
std::mutex configuration_update_mutex;
mutable std::mutex configuration_update_mutex;

String name;
const bool distributed_processing;
@ -394,6 +439,24 @@ private:
bool parallelizeOutputAfterReading(ContextPtr context) const override;
};

std::unique_ptr<ReadBufferFromFileBase> createS3ReadBuffer(
const String & key,
size_t object_size,
std::shared_ptr<const Context> context,
std::shared_ptr<const S3::Client> client_ptr,
const String & bucket,
const String & version_id,
const S3Settings::RequestSettings & request_settings);

std::unique_ptr<ReadBufferFromFileBase> createAsyncS3ReadBuffer(
const String & key,
const ReadSettings & read_settings,
size_t object_size,
std::shared_ptr<const Context> context,
std::shared_ptr<const S3::Client> client_ptr,
const String & bucket,
const String & version_id,
const S3Settings::RequestSettings & request_settings);
}

#endif

@ -91,7 +91,14 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context)
RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
{
auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(
*s3_configuration.client, s3_configuration.url, predicate, getVirtualsList(), context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback());
*s3_configuration.client,
s3_configuration.url,
predicate,
getVirtualsList(),
context,
nullptr,
s3_configuration.request_settings,
context->getFileProgressCallback());

auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String
{

@ -216,7 +216,19 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context
configuration.auth_settings.no_sign_request = no_sign_request;

if (configuration.format == "auto")
configuration.format = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(url).getPath()).value_or("auto");
{
if (configuration.url.archive_pattern.has_value())
{
configuration.format = FormatFactory::instance()
.tryGetFormatFromFileName(Poco::URI(configuration.url.archive_pattern.value()).getPath())
.value_or("auto");
}
else
{
configuration.format
= FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(configuration.url.uri_str).getPath()).value_or("auto");
}
}
}

configuration.keys = {configuration.url.key};
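
The new branch in the hunk above guesses the format from the archive pattern when the URL points into an archive, and from the URI path otherwise, falling back to "auto". A self-contained sketch of that fallback logic; the tryGetFormatFromFileName stand-in below recognizes only two extensions and is purely illustrative, not the real FormatFactory API:

#include <iostream>
#include <optional>
#include <string>

// Hypothetical stand-in for format detection by file name: maps a couple of
// extensions to format names and returns std::nullopt for everything else.
std::optional<std::string> tryGetFormatFromFileName(const std::string & name)
{
    auto ends_with = [&](const std::string & suffix)
    { return name.size() >= suffix.size() && name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0; };
    if (ends_with(".csv"))
        return std::string("CSV");
    if (ends_with(".parquet"))
        return std::string("Parquet");
    return std::nullopt;
}

int main()
{
    // Mirrors the diff: when the URL addresses an archive, the format is guessed from the
    // archive pattern (the inner file names); otherwise it is guessed from the URI path.
    std::optional<std::string> archive_pattern = std::string("*.parquet");
    std::string uri_path = "/bucket/batch.zip";

    std::string format = tryGetFormatFromFileName(archive_pattern ? *archive_pattern : uri_path).value_or("auto");
    std::cout << format << '\n'; // Parquet
}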

@ -75,6 +75,12 @@ def get_options(i: int, upgrade_check: bool) -> str:
if not upgrade_check:
client_options.append("ignore_drop_queries_probability=0.5")

if random.random() < 0.2:
client_options.append("allow_experimental_parallel_reading_from_replicas=1")
client_options.append("max_parallel_replicas=3")
client_options.append("cluster_for_parallel_replicas='parallel_replicas'")
client_options.append("parallel_replicas_for_non_replicated_merge_tree=1")

if client_options:
options.append(" --client-option " + " ".join(client_options))

@ -300,11 +300,44 @@ list_children () {
echo "$children"
}

while true; do
runner_pid=$(pgrep Runner.Listener)
echo "Got runner pid '$runner_pid'"
# There's possibility that it fails because the runner's version is outdated,
# so after the first failure we'll try to launch it with enabled autoupdate.
#
# We'll fail and terminate after 10 consequent failures.
ATTEMPT=0
# In `kill` 0 means "all processes in process group", -1 is "all but PID 1"
# We use `-2` to get an error
RUNNER_PID=-2

while true; do
# Does not send signal, but checks that the process $RUNNER_PID is running
if kill -0 -- $RUNNER_PID; then
ATTEMPT=0
echo "Runner is working with pid $RUNNER_PID, checking the metadata in background"
check_proceed_spot_termination

if ! is_job_assigned; then
RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$RUNNER_PID" 2>/dev/null || date +%s) ))
echo "The runner is launched $RUNNER_AGE seconds ago and still hasn't received a job"
if (( 60 < RUNNER_AGE )); then
echo "Attempt to delete the runner for a graceful shutdown"
sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \
|| continue
echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down"
terminate_and_exit
fi
fi
else
if [ "$RUNNER_PID" != "-2" ]; then
wait $RUNNER_PID \
&& echo "Runner with PID $RUNNER_PID successfully finished" \
|| echo "Attempt $((++ATTEMPT)) to start the runner"
fi
if (( ATTEMPT > 10 )); then
echo "The runner has failed to start after $ATTEMPT attempt. Give up and terminate it"
terminate_and_exit
fi

if [ -z "$runner_pid" ]; then
cd $RUNNER_HOME || terminate_and_exit
detect_delayed_termination
# If runner is not active, check that it needs to terminate itself
@ -314,37 +347,50 @@ while true; do
check_proceed_spot_termination force

echo "Going to configure runner"
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token "$(get_runner_token)" \
--ephemeral --disableupdate --unattended \
--runnergroup Default --labels "$LABELS" --work _work --name "$INSTANCE_ID"
token_args=(--token "$(get_runner_token)")
config_args=(
"${token_args[@]}" --url "$RUNNER_URL"
--ephemeral --unattended --replace --runnergroup Default
--labels "$LABELS" --work _work --name "$INSTANCE_ID"
)
if (( ATTEMPT > 1 )); then
echo 'The runner failed to start at least once. Removing it and then configuring with autoupdate enabled.'
sudo -u ubuntu ./config.sh remove "${token_args[@]}"
sudo -u ubuntu ./config.sh "${config_args[@]}"
else
echo "Configure runner with disabled autoupdate"
config_args+=("--disableupdate")
sudo -u ubuntu ./config.sh "${config_args[@]}"
fi

echo "Another one check to avoid race between runner and infrastructure"
no_terminating_metadata || terminate_on_event
check_spot_instance_is_old && terminate_and_exit
check_proceed_spot_termination force

# There were some failures to start the Job because of trash in _work
rm -rf _work

# https://github.com/actions/runner/issues/3266
# We're unable to know if the runner is failed to start.
echo 'Monkey-patching run helpers to get genuine exit code of the runner'
for script in run.sh run-helper.sh.template; do
# shellcheck disable=SC2016
grep -q 'exit 0$' "$script" && \
sed 's/exit 0/exit $returnCode/' -i "$script" && \
echo "Script $script is patched"
done

echo "Run"
sudo -u ubuntu \
ACTIONS_RUNNER_HOOK_JOB_STARTED=/tmp/actions-hooks/pre-run.sh \
ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/tmp/actions-hooks/post-run.sh \
./run.sh &
sleep 10
else
echo "Runner is working with pid $runner_pid, checking the metadata in background"
check_proceed_spot_termination
RUNNER_PID=$!

if ! is_job_assigned; then
RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$runner_pid" 2>/dev/null || date +%s) ))
echo "The runner is launched $RUNNER_AGE seconds ago and still has hot received the job"
if (( 60 < RUNNER_AGE )); then
echo "Attempt to delete the runner for a graceful shutdown"
sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \
|| continue
echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down"
terminate_and_exit
fi
fi
sleep 10
fi

sleep 5
done

@ -9,7 +9,7 @@ set -xeuo pipefail

echo "Running prepare script"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_VERSION=2.315.0
export RUNNER_VERSION=2.316.1
export RUNNER_HOME=/home/ubuntu/actions-runner

deb_arch() {
@ -155,11 +155,10 @@ apt-get install tailscale --yes --no-install-recommends

# Create a common script for the instances
mkdir /usr/local/share/scripts -p
cat > /usr/local/share/scripts/init-network.sh << 'EOF'
#!/usr/bin/env bash

setup_cloudflare_dns() {
# Add cloudflare DNS as a fallback
# Get default gateway interface
local IFACE ETH_DNS CLOUDFLARE_NS new_dns
IFACE=$(ip --json route list | jq '.[]|select(.dst == "default").dev' --raw-output)
# `Link 2 (eth0): 172.31.0.2`
ETH_DNS=$(resolvectl dns "$IFACE") || :
@ -171,15 +170,41 @@ if [[ "$ETH_DNS" ]] && [[ "${ETH_DNS#*: }" != *"$CLOUDFLARE_NS"* ]]; then
new_dns=(${ETH_DNS} "$CLOUDFLARE_NS")
resolvectl dns "$IFACE" "${new_dns[@]}"
fi
}

setup_tailscale() {
# Setup tailscale, the very first action
local TS_API_CLIENT_ID TS_API_CLIENT_SECRET TS_AUTHKEY RUNNER_TYPE
TS_API_CLIENT_ID=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-id --query 'Parameter.Value' --output text --with-decryption)
TS_API_CLIENT_SECRET=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-secret --query 'Parameter.Value' --output text --with-decryption)
export TS_API_CLIENT_ID TS_API_CLIENT_SECRET
TS_AUTHKEY=$(get-authkey -tags tag:svc-core-ci-github -reusable -ephemeral)
tailscale up --ssh --auth-key="$TS_AUTHKEY" --hostname="ci-runner-$INSTANCE_ID"

RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text)
RUNNER_TYPE=${RUNNER_TYPE:-unknown}
# Clean possible garbage from the runner type
RUNNER_TYPE=${RUNNER_TYPE//[^0-9a-z]/-}
TS_AUTHKEY=$(TS_API_CLIENT_ID="$TS_API_CLIENT_ID" TS_API_CLIENT_SECRET="$TS_API_CLIENT_SECRET" \
get-authkey -tags tag:svc-core-ci-github -ephemeral)
tailscale up --ssh --auth-key="$TS_AUTHKEY" --hostname="ci-runner-$RUNNER_TYPE-$INSTANCE_ID"
}

cat > /usr/local/share/scripts/init-network.sh << EOF
#!/usr/bin/env bash
$(declare -f setup_cloudflare_dns)

$(declare -f setup_tailscale)

# If the script is sourced, it will return now and won't execute functions
return 0 &>/dev/null || :

echo Setup Cloudflare DNS
setup_cloudflare_dns

echo Setup Tailscale VPN
setup_tailscale
EOF

chmod +x /usr/local/share/scripts/init-network.sh


# The following line is used in aws TOE check.
touch /var/tmp/clickhouse-ci-ami.success

@ -2549,15 +2549,15 @@ def reportLogStats(args):
WITH
240 AS mins,
(
SELECT (count(), sum(length(message)))
SELECT (count(), sum(length(toValidUTF8(message))))
FROM system.text_log
WHERE (now() - toIntervalMinute(mins)) < event_time
) AS total
SELECT
count() AS count,
round(count / (total.1), 3) AS `count_%`,
formatReadableSize(sum(length(message))) AS size,
round(sum(length(message)) / (total.2), 3) AS `size_%`,
formatReadableSize(sum(length(toValidUTF8(message)))) AS size,
round(sum(length(toValidUTF8(message))) / (total.2), 3) AS `size_%`,
countDistinct(logger_name) AS uniq_loggers,
countDistinct(thread_id) AS uniq_threads,
groupArrayDistinct(toString(level)) AS levels,
@ -2580,8 +2580,8 @@ def reportLogStats(args):
240 AS mins
SELECT
count() AS count,
substr(replaceRegexpAll(message, '[^A-Za-z]+', ''), 1, 32) AS pattern,
substr(any(message), 1, 256) as runtime_message,
substr(replaceRegexpAll(toValidUTF8(message), '[^A-Za-z]+', ''), 1, 32) AS pattern,
substr(any(toValidUTF8(message)), 1, 256) as runtime_message,
any((extract(source_file, '/[a-zA-Z0-9_]+\\.[a-z]+'), source_line)) as line
FROM system.text_log
WHERE (now() - toIntervalMinute(mins)) < event_time AND message_format_string = ''
@ -2596,7 +2596,7 @@ def reportLogStats(args):
print("\n")

query = """
SELECT message_format_string, count(), any(message) AS any_message
SELECT message_format_string, count(), any(toValidUTF8(message)) AS any_message
FROM system.text_log
WHERE (now() - toIntervalMinute(240)) < event_time
AND (message NOT LIKE (replaceRegexpAll(message_format_string, '{[:.0-9dfx]*}', '%') AS s))
@ -2631,8 +2631,8 @@ def reportLogStats(args):
'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}',
'Attempt to read after eof', 'String size is too big ({}), maximum: {}'
) AS known_short_messages
SELECT count() AS c, message_format_string, substr(any(message), 1, 120),
min(if(length(regexpExtract(message, '(.*)\\([A-Z0-9_]+\\)')) as prefix_len > 0, prefix_len, length(message)) - 26 AS length_without_exception_boilerplate) AS min_length_without_exception_boilerplate
SELECT count() AS c, message_format_string, substr(any(toValidUTF8(message)), 1, 120),
min(if(length(regexpExtract(toValidUTF8(message), '(.*)\\([A-Z0-9_]+\\)')) as prefix_len > 0, prefix_len, length(toValidUTF8(message))) - 26 AS length_without_exception_boilerplate) AS min_length_without_exception_boilerplate
FROM system.text_log
WHERE (now() - toIntervalMinute(240)) < event_time
AND (length(message_format_string) < 16

@ -42,7 +42,7 @@ def test_access_rights_for_function():
function_resolution_error = instance.query_and_get_error("SELECT MySum(1, 2)")
assert (
"Unknown function MySum" in function_resolution_error
or "Function with name 'MySum' does not exists." in function_resolution_error
or "Function with name 'MySum' does not exist." in function_resolution_error
)

instance.query("REVOKE CREATE FUNCTION ON *.* FROM A")

71 tests/integration/test_intersecting_parts/test.py Normal file
|
||||
import pytest
|
||||
import logging
|
||||
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
node = cluster.add_instance("node", with_zookeeper=True)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def started_cluster():
|
||||
try:
|
||||
cluster.start()
|
||||
|
||||
yield cluster
|
||||
|
||||
finally:
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
# This test constructs intersecting parts intentionally. It's not an elegant test.
# TODO(hanfei): write a test which select part 1_1 merging with part 2_2 and drop range.
def test_intersect_parts_when_restart(started_cluster):
node.query(
"""
CREATE TABLE data (
key Int
)
ENGINE = ReplicatedMergeTree('/ch/tables/default/data', 'node')
ORDER BY key;
"""
)
node.query("system stop cleanup data")
node.query("INSERT INTO data values (1)")
node.query("INSERT INTO data values (2)")
node.query("INSERT INTO data values (3)")
node.query("INSERT INTO data values (4)")
node.query("ALTER TABLE data DROP PART 'all_1_1_0'")
node.query("ALTER TABLE data DROP PART 'all_2_2_0'")
node.query("OPTIMIZE TABLE data FINAL")

part_path = node.query(
"SELECT path FROM system.parts WHERE table = 'data' and name = 'all_0_3_1'"
).strip()

assert len(part_path) != 0

node.query("detach table data")
new_path = part_path[:-6] + "1_2_3"
node.exec_in_container(
[
"bash",
"-c",
"cp -r {p} {p1}".format(p=part_path, p1=new_path),
],
privileged=True,
)

# mock empty part
node.exec_in_container(
[
"bash",
"-c",
"echo -n 0 > {p1}/count.txt".format(p1=new_path),
],
privileged=True,
)

node.query("attach table data")
data_size = node.query("SELECT sum(key) FROM data").strip()
assert data_size == "5"

@ -47,24 +47,25 @@ def test_big_family(client: KeeperClient):

assert response == TSV(
[
["/test_big_family/1", "5"],
["/test_big_family/2", "3"],
["/test_big_family/2/3", "0"],
["/test_big_family/2/2", "0"],
["/test_big_family/2/1", "0"],
["/test_big_family/1/5", "0"],
["/test_big_family/1/4", "0"],
["/test_big_family/1/3", "0"],
["/test_big_family/1/2", "0"],
["/test_big_family/1/1", "0"],
["/test_big_family", "11"],
["/test_big_family/1", "6"],
["/test_big_family/2", "4"],
["/test_big_family/2/3", "1"],
["/test_big_family/2/2", "1"],
["/test_big_family/2/1", "1"],
["/test_big_family/1/5", "1"],
["/test_big_family/1/4", "1"],
["/test_big_family/1/3", "1"],
["/test_big_family/1/2", "1"],
]
)

response = client.find_big_family("/test_big_family", 1)
response = client.find_big_family("/test_big_family", 2)

assert response == TSV(
[
["/test_big_family/1", "5"],
["/test_big_family", "11"],
["/test_big_family/1", "6"],
]
)

@ -223,4 +223,4 @@ def test_corrupted_unexpected_part_ultimate():
== "1\n"
)

assert node.query("SELECT sum(key) FROM broken_table_3") == "190\n"
assert node.query("SELECT sum(key) FROM broken_table_3") == "145\n"

@ -2,7 +2,7 @@ import random
import string

import pytest
from helpers.cluster import ClickHouseCluster
from helpers.cluster import ClickHouseCluster, is_arm

cluster = ClickHouseCluster(__file__)

@ -255,6 +255,11 @@ def test_uncompressed_cache_plus_zstd_codec(start_cluster):


def test_preconfigured_deflateqpl_codec(start_cluster):
if is_arm():
pytest.skip(
"Skipping test because it's special test for Intel code (doesn't work on ARM)"
)

node6.query(
"""
CREATE TABLE compression_codec_multiple_with_key (

@ -5,4 +5,5 @@
<initialization_retry_period>10</initialization_retry_period>
</merge_tree>
<max_database_replicated_create_table_thread_pool_size>50</max_database_replicated_create_table_thread_pool_size>
<allow_experimental_transactions>42</allow_experimental_transactions>
</clickhouse>

@ -0,0 +1,18 @@
<clickhouse>
<profiles>
<default>
<allow_drop_detached>1</allow_drop_detached>
<allow_experimental_alter_materialized_view_structure>1</allow_experimental_alter_materialized_view_structure>
<allow_experimental_object_type>0</allow_experimental_object_type>
<allow_suspicious_codecs>0</allow_suspicious_codecs>

<throw_on_unsupported_query_inside_transaction>0</throw_on_unsupported_query_inside_transaction>
<implicit_transaction>1</implicit_transaction>
</default>
</profiles>
<users>
<default>
<profile>default</profile>
</default>
</users>
</clickhouse>

@ -0,0 +1,17 @@
<clickhouse>
<profiles>
<default>
<allow_drop_detached>1</allow_drop_detached>
<allow_experimental_alter_materialized_view_structure>1</allow_experimental_alter_materialized_view_structure>
<allow_experimental_object_type>0</allow_experimental_object_type>
<allow_suspicious_codecs>0</allow_suspicious_codecs>

<throw_on_unsupported_query_inside_transaction>0</throw_on_unsupported_query_inside_transaction>
</default>
</profiles>
<users>
<default>
<profile>default</profile>
</default>
</users>
</clickhouse>
Some files were not shown because too many files have changed in this diff.