Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-22 07:31:57 +00:00)

Commit f7ca8513d0: Merge branch 'master' into uninteresting-changes
docs/en/operations/system-tables/detached_tables.md (new file, 35 lines)
@@ -0,0 +1,35 @@
---
slug: /en/operations/system-tables/detached_tables
---

# detached_tables

Contains information about each detached table.

Columns:

- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in.

- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.

- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table UUID (Atomic database).

- `metadata_path` ([String](../../sql-reference/data-types/string.md)) — Path to the table metadata in the file system.

- `is_permanently` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag indicating that the table was detached PERMANENTLY.

**Example**

```sql
SELECT * FROM system.detached_tables FORMAT Vertical;
```

```text
Row 1:
──────
database:       base
table:          t1
uuid:           81b1c20a-b7c6-4116-a2ce-7583fb6b6736
metadata_path:  /var/lib/clickhouse/store/461/461cf698-fd0b-406d-8c01-5d8fd5748a91/t1.sql
is_permanently: 1
```
@@ -567,12 +567,13 @@ While no standard or recommendation exists for the epoch of Snowflake IDs, imple
 **Syntax**
 
 ``` sql
-generateSnowflakeID([expr])
+generateSnowflakeID([expr, [machine_id]])
 ```
 
 **Arguments**
 
 - `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional.
+- `machine_id` — A machine ID, the lowest 10 bits are used. [Int64](../data-types/int-uint.md). Optional.
 
 **Returned value**
 
@@ -608,6 +609,16 @@ SELECT generateSnowflakeID(1), generateSnowflakeID(2);
 └────────────────────────┴────────────────────────┘
 ```
 
+**Example with expression and a machine ID**
+
+```
+SELECT generateSnowflakeID('expr', 1);
+
+┌─generateSnowflakeID('expr', 1)─┐
+│            7201148511606784002 │
+└────────────────────────────────┘
+```
+
 ## snowflakeToDateTime
 
 :::warning
@@ -82,14 +82,14 @@ FROM LEFT_RIGHT
 SELECT
     left,
     right,
-    if(left < right, 'left is smaller than right', 'right is greater or equal than left') AS is_smaller
+    if(left < right, 'left is smaller than right', 'right is smaller or equal than left') AS is_smaller
 FROM LEFT_RIGHT
 WHERE isNotNull(left) AND isNotNull(right)
 
 ┌─left─┬─right─┬─is_smaller──────────────────────────┐
 │    1 │     3 │ left is smaller than right          │
-│    2 │     2 │ right is greater or equal than left │
-│    3 │     1 │ right is greater or equal than left │
+│    2 │     2 │ right is smaller or equal than left │
+│    3 │     1 │ right is smaller or equal than left │
 └──────┴───────┴─────────────────────────────────────┘
 ```
 
@@ -186,6 +186,8 @@ void Client::parseConnectionsCredentials(Poco::Util::AbstractConfiguration & con
             history_file = home_path + "/" + history_file.substr(1);
             config.setString("history_file", history_file);
         }
+        if (config.has(prefix + ".accept-invalid-certificate"))
+            config.setBool("accept-invalid-certificate", config.getBool(prefix + ".accept-invalid-certificate"));
     }
 
     if (!connection_name.empty() && !connection_found)
@@ -277,6 +279,12 @@ void Client::initialize(Poco::Util::Application & self)
     else if (config().has("connection"))
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "--connection was specified, but config does not exist");
 
+    if (config().has("accept-invalid-certificate"))
+    {
+        config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler");
+        config().setString("openSSL.client.verificationMode", "none");
+    }
+
     /** getenv is thread-safe in Linux glibc and in all sane libc implementations.
      * But the standard does not guarantee that subsequent calls will not rewrite the value by returned pointer.
      *
@@ -731,7 +739,7 @@ bool Client::processWithFuzzing(const String & full_query)
     }
     if (auto *q = orig_ast->as<ASTSetQuery>())
     {
-        if (auto *setDialect = q->changes.tryGet("dialect"); setDialect && setDialect->safeGet<String>() == "kusto")
+        if (auto *set_dialect = q->changes.tryGet("dialect"); set_dialect && set_dialect->safeGet<String>() == "kusto")
             return true;
     }
 
@@ -1,5 +1,6 @@
 <!-- Config set into /etc/clickhouse-client/. It's used if no other configs are found. -->
 <config>
+    <!-- Shorthand for self-signed combination in openSSL section below: <accept-invalid-certificate>1</accept-invalid-certificate> -->
     <openSSL>
         <client> <!-- Used for connection to server's secure tcp port -->
             <loadDefaultCAFile>true</loadDefaultCAFile>
@@ -72,6 +73,7 @@
 
         Default: "hostname" will be used. -->
         <name>default</name>
+        <!-- For self-signed server certificate when connecting to secure tcp: <accept-invalid-certificate>1</accept-invalid-certificate> -->
         <!-- Host that will be used for connection. -->
         <hostname>127.0.0.1</hostname>
         <port>9000</port>
@@ -1101,4 +1101,10 @@ void ColumnObject::finalize()
     checkObjectHasNoAmbiguosPaths(getKeys());
 }
 
+void ColumnObject::updateHashFast(SipHash & hash) const
+{
+    for (const auto & entry : subcolumns)
+        for (auto & part : entry->data.data)
+            part->updateHashFast(hash);
+}
 }
@@ -253,7 +253,7 @@ public:
     const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
     void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
     void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
-    void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
+    void updateHashFast(SipHash & hash) const override;
     void expand(const Filter &, bool) override { throwMustBeConcrete(); }
     bool hasEqualValues() const override { throwMustBeConcrete(); }
     size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); }
src/Common/CollectionOfDerived.h (new file, 184 lines)
@@ -0,0 +1,184 @@
#pragma once

#include <base/defines.h>

#include <Common/Exception.h>

#include <algorithm>
#include <memory>
#include <typeindex>
#include <vector>
#include <string>


namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}

/* This is a collection of objects derived from ItemBase.
 * The collection contains no more than one instance for each derived type.
 * The derived type is used to access the instance.
 */
template<class ItemBase>
class CollectionOfDerivedItems
{
public:
    using Self = CollectionOfDerivedItems<ItemBase>;
    using ItemPtr = std::shared_ptr<ItemBase>;

private:
    struct Rec
    {
        std::type_index type_idx;
        ItemPtr ptr;

        bool operator<(const Rec & other) const
        {
            return type_idx < other.type_idx;
        }

        bool operator<(const std::type_index & value) const
        {
            return type_idx < value;
        }

        bool operator==(const Rec & other) const
        {
            return type_idx == other.type_idx;
        }
    };
    using Records = std::vector<Rec>;

public:
    void swap(Self & other) noexcept
    {
        records.swap(other.records);
    }

    void clear()
    {
        records.clear();
    }

    bool empty() const
    {
        return records.empty();
    }

    size_t size() const
    {
        return records.size();
    }

    Self clone() const
    {
        Self result;
        result.records.reserve(records.size());
        for (const auto & rec : records)
            result.records.emplace_back(rec.type_idx, rec.ptr->clone());
        return result;
    }

    void append(Self && other)
    {
        auto middle_idx = records.size();
        std::move(other.records.begin(), other.records.end(), std::back_inserter(records));
        std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end());
        chassert(isUniqTypes());
    }

    template <class T>
    void add(std::shared_ptr<T> info)
    {
        static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
        return addImpl(std::type_index(typeid(T)), std::move(info));
    }

    template <class T>
    std::shared_ptr<T> get() const
    {
        static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
        auto it = getImpl(std::type_index(typeid(T)));
        if (it == records.cend())
            return nullptr;
        auto cast = std::dynamic_pointer_cast<T>(it->ptr);
        chassert(cast);
        return cast;
    }

    template <class T>
    std::shared_ptr<T> extract()
    {
        static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
        auto it = getImpl(std::type_index(typeid(T)));
        if (it == records.cend())
            return nullptr;
        auto cast = std::dynamic_pointer_cast<T>(it->ptr);
        chassert(cast);

        records.erase(it);
        return cast;
    }

    std::string debug() const
    {
        std::string result;

        for (auto & rec : records)
        {
            result.append(rec.type_idx.name());
            result.append(" ");
        }

        return result;
    }

private:
    bool isUniqTypes() const
    {
        auto uniq_it = std::adjacent_find(records.begin(), records.end());

        return uniq_it == records.end();
    }

    void addImpl(std::type_index type_idx, ItemPtr item)
    {
        auto it = std::lower_bound(records.begin(), records.end(), type_idx);

        if (it == records.end())
        {
            records.emplace_back(type_idx, item);
            return;
        }

        if (it->type_idx == type_idx)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name());

        records.emplace(it, type_idx, item);

        chassert(isUniqTypes());
    }

    Records::const_iterator getImpl(std::type_index type_idx) const
    {
        auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx);

        if (it == records.cend())
            return records.cend();

        if (it->type_idx != type_idx)
            return records.cend();

        return it;
    }

    Records records;
};

}
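For orientation, here is a minimal usage sketch of `CollectionOfDerivedItems` as declared above. It is not part of the commit; it assumes a build against the ClickHouse source tree (the header relies on `DB::Exception` and `chassert`), and the `ExampleInfoBase`/`TraceInfo`/`QualityInfo` types are hypothetical, introduced only for the illustration. `clone()` is never called, so these item types do not need to implement it.

```cpp
#include <Common/CollectionOfDerived.h>

#include <iostream>
#include <memory>
#include <string>

/// Hypothetical item hierarchy: the collection keeps at most one instance per derived type.
struct ExampleInfoBase
{
    virtual ~ExampleInfoBase() = default;
};

struct TraceInfo : public ExampleInfoBase
{
    std::string trace_id;
};

struct QualityInfo : public ExampleInfoBase
{
    double score = 0.0;
};

int main()
{
    DB::CollectionOfDerivedItems<ExampleInfoBase> infos;

    /// add() keys each item by its dynamic type; adding a second TraceInfo would throw LOGICAL_ERROR.
    auto trace = std::make_shared<TraceInfo>();
    trace->trace_id = "abc";
    infos.add(trace);
    infos.add(std::make_shared<QualityInfo>());

    /// get<T>() looks an item up by type and returns nullptr when that type is absent.
    if (auto found = infos.get<TraceInfo>())
        std::cout << "trace: " << found->trace_id << '\n';

    /// extract<T>() removes the item from the collection and hands ownership back to the caller.
    auto quality = infos.extract<QualityInfo>();
    std::cout << "score: " << quality->score << ", size after extract: " << infos.size() << '\n';
    return 0;
}
```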
@@ -36,7 +36,7 @@ class IColumn;
     M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\
     M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \
     M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \
-    M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \
+    M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \
     M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \
     M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \
     M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \
@@ -609,9 +609,8 @@ class IColumn;
     M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \
     M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
     M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \
-    M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
+    M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \
     M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \
-    M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \
     M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
     M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \
     M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
@@ -977,6 +976,7 @@ class IColumn;
 
 #define OBSOLETE_SETTINGS(M, ALIAS) \
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
+    MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 0) \
     MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \
    MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \
     MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \
@@ -1306,6 +1306,10 @@ void BaseDaemon::setupWatchdog()
         int status = 0;
         do
         {
+            // Close log files to prevent keeping descriptors of unlinked rotated files.
+            // On next log write files will be reopened.
+            closeLogs(logger());
+
             if (-1 != waitpid(pid, &status, WUNTRACED | WCONTINUED) || errno == ECHILD)
             {
                 if (WIFSTOPPED(status))
@@ -39,8 +39,10 @@ namespace ErrorCodes
 class AtomicDatabaseTablesSnapshotIterator final : public DatabaseTablesSnapshotIterator
 {
 public:
-    explicit AtomicDatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && base)
-        : DatabaseTablesSnapshotIterator(std::move(base)) {}
+    explicit AtomicDatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && base) noexcept
+        : DatabaseTablesSnapshotIterator(std::move(base))
+    {
+    }
     UUID uuid() const override { return table()->getStorageID().uuid; }
 };
 
@@ -111,12 +113,12 @@ StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String &
     // it is important to call the destructors of not_in_use without
     // locked mutex to avoid potential deadlock.
    DetachedTables not_in_use;
-    StoragePtr table;
+    StoragePtr detached_table;
     {
         std::lock_guard lock(mutex);
-        table = DatabaseOrdinary::detachTableUnlocked(name);
+        detached_table = DatabaseOrdinary::detachTableUnlocked(name);
         table_name_to_path.erase(name);
-        detached_tables.emplace(table->getStorageID().uuid, table);
+        detached_tables.emplace(detached_table->getStorageID().uuid, detached_table);
         not_in_use = cleanupDetachedTables();
     }
 
@@ -126,7 +128,7 @@ StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String &
         LOG_DEBUG(log, "Finished removing not used detached tables");
     }
 
-    return table;
+    return detached_table;
 }
 
 void DatabaseAtomic::dropTable(ContextPtr local_context, const String & table_name, bool sync)
@@ -1,7 +1,8 @@
 #pragma once
 
-#include <Databases/DatabasesCommon.h>
 #include <Databases/DatabaseOrdinary.h>
+#include <Databases/DatabasesCommon.h>
+#include <Storages/IStorage_fwd.h>
 
 
 namespace DB
@@ -188,6 +188,13 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n
 
     it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name);
 
+    LOG_DEBUG(log, "Add info for detached table {} to snapshot.", backQuote(table_name));
+    if (snapshot_detached_tables.contains(table_name))
+    {
+        LOG_DEBUG(log, "Clean info about detached table {} from snapshot.", backQuote(table_name));
+        snapshot_detached_tables.erase(table_name);
+    }
+
     CurrentMetrics::add(CurrentMetrics::AttachedTable, 1);
 }
 
@@ -204,6 +211,15 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta
         if (it->second.expiration_iterator != cache_expiration_queue.end())
             cache_expiration_queue.erase(it->second.expiration_iterator);
         tables_cache.erase(it);
+        LOG_DEBUG(log, "Add info for detached table {} to snapshot.", backQuote(table_name));
+        snapshot_detached_tables.emplace(
+            table_name,
+            SnapshotDetachedTable{
+                .database = res->getStorageID().database_name,
+                .table = res->getStorageID().table_name,
+                .uuid = res->getStorageID().uuid,
+                .metadata_path = getObjectMetadataPath(table_name),
+                .is_permanently = false});
 
         CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1);
     }
@@ -23,6 +23,7 @@
 #include <Storages/StorageFactory.h>
 #include <TableFunctions/TableFunctionFactory.h>
 #include <Common/CurrentMetrics.h>
+#include <Common/Exception.h>
 #include <Common/assert_cast.h>
 #include <Common/escapeForFileName.h>
 #include <Common/filesystemHelpers.h>
@@ -308,6 +309,16 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri
     try
     {
         FS::createFile(detached_permanently_flag);
+
+        std::lock_guard lock(mutex);
+        if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end())
+        {
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name);
+        }
+        else
+        {
+            it->second.is_permanently = true;
+        }
     }
     catch (Exception & e)
     {
@@ -189,7 +189,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables
     size_t prev_tables_count = metadata.parsed_tables.size();
     size_t prev_total_dictionaries = metadata.total_dictionaries;
 
-    auto process_metadata = [&metadata, is_startup, this](const String & file_name)
+    auto process_metadata = [&metadata, is_startup, local_context, this](const String & file_name)
     {
         fs::path path(getMetadataPath());
         fs::path file_path(file_name);
@@ -197,7 +197,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables
 
         try
         {
-            auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false);
+            auto ast = parseQueryFromMetadata(log, local_context, full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false);
             if (ast)
             {
                 FunctionNameNormalizer::visit(ast.get());
@@ -226,8 +226,23 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables
             if (fs::exists(full_path.string() + detached_suffix))
             {
                 const std::string table_name = unescapeForFileName(file_name.substr(0, file_name.size() - 4));
-                permanently_detached_tables.push_back(table_name);
+
                 LOG_DEBUG(log, "Skipping permanently detached table {}.", backQuote(table_name));
+
+                std::lock_guard lock(mutex);
+                permanently_detached_tables.push_back(table_name);
+
+                const auto detached_table_name = create_query->getTable();
+
+                snapshot_detached_tables.emplace(
+                    detached_table_name,
+                    SnapshotDetachedTable{
+                        .database = create_query->getDatabase(),
+                        .table = detached_table_name,
+                        .uuid = create_query->uuid,
+                        .metadata_path = getObjectMetadataPath(detached_table_name),
+                        .is_permanently = true});
+
+                LOG_TRACE(log, "Add permanently detached table {} to system.detached_tables", detached_table_name);
                 return;
             }
 
@@ -489,6 +504,12 @@ DatabaseTablesIteratorPtr DatabaseOrdinary::getTablesIterator(ContextPtr local_c
     return DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name, skip_not_loaded);
 }
 
+DatabaseDetachedTablesSnapshotIteratorPtr DatabaseOrdinary::getDetachedTablesIterator(
+    ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const
+{
+    return DatabaseWithOwnTablesBase::getDetachedTablesIterator(local_context, filter_by_table_name, skip_not_loaded);
+}
+
 Strings DatabaseOrdinary::getAllTableNames(ContextPtr) const
 {
     std::set<String> unique_names;
@@ -57,6 +57,9 @@ public:
     LoadTaskPtr startupDatabaseAsync(AsyncLoader & async_loader, LoadJobSet startup_after, LoadingStrictnessLevel mode) override;
 
     DatabaseTablesIteratorPtr getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override;
+    DatabaseDetachedTablesSnapshotIteratorPtr getDetachedTablesIterator(
+        ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override;
+
     Strings getAllTableNames(ContextPtr context) const override;
 
     void alterTable(
@@ -64,7 +67,11 @@ public:
         const StorageID & table_id,
         const StorageInMemoryMetadata & metadata) override;
 
-    Strings getNamesOfPermanentlyDetachedTables() const override { return permanently_detached_tables; }
+    Strings getNamesOfPermanentlyDetachedTables() const override
+    {
+        std::lock_guard lock(mutex);
+        return permanently_detached_tables;
+    }
 
 protected:
     virtual void commitAlterTable(
@@ -74,7 +81,7 @@ protected:
         const String & statement,
         ContextPtr query_context);
 
-    Strings permanently_detached_tables;
+    Strings permanently_detached_tables TSA_GUARDED_BY(mutex);
 
     std::unordered_map<String, LoadTaskPtr> load_table TSA_GUARDED_BY(mutex);
     std::unordered_map<String, LoadTaskPtr> startup_table TSA_GUARDED_BY(mutex);
@@ -2,12 +2,9 @@
 
 #include <Backups/BackupEntriesCollector.h>
 #include <Backups/RestorerFromBackup.h>
-#include <Common/typeid_cast.h>
-#include <Common/CurrentMetrics.h>
-#include <Common/escapeForFileName.h>
-#include <Interpreters/InterpreterCreateQuery.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/DatabaseCatalog.h>
+#include <Interpreters/InterpreterCreateQuery.h>
 #include <Parsers/ASTCreateQuery.h>
 #include <Parsers/ASTSelectWithUnionQuery.h>
 #include <Parsers/ParserCreateQuery.h>
@@ -16,6 +13,10 @@
 #include <Storages/StorageFactory.h>
 #include <Storages/Utils.h>
 #include <TableFunctions/TableFunctionFactory.h>
+#include <Common/CurrentMetrics.h>
+#include <Common/escapeForFileName.h>
+#include <Common/logger_useful.h>
+#include <Common/typeid_cast.h>
 
 
 namespace DB
@@ -237,6 +238,24 @@ DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(ContextPt
     return std::make_unique<DatabaseTablesSnapshotIterator>(std::move(filtered_tables), database_name);
 }
 
+DatabaseDetachedTablesSnapshotIteratorPtr DatabaseWithOwnTablesBase::getDetachedTablesIterator(
+    ContextPtr, const FilterByNameFunction & filter_by_table_name, bool /* skip_not_loaded */) const
+{
+    std::lock_guard lock(mutex);
+    if (!filter_by_table_name)
+        return std::make_unique<DatabaseDetachedTablesSnapshotIterator>(snapshot_detached_tables);
+
+    SnapshotDetachedTables filtered_detached_tables;
+    for (const auto & [detached_table_name, snapshot] : snapshot_detached_tables)
+        if (filter_by_table_name(detached_table_name))
+        {
+            filtered_detached_tables.emplace(detached_table_name, snapshot);
+        }
+
+    return std::make_unique<DatabaseDetachedTablesSnapshotIterator>(std::move(filtered_detached_tables));
+}
+
 bool DatabaseWithOwnTablesBase::empty() const
 {
     std::lock_guard lock(mutex);
@@ -251,27 +270,36 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(ContextPtr /* context_ */, con
 
 StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_name)
 {
-    StoragePtr res;
 
     auto it = tables.find(table_name);
     if (it == tables.end())
         throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
                         backQuote(database_name), backQuote(table_name));
-    res = it->second;
+
+    auto table_storage = it->second;
+
+    snapshot_detached_tables.emplace(
+        table_name,
+        SnapshotDetachedTable{
+            .database = it->second->getStorageID().getDatabaseName(),
+            .table = table_name,
+            .uuid = it->second->getStorageID().uuid,
+            .metadata_path = getObjectMetadataPath(table_name),
+            .is_permanently = false});
+
     tables.erase(it);
-    res->is_detached = true;
+    table_storage->is_detached = true;
 
-    if (res->isSystemStorage() == false)
-        CurrentMetrics::sub(getAttachedCounterForStorage(res), 1);
+    if (table_storage->isSystemStorage() == false)
+        CurrentMetrics::sub(getAttachedCounterForStorage(table_storage), 1);
 
-    auto table_id = res->getStorageID();
+    auto table_id = table_storage->getStorageID();
     if (table_id.hasUUID())
     {
         assert(database_name == DatabaseCatalog::TEMPORARY_DATABASE || getUUID() != UUIDHelpers::Nil);
         DatabaseCatalog::instance().removeUUIDMapping(table_id.uuid);
     }
 
-    return res;
+    return table_storage;
 }
 
 void DatabaseWithOwnTablesBase::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &)
@@ -300,6 +328,8 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c
         throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {} already exists.", table_id.getFullTableName());
     }
 
+    snapshot_detached_tables.erase(table_name);
+
     /// It is important to reset is_detached here since in case of RENAME in
     /// non-Atomic database the is_detached is set to true before RENAME.
     table->is_detached = false;
@@ -337,6 +367,7 @@ void DatabaseWithOwnTablesBase::shutdown()
 
         std::lock_guard lock(mutex);
         tables.clear();
+        snapshot_detached_tables.clear();
     }
 
 DatabaseWithOwnTablesBase::~DatabaseWithOwnTablesBase()
@@ -37,6 +37,9 @@ public:
 
     DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override;
 
+    DatabaseDetachedTablesSnapshotIteratorPtr
+    getDetachedTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override;
+
     std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override;
     void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr<IRestoreCoordination> restore_coordination, UInt64 timeout_ms) override;
 
@@ -46,12 +49,13 @@ public:
 
 protected:
     Tables tables TSA_GUARDED_BY(mutex);
+    SnapshotDetachedTables snapshot_detached_tables TSA_GUARDED_BY(mutex);
     LoggerPtr log;
 
     DatabaseWithOwnTablesBase(const String & name_, const String & logger, ContextPtr context);
 
     void attachTableUnlocked(const String & table_name, const StoragePtr & table) TSA_REQUIRES(mutex);
     StoragePtr detachTableUnlocked(const String & table_name) TSA_REQUIRES(mutex);
     StoragePtr getTableUnlocked(const String & table_name) const TSA_REQUIRES(mutex);
     StoragePtr tryGetTableNoWait(const String & table_name) const;
 };
@@ -5,20 +5,22 @@
 #include <Interpreters/Context_fwd.h>
 #include <Interpreters/executeQuery.h>
 #include <Parsers/IAST_fwd.h>
+#include <QueryPipeline/BlockIO.h>
+#include <Storages/IStorage.h>
 #include <Storages/IStorage_fwd.h>
 #include <base/types.h>
-#include <Common/Exception.h>
 #include <Common/AsyncLoader.h>
+#include <Common/Exception.h>
 #include <Common/PoolId.h>
 #include <Common/ThreadPool_fwd.h>
-#include <QueryPipeline/BlockIO.h>
 
 #include <ctime>
 #include <functional>
+#include <map>
 #include <memory>
 #include <mutex>
+#include <stdexcept>
 #include <vector>
-#include <map>
 
 
 namespace DB
@@ -110,6 +112,57 @@ public:
 
 using DatabaseTablesIteratorPtr = std::unique_ptr<IDatabaseTablesIterator>;
 
+struct SnapshotDetachedTable final
+{
+    String database;
+    String table;
+    UUID uuid = UUIDHelpers::Nil;
+    String metadata_path;
+    bool is_permanently{};
+};
+
+class DatabaseDetachedTablesSnapshotIterator
+{
+private:
+    SnapshotDetachedTables snapshot;
+    SnapshotDetachedTables::iterator it;
+
+protected:
+    DatabaseDetachedTablesSnapshotIterator(DatabaseDetachedTablesSnapshotIterator && other) noexcept
+    {
+        size_t idx = std::distance(other.snapshot.begin(), other.it);
+        std::swap(snapshot, other.snapshot);
+        other.it = other.snapshot.end();
+        it = snapshot.begin();
+        std::advance(it, idx);
+    }
+
+public:
+    explicit DatabaseDetachedTablesSnapshotIterator(const SnapshotDetachedTables & tables_) : snapshot(tables_), it(snapshot.begin())
+    {
+    }
+
+    explicit DatabaseDetachedTablesSnapshotIterator(SnapshotDetachedTables && tables_) : snapshot(std::move(tables_)), it(snapshot.begin())
+    {
+    }
+
+    void next() { ++it; }
+
+    bool isValid() const { return it != snapshot.end(); }
+
+    String database() const { return it->second.database; }
+
+    String table() const { return it->second.table; }
+
+    UUID uuid() const { return it->second.uuid; }
+
+    String metadataPath() const { return it->second.metadata_path; }
+
+    bool isPermanently() const { return it->second.is_permanently; }
+};
+
+using DatabaseDetachedTablesSnapshotIteratorPtr = std::unique_ptr<DatabaseDetachedTablesSnapshotIterator>;
+
 
 /** Database engine.
  * It is responsible for:
@@ -232,6 +285,12 @@ public:
     /// Wait for all tables to be loaded and started up. If `skip_not_loaded` is true, then not yet loaded or not yet started up (at the moment of iterator creation) tables are excluded.
     virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}, bool skip_not_loaded = false) const = 0; /// NOLINT
 
+    virtual DatabaseDetachedTablesSnapshotIteratorPtr getDetachedTablesIterator(
+        ContextPtr /*context*/, const FilterByNameFunction & /*filter_by_table_name = {}*/, bool /*skip_not_loaded = false*/) const
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get detached tables for Database{}", getEngineName());
+    }
+
     /// Returns list of table names.
     virtual Strings getAllTableNames(ContextPtr context) const
     {
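As a rough sketch of how the new detached-tables iterator is meant to be consumed (this is not code from the commit; `DB::DatabasePtr` and a valid `ContextPtr` are assumed to come from elsewhere, for example the DatabaseCatalog, and the UUID column is skipped to keep the printing trivial):

```cpp
#include <Databases/IDatabase.h>

#include <iostream>

/// Walks the detached-tables snapshot of one database and prints it.
/// Engines that do not track detached tables throw NOT_IMPLEMENTED from the default implementation above.
void printDetachedTables(const DB::DatabasePtr & database, DB::ContextPtr context)
{
    auto it = database->getDetachedTablesIterator(context, /* filter_by_table_name */ {}, /* skip_not_loaded */ false);
    for (; it->isValid(); it->next())
    {
        std::cout << it->database() << '.' << it->table()
                  << " metadata: " << it->metadataPath()
                  << " permanently: " << it->isPermanently() << '\n';
    }
}
```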
@@ -4,10 +4,10 @@
 #include <Functions/FunctionHelpers.h>
 #include <Core/ServerUUID.h>
 #include <Common/Logger.h>
+#include <Common/ErrorCodes.h>
 #include <Common/logger_useful.h>
 #include "base/types.h"
 
 namespace DB
 {
 
@@ -96,10 +96,11 @@ struct SnowflakeIdRange
 /// 1. calculate Snowflake ID by current timestamp (`now`)
 /// 2. `begin = max(available, now)`
 /// 3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow
-SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count)
+SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, uint64_t machine_id, size_t input_rows_count)
 {
     /// 1. `now`
-    SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = getMachineId(), .machine_seq_num = 0};
+    SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = machine_id, .machine_seq_num = 0};
 
     /// 2. `begin`
     if (begin.timestamp <= available.timestamp)
@@ -128,13 +129,13 @@ struct Data
     /// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously.
     static inline std::atomic<uint64_t> lowest_available_snowflake_id = 0;
 
-    SnowflakeId reserveRange(size_t input_rows_count)
+    SnowflakeId reserveRange(uint64_t machine_id, size_t input_rows_count)
     {
         uint64_t available_snowflake_id = lowest_available_snowflake_id.load();
         SnowflakeIdRange range;
         do
         {
-            range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count);
+            range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), machine_id, input_rows_count);
         }
         while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end)));
         /// CAS failed --> another thread updated `lowest_available_snowflake_id` and we re-try
@@ -165,24 +166,32 @@ public:
     {
         FunctionArgumentDescriptors mandatory_args;
         FunctionArgumentDescriptors optional_args{
-            {"expr", nullptr, nullptr, "Arbitrary expression"}
+            {"expr", nullptr, nullptr, "Arbitrary expression"},
+            {"machine_id", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeUInt), static_cast<FunctionArgumentDescriptor::ColumnValidator>(&isColumnConst), "const UInt*"}
         };
         validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
 
         return std::make_shared<DataTypeUInt64>();
     }
 
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {
         auto col_res = ColumnVector<UInt64>::create();
         typename ColumnVector<UInt64>::Container & vec_to = col_res->getData();
 
-        if (input_rows_count != 0)
+        if (input_rows_count > 0)
        {
             vec_to.resize(input_rows_count);
+
+            uint64_t machine_id = getMachineId();
+            if (arguments.size() == 2)
+            {
+                machine_id = arguments[1].column->getUInt(0);
+                machine_id &= (1ull << machine_id_bits_count) - 1;
+            }
+
             Data data;
-            SnowflakeId snowflake_id = data.reserveRange(input_rows_count); /// returns begin of available snowflake ids range
+            SnowflakeId snowflake_id = data.reserveRange(machine_id, input_rows_count);
 
             for (UInt64 & to_row : vec_to)
             {
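The `machine_id` handling above keeps only the lowest 10 bits of the supplied constant. A standalone illustration of that masking, not taken from the ClickHouse sources and with arbitrarily chosen input values:

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    // Mirrors the masking in executeImpl above: only the lowest 10 bits of the
    // user-supplied machine_id end up in the generated Snowflake ID.
    const uint64_t machine_id_bits_count = 10;
    const uint64_t mask = (1ull << machine_id_bits_count) - 1; // 0x3FF == 1023

    for (uint64_t requested : {1ull, 1023ull, 1024ull, 1025ull})
        std::cout << requested << " -> " << (requested & mask) << '\n';
    // prints: 1 -> 1, 1023 -> 1023, 1024 -> 0, 1025 -> 1
    return 0;
}
```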
@@ -208,10 +217,13 @@ public:
 REGISTER_FUNCTION(GenerateSnowflakeID)
 {
     FunctionDocumentation::Description description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits), and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
-    FunctionDocumentation::Syntax syntax = "generateSnowflakeID([expression])";
-    FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
+    FunctionDocumentation::Syntax syntax = "generateSnowflakeID([expression, [machine_id]])";
+    FunctionDocumentation::Arguments arguments = {
+        {"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."},
+        {"machine_id", "A machine ID, the lowest 10 bits are used. Optional."}
+    };
     FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64";
-    FunctionDocumentation::Examples examples = {{"single", "SELECT generateSnowflakeID()", "7201148511606784000"}, {"multiple", "SELECT generateSnowflakeID(1), generateSnowflakeID(2)", ""}};
+    FunctionDocumentation::Examples examples = {{"no_arguments", "SELECT generateSnowflakeID()", "7201148511606784000"}, {"with_machine_id", "SELECT generateSnowflakeID(1)", "7201148511606784001"}, {"with_expression_and_machine_id", "SELECT generateSnowflakeID('some_expression', 1)", "7201148511606784002"}};
     FunctionDocumentation::Categories categories = {"Snowflake ID"};
 
     factory.registerFunction<FunctionGenerateSnowflakeID>({description, syntax, arguments, returned_value, examples, categories});
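The description string above fixes the bit layout of a generated ID: one top zero bit, 41 bits of millisecond timestamp, 10 bits of machine ID, and a 12-bit per-millisecond counter. A small decomposition sketch following that layout (not part of the commit; the sample value is taken from the documentation example above, and the printed numbers depend on it):

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    // Splits a Snowflake ID into its documented fields:
    // [1 zero bit][41 bits unix_ts_ms][10 bits machine_id][12 bits machine_seq_num].
    const uint64_t id = 7201148511606784002ull;

    const uint64_t machine_seq_num = id & ((1ull << 12) - 1);
    const uint64_t machine_id = (id >> 12) & ((1ull << 10) - 1);
    const uint64_t unix_ts_ms = (id >> 22) & ((1ull << 41) - 1);

    std::cout << "timestamp_ms=" << unix_ts_ms
              << " machine_id=" << machine_id
              << " seq=" << machine_seq_num << '\n';
    return 0;
}
```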
@@ -301,7 +301,13 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const
     auto & insert_query = query->as<ASTInsertQuery &>();
     insert_query.async_insert_flush = true;
 
-    InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns);
+    InterpreterInsertQuery interpreter(
+        query,
+        query_context,
+        query_context->getSettingsRef().insert_allow_materialized_columns,
+        /* no_squash */ false,
+        /* no_destination */ false,
+        /* async_insert */ false);
     auto table = interpreter.getTable(insert_query);
     auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context);
 
@@ -784,7 +790,12 @@ try
         try
         {
             interpreter = std::make_unique<InterpreterInsertQuery>(
-                key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true);
+                key.query,
+                insert_context,
+                key.settings.insert_allow_materialized_columns,
+                false,
+                false,
+                true);
 
             pipeline = interpreter->execute().pipeline;
             chassert(pipeline.pushing());
@@ -1003,7 +1014,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
     }
 
     Chunk chunk(executor.getResultColumns(), total_rows);
-    chunk.setChunkInfo(std::move(chunk_info));
+    chunk.getChunkInfos().add(std::move(chunk_info));
     return chunk;
 }
 
@@ -1055,7 +1066,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries(
     }
 
     Chunk chunk(std::move(result_columns), total_rows);
-    chunk.setChunkInfo(std::move(chunk_info));
+    chunk.getChunkInfos().add(std::move(chunk_info));
     return chunk;
 }
 
@@ -2,6 +2,7 @@
 #include <Interpreters/InterpreterFactory.h>

 #include <algorithm>
+#include <memory>

 #include <Access/Common/AccessFlags.h>

@@ -24,6 +25,7 @@
 #include <Parsers/ASTCheckQuery.h>
 #include <Parsers/ASTSetQuery.h>

+#include <Processors/Chunk.h>
 #include <Processors/IAccumulatingTransform.h>
 #include <Processors/IInflatingTransform.h>
 #include <Processors/ISimpleTransform.h>
@@ -93,7 +95,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con
     return Chunk(std::move(columns), 1);
 }

-class TableCheckTask : public ChunkInfo
+class TableCheckTask : public ChunkInfoCloneable<TableCheckTask>
 {
 public:
     TableCheckTask(StorageID table_id, const std::variant<std::monostate, ASTPtr, String> & partition_or_part, ContextPtr context)
@@ -112,6 +114,12 @@ public:
         context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID());
     }

+    TableCheckTask(const TableCheckTask & other)
+        : table(other.table)
+        , check_data_tasks(other.check_data_tasks)
+        , is_finished(other.is_finished.load())
+    {}
+
     std::optional<CheckResult> checkNext() const
     {
         if (isFinished())
@@ -123,8 +131,8 @@ public:
             std::this_thread::sleep_for(sleep_time);
         });

-        IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks;
-        auto result = table->checkDataNext(check_data_tasks_);
+        IStorage::DataValidationTasksPtr tmp = check_data_tasks;
+        auto result = table->checkDataNext(tmp);
         is_finished = !result.has_value();
         return result;
     }
@@ -182,7 +190,7 @@ protected:
         /// source should return at least one row to start pipeline
         result.addColumn(ColumnUInt8::create(1, 1));
         /// actual data stored in chunk info
-        result.setChunkInfo(std::move(current_check_task));
+        result.getChunkInfos().add(std::move(current_check_task));
         return result;
     }

@@ -282,7 +290,7 @@ public:
 protected:
     void transform(Chunk & chunk) override
     {
-        auto table_check_task = std::dynamic_pointer_cast<const TableCheckTask>(chunk.getChunkInfo());
+        auto table_check_task = chunk.getChunkInfos().get<TableCheckTask>();
         auto check_result = table_check_task->checkNext();
         if (!check_result)
         {
@@ -1777,8 +1777,13 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create)
         else
             insert->select = create.select->clone();

-        return InterpreterInsertQuery(insert, getContext(),
-            getContext()->getSettingsRef().insert_allow_materialized_columns).execute();
+        return InterpreterInsertQuery(
+            insert,
+            getContext(),
+            getContext()->getSettingsRef().insert_allow_materialized_columns,
+            /* no_squash */ false,
+            /* no_destination */ false,
+            /* async_isnert */ false).execute();
     }

     return {};
@@ -535,7 +535,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
         }
         else if (dynamic_cast<const ASTInsertQuery *>(ast.getExplainedQuery().get()))
         {
-            InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext());
+            InterpreterInsertQuery insert(
+                ast.getExplainedQuery(),
+                getContext(),
+                /* allow_materialized */ false,
+                /* no_squash */ false,
+                /* no_destination */ false,
+                /* async_isnert */ false);
             auto io = insert.execute();
             printPipeline(io.pipeline.getProcessors(), buf);
         }
@@ -17,6 +17,7 @@
 #include <Interpreters/getTableExpressions.h>
 #include <Interpreters/processColumnTransformers.h>
 #include <Interpreters/InterpreterSelectQueryAnalyzer.h>
+#include <Interpreters/Context_fwd.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTInsertQuery.h>
 #include <Parsers/ASTSelectQuery.h>
@@ -27,6 +28,7 @@
 #include <Processors/Transforms/CountingTransform.h>
 #include <Processors/Transforms/ExpressionTransform.h>
 #include <Processors/Transforms/MaterializingTransform.h>
+#include <Processors/Transforms/DeduplicationTokenTransforms.h>
 #include <Processors/Transforms/SquashingTransform.h>
 #include <Processors/Transforms/PlanSquashingTransform.h>
 #include <Processors/Transforms/getSourceFromASTInsertQuery.h>
@@ -36,9 +38,11 @@
 #include <Storages/StorageMaterializedView.h>
 #include <Storages/WindowView/StorageWindowView.h>
 #include <TableFunctions/TableFunctionFactory.h>
+#include <Common/logger_useful.h>
 #include <Common/ThreadStatus.h>
 #include <Common/checkStackSize.h>
 #include <Common/ProfileEvents.h>
+#include "base/defines.h"


 namespace ProfileEvents
@ -395,28 +399,349 @@ Chain InterpreterInsertQuery::buildPreSinkChain(
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<std::vector<Chain>, std::vector<Chain>> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block)
|
||||||
|
{
|
||||||
|
chassert(presink_streams > 0);
|
||||||
|
chassert(sink_streams > 0);
|
||||||
|
|
||||||
|
ThreadGroupPtr running_group;
|
||||||
|
if (current_thread)
|
||||||
|
running_group = current_thread->getThreadGroup();
|
||||||
|
if (!running_group)
|
||||||
|
running_group = std::make_shared<ThreadGroup>(getContext());
|
||||||
|
|
||||||
|
std::vector<Chain> sink_chains;
|
||||||
|
std::vector<Chain> presink_chains;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < sink_streams; ++i)
|
||||||
|
{
|
||||||
|
auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr,
|
||||||
|
running_group, /* elapsed_counter_ms= */ nullptr);
|
||||||
|
|
||||||
|
sink_chains.emplace_back(std::move(out));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < presink_streams; ++i)
|
||||||
|
{
|
||||||
|
auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block);
|
||||||
|
presink_chains.emplace_back(std::move(out));
|
||||||
|
}
|
||||||
|
|
||||||
|
return {std::move(presink_chains), std::move(sink_chains)};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table)
|
||||||
|
{
|
||||||
|
const Settings & settings = getContext()->getSettingsRef();
|
||||||
|
|
||||||
|
auto metadata_snapshot = table->getInMemoryMetadataPtr();
|
||||||
|
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
|
||||||
|
|
||||||
|
bool is_trivial_insert_select = false;
|
||||||
|
|
||||||
|
if (settings.optimize_trivial_insert_select)
|
||||||
|
{
|
||||||
|
const auto & select_query = query.select->as<ASTSelectWithUnionQuery &>();
|
||||||
|
const auto & selects = select_query.list_of_selects->children;
|
||||||
|
const auto & union_modes = select_query.list_of_modes;
|
||||||
|
|
||||||
|
/// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries
|
||||||
|
const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; };
|
||||||
|
|
||||||
|
is_trivial_insert_select =
|
||||||
|
std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all))
|
||||||
|
&& std::all_of(selects.begin(), selects.end(), isTrivialSelect);
|
||||||
|
}
|
||||||
|
|
||||||
|
ContextPtr select_context = getContext();
|
||||||
|
|
||||||
|
if (is_trivial_insert_select)
|
||||||
|
{
|
||||||
|
/** When doing trivial INSERT INTO ... SELECT ... FROM table,
|
||||||
|
* don't need to process SELECT with more than max_insert_threads
|
||||||
|
* and it's reasonable to set block size for SELECT to the desired block size for INSERT
|
||||||
|
* to avoid unnecessary squashing.
|
||||||
|
*/
|
||||||
|
|
||||||
|
Settings new_settings = select_context->getSettings();
|
||||||
|
|
||||||
|
new_settings.max_threads = std::max<UInt64>(1, settings.max_insert_threads);
|
||||||
|
|
||||||
|
if (table->prefersLargeBlocks())
|
||||||
|
{
|
||||||
|
if (settings.min_insert_block_size_rows)
|
||||||
|
new_settings.max_block_size = settings.min_insert_block_size_rows;
|
||||||
|
if (settings.min_insert_block_size_bytes)
|
||||||
|
new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto context_for_trivial_select = Context::createCopy(context);
|
||||||
|
context_for_trivial_select->setSettings(new_settings);
|
||||||
|
context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames());
|
||||||
|
|
||||||
|
select_context = context_for_trivial_select;
|
||||||
|
}
|
||||||
|
|
||||||
|
QueryPipelineBuilder pipeline;
|
||||||
|
|
||||||
|
{
|
||||||
|
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
|
||||||
|
|
||||||
|
if (settings.allow_experimental_analyzer)
|
||||||
|
{
|
||||||
|
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options);
|
||||||
|
pipeline = interpreter_select_analyzer.buildQueryPipeline();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options);
|
||||||
|
pipeline = interpreter_select.buildQueryPipeline();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeline.dropTotalsAndExtremes();
|
||||||
|
|
||||||
|
/// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values.
|
||||||
|
if (getContext()->getSettingsRef().insert_null_as_default)
|
||||||
|
{
|
||||||
|
const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName();
|
||||||
|
const auto & query_columns = query_sample_block.getColumnsWithTypeAndName();
|
||||||
|
const auto & output_columns = metadata_snapshot->getColumns();
|
||||||
|
|
||||||
|
if (input_columns.size() == query_columns.size())
|
||||||
|
{
|
||||||
|
for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx)
|
||||||
|
{
|
||||||
|
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
|
||||||
|
/// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
|
||||||
|
if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type)
|
||||||
|
&& !isNullableOrLowCardinalityNullable(query_columns[col_idx].type)
|
||||||
|
&& !isVariant(query_columns[col_idx].type)
|
||||||
|
&& !isDynamic(query_columns[col_idx].type)
|
||||||
|
&& output_columns.has(query_columns[col_idx].name))
|
||||||
|
{
|
||||||
|
query_sample_block.setColumn(
|
||||||
|
col_idx,
|
||||||
|
ColumnWithTypeAndName(
|
||||||
|
makeNullableOrLowCardinalityNullable(query_columns[col_idx].column),
|
||||||
|
makeNullableOrLowCardinalityNullable(query_columns[col_idx].type),
|
||||||
|
query_columns[col_idx].name));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto actions_dag = ActionsDAG::makeConvertingActions(
|
||||||
|
pipeline.getHeader().getColumnsWithTypeAndName(),
|
||||||
|
query_sample_block.getColumnsWithTypeAndName(),
|
||||||
|
ActionsDAG::MatchColumnsMode::Position);
|
||||||
|
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes));
|
||||||
|
|
||||||
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
return std::make_shared<ExpressionTransform>(in_header, actions);
|
||||||
|
});
|
||||||
|
|
||||||
|
/// We need to convert Sparse columns to full, because it's destination storage
|
||||||
|
/// may not support it or may have different settings for applying Sparse serialization.
|
||||||
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
return std::make_shared<MaterializingTransform>(in_header);
|
||||||
|
});
|
||||||
|
|
||||||
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
auto context_ptr = getContext();
|
||||||
|
auto counting = std::make_shared<CountingTransform>(in_header, nullptr, context_ptr->getQuota());
|
||||||
|
counting->setProcessListElement(context_ptr->getProcessListElement());
|
||||||
|
counting->setProgressCallback(context_ptr->getProgressCallback());
|
||||||
|
|
||||||
|
return counting;
|
||||||
|
});
|
||||||
|
|
||||||
|
size_t num_select_threads = pipeline.getNumThreads();
|
||||||
|
|
||||||
|
pipeline.resize(1);
|
||||||
|
|
||||||
|
if (shouldAddSquashingFroStorage(table))
|
||||||
|
{
|
||||||
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
return std::make_shared<PlanSquashingTransform>(
|
||||||
|
in_header,
|
||||||
|
table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||||
|
table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
return std::make_shared<DeduplicationToken::AddTokenInfoTransform>(in_header);
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!settings.insert_deduplication_token.value.empty())
|
||||||
|
{
|
||||||
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
return std::make_shared<DeduplicationToken::SetUserTokenTransform>(settings.insert_deduplication_token.value, in_header);
|
||||||
|
});
|
||||||
|
|
||||||
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
return std::make_shared<DeduplicationToken::SetSourceBlockNumberTransform>(in_header);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Number of streams works like this:
|
||||||
|
/// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever
|
||||||
|
/// InterpreterSelectQuery ends up with.
|
||||||
|
/// * Use `max_insert_threads` streams for various insert-preparation steps, e.g.
|
||||||
|
/// materializing and squashing (too slow to do in one thread). That's `presink_chains`.
|
||||||
|
/// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage.
|
||||||
|
/// Otherwise ResizeProcessor them down to 1 stream.
|
||||||
|
|
||||||
|
size_t presink_streams_size = std::max<size_t>(settings.max_insert_threads, pipeline.getNumStreams());
|
||||||
|
if (settings.max_insert_threads.changed)
|
||||||
|
presink_streams_size = std::max<size_t>(1, settings.max_insert_threads);
|
||||||
|
|
||||||
|
size_t sink_streams_size = table->supportsParallelInsert() ? std::max<size_t>(1, settings.max_insert_threads) : 1;
|
||||||
|
|
||||||
|
size_t views_involved = table->isView() || !DatabaseCatalog::instance().getDependentViews(table->getStorageID()).empty();
|
||||||
|
if (!settings.parallel_view_processing && views_involved)
|
||||||
|
{
|
||||||
|
sink_streams_size = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto [presink_chains, sink_chains] = buildPreAndSinkChains(
|
||||||
|
presink_streams_size, sink_streams_size,
|
||||||
|
table, metadata_snapshot, query_sample_block);
|
||||||
|
|
||||||
|
pipeline.resize(presink_chains.size());
|
||||||
|
|
||||||
|
if (shouldAddSquashingFroStorage(table))
|
||||||
|
{
|
||||||
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
return std::make_shared<ApplySquashingTransform>(
|
||||||
|
in_header,
|
||||||
|
table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||||
|
table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto & chain : presink_chains)
|
||||||
|
pipeline.addResources(chain.detachResources());
|
||||||
|
pipeline.addChains(std::move(presink_chains));
|
||||||
|
|
||||||
|
pipeline.resize(sink_streams_size);
|
||||||
|
|
||||||
|
for (auto & chain : sink_chains)
|
||||||
|
pipeline.addResources(chain.detachResources());
|
||||||
|
pipeline.addChains(std::move(sink_chains));
|
||||||
|
|
||||||
|
if (!settings.parallel_view_processing && views_involved)
|
||||||
|
{
|
||||||
|
/// Don't use more threads for INSERT than for SELECT to reduce memory consumption.
|
||||||
|
if (pipeline.getNumThreads() > num_select_threads)
|
||||||
|
pipeline.setMaxThreads(num_select_threads);
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
return std::make_shared<EmptySink>(cur_header);
|
||||||
|
});
|
||||||
|
|
||||||
|
return QueryPipelineBuilder::getPipeline(std::move(pipeline));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table)
|
||||||
|
{
|
||||||
|
const Settings & settings = getContext()->getSettingsRef();
|
||||||
|
|
||||||
|
auto metadata_snapshot = table->getInMemoryMetadataPtr();
|
||||||
|
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
|
||||||
|
|
||||||
|
Chain chain;
|
||||||
|
|
||||||
|
{
|
||||||
|
auto [presink_chains, sink_chains] = buildPreAndSinkChains(
|
||||||
|
/* presink_streams */1, /* sink_streams */1,
|
||||||
|
table, metadata_snapshot, query_sample_block);
|
||||||
|
|
||||||
|
chain = std::move(presink_chains.front());
|
||||||
|
chain.appendChain(std::move(sink_chains.front()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!settings.insert_deduplication_token.value.empty())
|
||||||
|
{
|
||||||
|
chain.addSource(std::make_shared<DeduplicationToken::SetSourceBlockNumberTransform>(chain.getInputHeader()));
|
||||||
|
chain.addSource(std::make_shared<DeduplicationToken::SetUserTokenTransform>(settings.insert_deduplication_token.value, chain.getInputHeader()));
|
||||||
|
}
|
||||||
|
|
||||||
|
chain.addSource(std::make_shared<DeduplicationToken::AddTokenInfoTransform>(chain.getInputHeader()));
|
||||||
|
|
||||||
|
if (shouldAddSquashingFroStorage(table))
|
||||||
|
{
|
||||||
|
bool table_prefers_large_blocks = table->prefersLargeBlocks();
|
||||||
|
|
||||||
|
auto squashing = std::make_shared<ApplySquashingTransform>(
|
||||||
|
chain.getInputHeader(),
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
||||||
|
|
||||||
|
chain.addSource(std::move(squashing));
|
||||||
|
|
||||||
|
auto balancing = std::make_shared<PlanSquashingTransform>(
|
||||||
|
chain.getInputHeader(),
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
||||||
|
|
||||||
|
chain.addSource(std::move(balancing));
|
||||||
|
}
|
||||||
|
|
||||||
|
auto context_ptr = getContext();
|
||||||
|
auto counting = std::make_shared<CountingTransform>(chain.getInputHeader(), nullptr, context_ptr->getQuota());
|
||||||
|
counting->setProcessListElement(context_ptr->getProcessListElement());
|
||||||
|
counting->setProgressCallback(context_ptr->getProgressCallback());
|
||||||
|
chain.addSource(std::move(counting));
|
||||||
|
|
||||||
|
QueryPipeline pipeline = QueryPipeline(std::move(chain));
|
||||||
|
|
||||||
|
pipeline.setNumThreads(std::min<size_t>(pipeline.getNumThreads(), settings.max_threads));
|
||||||
|
pipeline.setConcurrencyControl(settings.use_concurrency_control);
|
||||||
|
|
||||||
|
if (query.hasInlinedData() && !async_insert)
|
||||||
|
{
|
||||||
|
/// can execute without additional data
|
||||||
|
auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr);
|
||||||
|
for (auto && buffer : owned_buffers)
|
||||||
|
format->addBuffer(std::move(buffer));
|
||||||
|
|
||||||
|
auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr);
|
||||||
|
pipeline.complete(std::move(pipe));
|
||||||
|
}
|
||||||
|
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
BlockIO InterpreterInsertQuery::execute()
|
BlockIO InterpreterInsertQuery::execute()
|
||||||
{
|
{
|
||||||
const Settings & settings = getContext()->getSettingsRef();
|
const Settings & settings = getContext()->getSettingsRef();
|
||||||
auto & query = query_ptr->as<ASTInsertQuery &>();
|
auto & query = query_ptr->as<ASTInsertQuery &>();
|
||||||
|
|
||||||
QueryPipelineBuilder pipeline;
|
|
||||||
std::optional<QueryPipeline> distributed_pipeline;
|
|
||||||
QueryPlanResourceHolder resources;
|
|
||||||
|
|
||||||
StoragePtr table = getTable(query);
|
StoragePtr table = getTable(query);
|
||||||
checkStorageSupportsTransactionsIfNeeded(table, getContext());
|
checkStorageSupportsTransactionsIfNeeded(table, getContext());
|
||||||
|
|
||||||
StoragePtr inner_table;
|
|
||||||
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
|
|
||||||
inner_table = mv->getTargetTable();
|
|
||||||
|
|
||||||
if (query.partition_by && !table->supportsPartitionBy())
|
if (query.partition_by && !table->supportsPartitionBy())
|
||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage");
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage");
|
||||||
|
|
||||||
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);
|
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);
|
||||||
auto metadata_snapshot = table->getInMemoryMetadataPtr();
|
|
||||||
|
|
||||||
|
auto metadata_snapshot = table->getInMemoryMetadataPtr();
|
||||||
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
|
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
|
||||||
|
|
||||||
/// For table functions we check access while executing
|
/// For table functions we check access while executing
|
||||||
@ -424,320 +749,45 @@ BlockIO InterpreterInsertQuery::execute()
|
|||||||
if (!query.table_function)
|
if (!query.table_function)
|
||||||
getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames());
|
getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames());
|
||||||
|
|
||||||
if (query.select && settings.parallel_distributed_insert_select)
|
if (!allow_materialized)
|
||||||
// Distributed INSERT SELECT
|
|
||||||
distributed_pipeline = table->distributedWrite(query, getContext());
|
|
||||||
|
|
||||||
std::vector<Chain> presink_chains;
|
|
||||||
std::vector<Chain> sink_chains;
|
|
||||||
if (!distributed_pipeline)
|
|
||||||
{
|
{
|
||||||
/// Number of streams works like this:
|
for (const auto & column : metadata_snapshot->getColumns())
|
||||||
/// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever
|
if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name))
|
||||||
/// InterpreterSelectQuery ends up with.
|
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name);
|
||||||
/// * Use `max_insert_threads` streams for various insert-preparation steps, e.g.
|
|
||||||
/// materializing and squashing (too slow to do in one thread). That's `presink_chains`.
|
|
||||||
/// * If the table supports parallel inserts, use the same streams for writing to IStorage.
|
|
||||||
/// Otherwise ResizeProcessor them down to 1 stream.
|
|
||||||
/// * If it's not an INSERT SELECT, forget all that and use one stream.
|
|
||||||
size_t pre_streams_size = 1;
|
|
||||||
size_t sink_streams_size = 1;
|
|
||||||
|
|
||||||
if (query.select)
|
|
||||||
{
|
|
||||||
bool is_trivial_insert_select = false;
|
|
||||||
|
|
||||||
if (settings.optimize_trivial_insert_select)
|
|
||||||
{
|
|
||||||
const auto & select_query = query.select->as<ASTSelectWithUnionQuery &>();
|
|
||||||
const auto & selects = select_query.list_of_selects->children;
|
|
||||||
const auto & union_modes = select_query.list_of_modes;
|
|
||||||
|
|
||||||
/// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries
|
|
||||||
const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; };
|
|
||||||
|
|
||||||
is_trivial_insert_select =
|
|
||||||
std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all))
|
|
||||||
&& std::all_of(selects.begin(), selects.end(), isTrivialSelect);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is_trivial_insert_select)
|
|
||||||
{
|
|
||||||
/** When doing trivial INSERT INTO ... SELECT ... FROM table,
|
|
||||||
* don't need to process SELECT with more than max_insert_threads
|
|
||||||
* and it's reasonable to set block size for SELECT to the desired block size for INSERT
|
|
||||||
* to avoid unnecessary squashing.
|
|
||||||
*/
|
|
||||||
|
|
||||||
Settings new_settings = getContext()->getSettings();
|
|
||||||
|
|
||||||
new_settings.max_threads = std::max<UInt64>(1, settings.max_insert_threads);
|
|
||||||
|
|
||||||
if (table->prefersLargeBlocks())
|
|
||||||
{
|
|
||||||
if (settings.min_insert_block_size_rows)
|
|
||||||
new_settings.max_block_size = settings.min_insert_block_size_rows;
|
|
||||||
if (settings.min_insert_block_size_bytes)
|
|
||||||
new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto new_context = Context::createCopy(context);
|
|
||||||
new_context->setSettings(new_settings);
|
|
||||||
new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames());
|
|
||||||
|
|
||||||
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
|
|
||||||
|
|
||||||
if (settings.allow_experimental_analyzer)
|
|
||||||
{
|
|
||||||
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options);
|
|
||||||
pipeline = interpreter_select_analyzer.buildQueryPipeline();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options);
|
|
||||||
pipeline = interpreter_select.buildQueryPipeline();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
|
|
||||||
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
|
|
||||||
|
|
||||||
if (settings.allow_experimental_analyzer)
|
|
||||||
{
|
|
||||||
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options);
|
|
||||||
pipeline = interpreter_select_analyzer.buildQueryPipeline();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options);
|
|
||||||
pipeline = interpreter_select.buildQueryPipeline();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pipeline.dropTotalsAndExtremes();
|
|
||||||
|
|
||||||
if (settings.max_insert_threads > 1)
|
|
||||||
{
|
|
||||||
auto table_id = table->getStorageID();
|
|
||||||
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
|
|
||||||
|
|
||||||
/// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them.
|
|
||||||
/// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts.
|
|
||||||
const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert();
|
|
||||||
pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads
|
|
||||||
: std::min<size_t>(settings.max_insert_threads, pipeline.getNumStreams());
|
|
||||||
|
|
||||||
/// Deduplication when passing insert_deduplication_token breaks if using more than one thread
|
|
||||||
if (!settings.insert_deduplication_token.toString().empty())
|
|
||||||
{
|
|
||||||
LOG_DEBUG(
|
|
||||||
getLogger("InsertQuery"),
|
|
||||||
"Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues");
|
|
||||||
pre_streams_size = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (table->supportsParallelInsert())
|
|
||||||
sink_streams_size = pre_streams_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
pipeline.resize(pre_streams_size);
|
|
||||||
|
|
||||||
/// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values.
|
|
||||||
if (getContext()->getSettingsRef().insert_null_as_default)
|
|
||||||
{
|
|
||||||
const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName();
|
|
||||||
const auto & query_columns = query_sample_block.getColumnsWithTypeAndName();
|
|
||||||
const auto & output_columns = metadata_snapshot->getColumns();
|
|
||||||
|
|
||||||
if (input_columns.size() == query_columns.size())
|
|
||||||
{
|
|
||||||
for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx)
|
|
||||||
{
|
|
||||||
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
|
|
||||||
/// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
|
|
||||||
if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type)
|
|
||||||
&& !isNullableOrLowCardinalityNullable(query_columns[col_idx].type)
|
|
||||||
&& !isVariant(query_columns[col_idx].type)
|
|
||||||
&& !isDynamic(query_columns[col_idx].type)
|
|
||||||
&& output_columns.has(query_columns[col_idx].name))
|
|
||||||
query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ThreadGroupPtr running_group;
|
|
||||||
if (current_thread)
|
|
||||||
running_group = current_thread->getThreadGroup();
|
|
||||||
if (!running_group)
|
|
||||||
running_group = std::make_shared<ThreadGroup>(getContext());
|
|
||||||
for (size_t i = 0; i < sink_streams_size; ++i)
|
|
||||||
{
|
|
||||||
auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr,
|
|
||||||
running_group, /* elapsed_counter_ms= */ nullptr);
|
|
||||||
sink_chains.emplace_back(std::move(out));
|
|
||||||
}
|
|
||||||
for (size_t i = 0; i < pre_streams_size; ++i)
|
|
||||||
{
|
|
||||||
auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block);
|
|
||||||
presink_chains.emplace_back(std::move(out));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockIO res;
|
BlockIO res;
|
||||||
|
|
||||||
/// What type of query: INSERT or INSERT SELECT or INSERT WATCH?
|
if (query.select)
|
||||||
if (distributed_pipeline)
|
|
||||||
{
|
{
|
||||||
res.pipeline = std::move(*distributed_pipeline);
|
if (settings.parallel_distributed_insert_select)
|
||||||
}
|
|
||||||
else if (query.select)
|
|
||||||
{
|
|
||||||
const auto & header = presink_chains.at(0).getInputHeader();
|
|
||||||
auto actions_dag = ActionsDAG::makeConvertingActions(
|
|
||||||
pipeline.getHeader().getColumnsWithTypeAndName(),
|
|
||||||
header.getColumnsWithTypeAndName(),
|
|
||||||
ActionsDAG::MatchColumnsMode::Position);
|
|
||||||
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes));
|
|
||||||
|
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
|
||||||
{
|
{
|
||||||
return std::make_shared<ExpressionTransform>(in_header, actions);
|
auto distributed = table->distributedWrite(query, getContext());
|
||||||
});
|
if (distributed)
|
||||||
|
|
||||||
/// We need to convert Sparse columns to full, because it's destination storage
|
|
||||||
/// may not support it or may have different settings for applying Sparse serialization.
|
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
return std::make_shared<MaterializingTransform>(in_header);
|
|
||||||
});
|
|
||||||
|
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
auto context_ptr = getContext();
|
|
||||||
auto counting = std::make_shared<CountingTransform>(in_header, nullptr, context_ptr->getQuota());
|
|
||||||
counting->setProcessListElement(context_ptr->getProcessListElement());
|
|
||||||
counting->setProgressCallback(context_ptr->getProgressCallback());
|
|
||||||
|
|
||||||
return counting;
|
|
||||||
});
|
|
||||||
|
|
||||||
if (shouldAddSquashingFroStorage(table))
|
|
||||||
{
|
|
||||||
bool table_prefers_large_blocks = table->prefersLargeBlocks();
|
|
||||||
|
|
||||||
size_t threads = presink_chains.size();
|
|
||||||
|
|
||||||
pipeline.resize(1);
|
|
||||||
|
|
||||||
pipeline.addTransform(std::make_shared<PlanSquashingTransform>(
|
|
||||||
header,
|
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL));
|
|
||||||
|
|
||||||
pipeline.resize(threads);
|
|
||||||
|
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
|
||||||
{
|
{
|
||||||
return std::make_shared<ApplySquashingTransform>(
|
res.pipeline = std::move(*distributed);
|
||||||
in_header,
|
}
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
else
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
{
|
||||||
});
|
res.pipeline = buildInsertSelectPipeline(query, table);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
size_t num_select_threads = pipeline.getNumThreads();
|
|
||||||
|
|
||||||
for (auto & chain : presink_chains)
|
|
||||||
resources = chain.detachResources();
|
|
||||||
for (auto & chain : sink_chains)
|
|
||||||
resources = chain.detachResources();
|
|
||||||
|
|
||||||
pipeline.addChains(std::move(presink_chains));
|
|
||||||
pipeline.resize(sink_chains.size());
|
|
||||||
pipeline.addChains(std::move(sink_chains));
|
|
||||||
|
|
||||||
if (!settings.parallel_view_processing)
|
|
||||||
{
|
{
|
||||||
/// Don't use more threads for INSERT than for SELECT to reduce memory consumption.
|
res.pipeline = buildInsertSelectPipeline(query, table);
|
||||||
if (pipeline.getNumThreads() > num_select_threads)
|
|
||||||
pipeline.setMaxThreads(num_select_threads);
|
|
||||||
}
|
}
|
||||||
else if (pipeline.getNumThreads() < settings.max_threads)
|
|
||||||
{
|
|
||||||
/// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select,
|
|
||||||
/// however in case of parallel_view_processing and multiple views, views can still be processed in parallel.
|
|
||||||
///
|
|
||||||
/// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads.
|
|
||||||
pipeline.setMaxThreads(settings.max_threads);
|
|
||||||
}
|
|
||||||
|
|
||||||
pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
return std::make_shared<EmptySink>(cur_header);
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!allow_materialized)
|
|
||||||
{
|
|
||||||
for (const auto & column : metadata_snapshot->getColumns())
|
|
||||||
if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name))
|
|
||||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name);
|
|
||||||
}
|
|
||||||
|
|
||||||
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline));
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto & chain = presink_chains.at(0);
|
res.pipeline = buildInsertPipeline(query, table);
|
||||||
chain.appendChain(std::move(sink_chains.at(0)));
|
|
||||||
|
|
||||||
if (shouldAddSquashingFroStorage(table))
|
|
||||||
{
|
|
||||||
bool table_prefers_large_blocks = table->prefersLargeBlocks();
|
|
||||||
|
|
||||||
auto squashing = std::make_shared<ApplySquashingTransform>(
|
|
||||||
chain.getInputHeader(),
|
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
|
||||||
|
|
||||||
chain.addSource(std::move(squashing));
|
|
||||||
|
|
||||||
auto balancing = std::make_shared<PlanSquashingTransform>(
|
|
||||||
chain.getInputHeader(),
|
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
|
||||||
|
|
||||||
chain.addSource(std::move(balancing));
|
|
||||||
}
|
|
||||||
|
|
||||||
auto context_ptr = getContext();
|
|
||||||
auto counting = std::make_shared<CountingTransform>(chain.getInputHeader(), nullptr, context_ptr->getQuota());
|
|
||||||
counting->setProcessListElement(context_ptr->getProcessListElement());
|
|
||||||
counting->setProgressCallback(context_ptr->getProgressCallback());
|
|
||||||
chain.addSource(std::move(counting));
|
|
||||||
|
|
||||||
res.pipeline = QueryPipeline(std::move(presink_chains[0]));
|
|
||||||
res.pipeline.setNumThreads(std::min<size_t>(res.pipeline.getNumThreads(), settings.max_threads));
|
|
||||||
res.pipeline.setConcurrencyControl(settings.use_concurrency_control);
|
|
||||||
|
|
||||||
if (query.hasInlinedData() && !async_insert)
|
|
||||||
{
|
|
||||||
/// can execute without additional data
|
|
||||||
auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr);
|
|
||||||
for (auto && buffer : owned_buffers)
|
|
||||||
format->addBuffer(std::move(buffer));
|
|
||||||
|
|
||||||
auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr);
|
|
||||||
res.pipeline.complete(std::move(pipe));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
res.pipeline.addResources(std::move(resources));
|
|
||||||
|
|
||||||
res.pipeline.addStorageHolder(table);
|
res.pipeline.addStorageHolder(table);
|
||||||
if (inner_table)
|
|
||||||
res.pipeline.addStorageHolder(inner_table);
|
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
|
||||||
|
res.pipeline.addStorageHolder(mv->getTargetTable());
|
||||||
|
|
||||||
|
LOG_TEST(getLogger("InterpreterInsertQuery"), "Pipeline could use up to {} thread", res.pipeline.getNumThreads());
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@@ -758,17 +808,27 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont
     }
 }


 void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const
 {
     extendQueryLogElemImpl(elem, context_);
 }


 void registerInterpreterInsertQuery(InterpreterFactory & factory)
 {
     auto create_fn = [] (const InterpreterFactory::Arguments & args)
     {
-        return std::make_unique<InterpreterInsertQuery>(args.query, args.context, args.allow_materialized);
+        return std::make_unique<InterpreterInsertQuery>(
+            args.query,
+            args.context,
+            args.allow_materialized,
+            /* no_squash */false,
+            /* no_destination */false,
+            /* async_insert */false);
     };
     factory.registerInterpreter("InterpreterInsertQuery", create_fn);
 }


 }
@@ -23,10 +23,10 @@ public:
     InterpreterInsertQuery(
         const ASTPtr & query_ptr_,
         ContextPtr context_,
-        bool allow_materialized_ = false,
-        bool no_squash_ = false,
-        bool no_destination_ = false,
-        bool async_insert_ = false);
+        bool allow_materialized_,
+        bool no_squash_,
+        bool no_destination,
+        bool async_insert_);

     /** Prepare a request for execution. Return block streams
      * - the stream into which you can write data to execute the query, if INSERT;
@@ -73,12 +73,17 @@ private:

     ASTPtr query_ptr;
     const bool allow_materialized;
-    const bool no_squash;
-    const bool no_destination;
+    bool no_squash = false;
+    bool no_destination = false;
     const bool async_insert;

     std::vector<std::unique_ptr<ReadBuffer>> owned_buffers;

+    std::pair<std::vector<Chain>, std::vector<Chain>> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block);
+
+    QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table);
+    QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table);
+
     Chain buildSink(
         const StoragePtr & table,
         const StorageMetadataPtr & metadata_snapshot,
@@ -1,6 +1,9 @@
 #include <vector>
 #include <Interpreters/Squashing.h>
+#include "Common/Logger.h"
+#include "Common/logger_useful.h"
 #include <Common/CurrentThread.h>
+#include <base/defines.h>


 namespace DB
@@ -60,24 +63,33 @@ const ChunksToSquash * getInfoFromChunk(const Chunk & chunk)
 }

 Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_)
-    : header(header_)
-    , min_block_size_rows(min_block_size_rows_)
+    : min_block_size_rows(min_block_size_rows_)
     , min_block_size_bytes(min_block_size_bytes_)
+    , header(header_)
 {
 }

 Chunk Squashing::flush()
 {
-    return convertToChunk(std::move(chunks_to_merge_vec));
+    if (!accumulated)
+        return {};
+
+    auto result = convertToChunk(extract());
+    chassert(result);
+    return result;
 }

 Chunk Squashing::squash(Chunk && input_chunk)
 {
-    if (!input_chunk.hasChunkInfo())
+    if (!input_chunk)
         return Chunk();

-    const auto * info = getInfoFromChunk(input_chunk);
-    return squashImpl(info->chunks);
+    auto squash_info = input_chunk.getChunkInfos().extract<ChunksToSquash>();
+
+    if (!squash_info)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr");
+
+    return squash(std::move(squash_info->chunks), std::move(input_chunk.getChunkInfos()));
 }

 Chunk Squashing::add(Chunk && input_chunk)
@ -86,74 +98,94 @@ Chunk Squashing::add(Chunk && input_chunk)
|
|||||||
return {};
|
return {};
|
||||||
|
|
||||||
/// Just read block is already enough.
|
/// Just read block is already enough.
|
||||||
if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes()))
|
if (isEnoughSize(input_chunk))
|
||||||
{
|
{
|
||||||
/// If no accumulated data, return just read block.
|
/// If no accumulated data, return just read block.
|
||||||
if (chunks_to_merge_vec.empty())
|
if (!accumulated)
|
||||||
{
|
{
|
||||||
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
accumulated.add(std::move(input_chunk));
|
||||||
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
return convertToChunk(extract());
|
||||||
chunks_to_merge_vec.clear();
|
|
||||||
return res_chunk;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return accumulated data (maybe it has small size) and place new block to accumulated data.
|
/// Return accumulated data (maybe it has small size) and place new block to accumulated data.
|
||||||
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
Chunk res_chunk = convertToChunk(extract());
|
||||||
chunks_to_merge_vec.clear();
|
accumulated.add(std::move(input_chunk));
|
||||||
changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
|
|
||||||
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
|
||||||
return res_chunk;
|
return res_chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Accumulated block is already enough.
|
/// Accumulated block is already enough.
|
||||||
if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes))
|
if (isEnoughSize())
|
||||||
{
|
{
|
||||||
/// Return accumulated data and place new block to accumulated data.
|
/// Return accumulated data and place new block to accumulated data.
|
||||||
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
Chunk res_chunk = convertToChunk(extract());
|
||||||
chunks_to_merge_vec.clear();
|
accumulated.add(std::move(input_chunk));
|
||||||
changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
|
|
||||||
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
|
||||||
return res_chunk;
|
return res_chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Pushing data into accumulating vector
|
/// Pushing data into accumulating vector
|
||||||
expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
|
accumulated.add(std::move(input_chunk));
|
||||||
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
|
||||||
|
|
||||||
/// If accumulated data is big enough, we send it
|
/// If accumulated data is big enough, we send it
|
||||||
if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes))
|
if (isEnoughSize())
|
||||||
{
|
return convertToChunk(extract());
|
||||||
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
|
||||||
changeCurrentSize(0, 0);
|
|
||||||
chunks_to_merge_vec.clear();
|
|
||||||
return res_chunk;
|
|
||||||
}
|
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
Chunk Squashing::convertToChunk(std::vector<Chunk> && chunks) const
|
Chunk Squashing::convertToChunk(CurrentData && data) const
|
||||||
{
|
{
|
||||||
if (chunks.empty())
|
if (data.chunks.empty())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
auto info = std::make_shared<ChunksToSquash>();
|
auto info = std::make_shared<ChunksToSquash>();
|
||||||
info->chunks = std::move(chunks);
|
info->chunks = std::move(data.chunks);
|
||||||
|
|
||||||
chunks.clear();
|
// It is imortant that chunk is not empty, it has to have columns even if they are empty
|
||||||
|
// Sometimes there are could be no columns in header but not empty rows in chunks
|
||||||
|
// That happens when we intend to add defaults for the missing columns after
|
||||||
|
auto aggr_chunk = Chunk(header.getColumns(), 0);
|
||||||
|
if (header.columns() == 0)
|
||||||
|
aggr_chunk = Chunk(header.getColumns(), data.getRows());
|
||||||
|
|
||||||
return Chunk(header.cloneEmptyColumns(), 0, info);
|
aggr_chunk.getChunkInfos().add(std::move(info));
|
||||||
|
chassert(aggr_chunk);
|
||||||
|
return aggr_chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Squashing::expandCurrentSize(size_t rows, size_t bytes)
|
Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoCollection && infos)
|
||||||
{
|
{
|
||||||
accumulated_size.rows += rows;
|
std::vector<IColumn::MutablePtr> mutable_columns = {};
|
||||||
accumulated_size.bytes += bytes;
|
size_t rows = 0;
|
||||||
}
|
for (const Chunk & chunk : input_chunks)
|
||||||
|
rows += chunk.getNumRows();
|
||||||
|
|
||||||
void Squashing::changeCurrentSize(size_t rows, size_t bytes)
|
{
|
||||||
{
|
auto & first_chunk = input_chunks[0];
|
||||||
accumulated_size.rows = rows;
|
Columns columns = first_chunk.detachColumns();
|
||||||
accumulated_size.bytes = bytes;
|
for (auto & column : columns)
|
||||||
|
{
|
||||||
|
mutable_columns.push_back(IColumn::mutate(std::move(column)));
|
||||||
|
mutable_columns.back()->reserve(rows);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above
|
||||||
|
{
|
||||||
|
Columns columns = input_chunks[i].detachColumns();
|
||||||
|
for (size_t j = 0, size = mutable_columns.size(); j < size; ++j)
|
||||||
|
{
|
||||||
|
const auto source_column = columns[j];
|
||||||
|
mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Chunk result;
|
||||||
|
result.setColumns(std::move(mutable_columns), rows);
|
||||||
|
result.setChunkInfos(infos);
|
||||||
|
result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos()));
|
||||||
|
|
||||||
|
chassert(result);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Squashing::isEnoughSize(size_t rows, size_t bytes) const
|
bool Squashing::isEnoughSize(size_t rows, size_t bytes) const
|
||||||
@ -162,4 +194,29 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const
|
|||||||
|| (min_block_size_rows && rows >= min_block_size_rows)
|
|| (min_block_size_rows && rows >= min_block_size_rows)
|
||||||
|| (min_block_size_bytes && bytes >= min_block_size_bytes);
|
|| (min_block_size_bytes && bytes >= min_block_size_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Squashing::isEnoughSize() const
|
||||||
|
{
|
||||||
|
return isEnoughSize(accumulated.getRows(), accumulated.getBytes());
|
||||||
|
};
|
||||||
|
|
||||||
|
bool Squashing::isEnoughSize(const Chunk & chunk) const
|
||||||
|
{
|
||||||
|
return isEnoughSize(chunk.getNumRows(), chunk.bytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
void Squashing::CurrentData::add(Chunk && chunk)
|
||||||
|
{
|
||||||
|
rows += chunk.getNumRows();
|
||||||
|
bytes += chunk.bytes();
|
||||||
|
chunks.push_back(std::move(chunk));
|
||||||
|
}
|
||||||
|
|
||||||
|
Squashing::CurrentData Squashing::extract()
|
||||||
|
{
|
||||||
|
auto result = std::move(accumulated);
|
||||||
|
accumulated = {};
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -8,9 +8,18 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
struct ChunksToSquash : public ChunkInfo
|
class ChunksToSquash : public ChunkInfoCloneable<ChunksToSquash>
|
||||||
{
|
{
|
||||||
mutable std::vector<Chunk> chunks = {};
|
public:
|
||||||
|
ChunksToSquash() = default;
|
||||||
|
ChunksToSquash(const ChunksToSquash & other)
|
||||||
|
{
|
||||||
|
chunks.reserve(other.chunks.size());
|
||||||
|
for (const auto & chunk: other.chunks)
|
||||||
|
chunks.push_back(chunk.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Chunk> chunks = {};
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Merging consecutive passed blocks to specified minimum size.
|
/** Merging consecutive passed blocks to specified minimum size.
|
||||||
@ -36,26 +45,37 @@ public:
|
|||||||
static Chunk squash(Chunk && input_chunk);
|
static Chunk squash(Chunk && input_chunk);
|
||||||
Chunk flush();
|
Chunk flush();
|
||||||
|
|
||||||
Block header;
|
void setHeader(Block header_) { header = std::move(header_); }
|
||||||
|
const Block & getHeader() const { return header; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct CurrentSize
|
struct CurrentData
|
||||||
{
|
{
|
||||||
|
std::vector<Chunk> chunks = {};
|
||||||
size_t rows = 0;
|
size_t rows = 0;
|
||||||
size_t bytes = 0;
|
size_t bytes = 0;
|
||||||
|
|
||||||
|
explicit operator bool () const { return !chunks.empty(); }
|
||||||
|
size_t getRows() const { return rows; }
|
||||||
|
size_t getBytes() const { return bytes; }
|
||||||
|
void add(Chunk && chunk);
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<Chunk> chunks_to_merge_vec = {};
|
const size_t min_block_size_rows;
|
||||||
size_t min_block_size_rows;
|
const size_t min_block_size_bytes;
|
||||||
size_t min_block_size_bytes;
|
Block header;
|
||||||
|
|
||||||
CurrentSize accumulated_size;
|
CurrentData accumulated;
|
||||||
|
|
||||||
void expandCurrentSize(size_t rows, size_t bytes);
|
static Chunk squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoCollection && infos);
|
||||||
void changeCurrentSize(size_t rows, size_t bytes);
|
|
||||||
|
bool isEnoughSize() const;
|
||||||
bool isEnoughSize(size_t rows, size_t bytes) const;
|
bool isEnoughSize(size_t rows, size_t bytes) const;
|
||||||
|
bool isEnoughSize(const Chunk & chunk) const;
|
||||||
|
|
||||||
Chunk convertToChunk(std::vector<Chunk> && chunks) const;
|
CurrentData extract();
|
||||||
|
|
||||||
|
Chunk convertToChunk(CurrentData && data) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
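A minimal sketch of the two-phase flow introduced above, assuming a `Squashing` instance built elsewhere and a downstream `pushChunk` helper that is not part of this change: the planner emits a header-only chunk whose `ChunksToSquash` info carries the buffered chunks, and the static `Squashing::squash(Chunk &&)` later materializes them into one chunk.

```cpp
// Sketch only: `squashing` is a DB::Squashing configured with the desired header and size thresholds.
DB::Chunk planned = squashing.flush();                        // header-only chunk carrying ChunksToSquash info
if (auto squashed = DB::Squashing::squash(std::move(planned)))
    pushChunk(std::move(squashed));                           // hypothetical downstream call
```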
@ -544,7 +544,13 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
|
|||||||
insert_context->makeQueryContext();
|
insert_context->makeQueryContext();
|
||||||
addSettingsForQuery(insert_context, IAST::QueryKind::Insert);
|
addSettingsForQuery(insert_context, IAST::QueryKind::Insert);
|
||||||
|
|
||||||
InterpreterInsertQuery interpreter(query_ptr, insert_context);
|
InterpreterInsertQuery interpreter(
|
||||||
|
query_ptr,
|
||||||
|
insert_context,
|
||||||
|
/* allow_materialized */ false,
|
||||||
|
/* no_squash */ false,
|
||||||
|
/* no_destination */ false,
|
||||||
|
/* async_insert */ false);
|
||||||
BlockIO io = interpreter.execute();
|
BlockIO io = interpreter.execute();
|
||||||
|
|
||||||
PushingPipelineExecutor executor(io.pipeline);
|
PushingPipelineExecutor executor(io.pipeline);
|
||||||
|
@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check for dynamic subcolums in unknown required columns.
|
/// Check for dynamic subcolumns in unknown required columns.
|
||||||
if (!unknown_required_source_columns.empty())
|
if (!unknown_required_source_columns.empty())
|
||||||
{
|
{
|
||||||
for (const NameAndTypePair & pair : source_columns_ordinary)
|
for (const NameAndTypePair & pair : source_columns_ordinary)
|
||||||
|
@ -385,7 +385,7 @@ static void maybeConvertOrdinaryDatabaseToAtomic(ContextMutablePtr context, cons
|
|||||||
if (database->getEngineName() != "Ordinary")
|
if (database->getEngineName() != "Ordinary")
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Strings permanently_detached_tables = database->getNamesOfPermanentlyDetachedTables();
|
const Strings permanently_detached_tables = database->getNamesOfPermanentlyDetachedTables();
|
||||||
if (!permanently_detached_tables.empty())
|
if (!permanently_detached_tables.empty())
|
||||||
{
|
{
|
||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot automatically convert database {} from Ordinary to Atomic, "
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot automatically convert database {} from Ordinary to Atomic, "
|
||||||
|
@ -19,14 +19,6 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns
|
|||||||
checkNumRowsIsConsistent();
|
checkNumRowsIsConsistent();
|
||||||
}
|
}
|
||||||
|
|
||||||
Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
|
|
||||||
: columns(std::move(columns_))
|
|
||||||
, num_rows(num_rows_)
|
|
||||||
, chunk_info(std::move(chunk_info_))
|
|
||||||
{
|
|
||||||
checkNumRowsIsConsistent();
|
|
||||||
}
|
|
||||||
|
|
||||||
static Columns unmuteColumns(MutableColumns && mutable_columns)
|
static Columns unmuteColumns(MutableColumns && mutable_columns)
|
||||||
{
|
{
|
||||||
Columns columns;
|
Columns columns;
|
||||||
@ -43,17 +35,11 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_)
|
|||||||
checkNumRowsIsConsistent();
|
checkNumRowsIsConsistent();
|
||||||
}
|
}
|
||||||
|
|
||||||
Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
|
|
||||||
: columns(unmuteColumns(std::move(columns_)))
|
|
||||||
, num_rows(num_rows_)
|
|
||||||
, chunk_info(std::move(chunk_info_))
|
|
||||||
{
|
|
||||||
checkNumRowsIsConsistent();
|
|
||||||
}
|
|
||||||
|
|
||||||
Chunk Chunk::clone() const
|
Chunk Chunk::clone() const
|
||||||
{
|
{
|
||||||
return Chunk(getColumns(), getNumRows(), chunk_info);
|
auto tmp = Chunk(getColumns(), getNumRows());
|
||||||
|
tmp.setChunkInfos(chunk_infos.clone());
|
||||||
|
return tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Chunk::setColumns(Columns columns_, UInt64 num_rows_)
|
void Chunk::setColumns(Columns columns_, UInt64 num_rows_)
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <Common/CollectionOfDerived.h>
|
||||||
#include <Columns/IColumn.h>
|
#include <Columns/IColumn.h>
|
||||||
#include <unordered_map>
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -9,11 +11,29 @@ namespace DB
|
|||||||
class ChunkInfo
|
class ChunkInfo
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
virtual ~ChunkInfo() = default;
|
using Ptr = std::shared_ptr<ChunkInfo>;
|
||||||
|
|
||||||
ChunkInfo() = default;
|
ChunkInfo() = default;
|
||||||
|
ChunkInfo(const ChunkInfo&) = default;
|
||||||
|
ChunkInfo(ChunkInfo&&) = default;
|
||||||
|
|
||||||
|
virtual Ptr clone() const = 0;
|
||||||
|
virtual ~ChunkInfo() = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
using ChunkInfoPtr = std::shared_ptr<const ChunkInfo>;
|
|
||||||
|
template<class Derived>
|
||||||
|
class ChunkInfoCloneable : public ChunkInfo
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ChunkInfoCloneable() = default;
|
||||||
|
ChunkInfoCloneable(const ChunkInfoCloneable & other) = default;
|
||||||
|
|
||||||
|
Ptr clone() const override
|
||||||
|
{
|
||||||
|
return std::static_pointer_cast<ChunkInfo>(std::make_shared<Derived>(*static_cast<const Derived*>(this)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Chunk is a list of columns with the same length.
|
* Chunk is a list of columns with the same length.
|
||||||
@ -32,26 +52,26 @@ using ChunkInfoPtr = std::shared_ptr<const ChunkInfo>;
|
|||||||
class Chunk
|
class Chunk
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
using ChunkInfoCollection = CollectionOfDerivedItems<ChunkInfo>;
|
||||||
|
|
||||||
Chunk() = default;
|
Chunk() = default;
|
||||||
Chunk(const Chunk & other) = delete;
|
Chunk(const Chunk & other) = delete;
|
||||||
Chunk(Chunk && other) noexcept
|
Chunk(Chunk && other) noexcept
|
||||||
: columns(std::move(other.columns))
|
: columns(std::move(other.columns))
|
||||||
, num_rows(other.num_rows)
|
, num_rows(other.num_rows)
|
||||||
, chunk_info(std::move(other.chunk_info))
|
, chunk_infos(std::move(other.chunk_infos))
|
||||||
{
|
{
|
||||||
other.num_rows = 0;
|
other.num_rows = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
Chunk(Columns columns_, UInt64 num_rows_);
|
Chunk(Columns columns_, UInt64 num_rows_);
|
||||||
Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
|
|
||||||
Chunk(MutableColumns columns_, UInt64 num_rows_);
|
Chunk(MutableColumns columns_, UInt64 num_rows_);
|
||||||
Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
|
|
||||||
|
|
||||||
Chunk & operator=(const Chunk & other) = delete;
|
Chunk & operator=(const Chunk & other) = delete;
|
||||||
Chunk & operator=(Chunk && other) noexcept
|
Chunk & operator=(Chunk && other) noexcept
|
||||||
{
|
{
|
||||||
columns = std::move(other.columns);
|
columns = std::move(other.columns);
|
||||||
chunk_info = std::move(other.chunk_info);
|
chunk_infos = std::move(other.chunk_infos);
|
||||||
num_rows = other.num_rows;
|
num_rows = other.num_rows;
|
||||||
other.num_rows = 0;
|
other.num_rows = 0;
|
||||||
return *this;
|
return *this;
|
||||||
@ -62,15 +82,15 @@ public:
|
|||||||
void swap(Chunk & other) noexcept
|
void swap(Chunk & other) noexcept
|
||||||
{
|
{
|
||||||
columns.swap(other.columns);
|
columns.swap(other.columns);
|
||||||
chunk_info.swap(other.chunk_info);
|
|
||||||
std::swap(num_rows, other.num_rows);
|
std::swap(num_rows, other.num_rows);
|
||||||
|
chunk_infos.swap(other.chunk_infos);
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear()
|
void clear()
|
||||||
{
|
{
|
||||||
num_rows = 0;
|
num_rows = 0;
|
||||||
columns.clear();
|
columns.clear();
|
||||||
chunk_info.reset();
|
chunk_infos.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
const Columns & getColumns() const { return columns; }
|
const Columns & getColumns() const { return columns; }
|
||||||
@ -81,9 +101,9 @@ public:
|
|||||||
/** Get empty columns with the same types as in block. */
|
/** Get empty columns with the same types as in block. */
|
||||||
MutableColumns cloneEmptyColumns() const;
|
MutableColumns cloneEmptyColumns() const;
|
||||||
|
|
||||||
const ChunkInfoPtr & getChunkInfo() const { return chunk_info; }
|
ChunkInfoCollection & getChunkInfos() { return chunk_infos; }
|
||||||
bool hasChunkInfo() const { return chunk_info != nullptr; }
|
const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; }
|
||||||
void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); }
|
void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); }
|
||||||
|
|
||||||
UInt64 getNumRows() const { return num_rows; }
|
UInt64 getNumRows() const { return num_rows; }
|
||||||
UInt64 getNumColumns() const { return columns.size(); }
|
UInt64 getNumColumns() const { return columns.size(); }
|
||||||
@ -107,7 +127,7 @@ public:
|
|||||||
private:
|
private:
|
||||||
Columns columns;
|
Columns columns;
|
||||||
UInt64 num_rows = 0;
|
UInt64 num_rows = 0;
|
||||||
ChunkInfoPtr chunk_info;
|
ChunkInfoCollection chunk_infos;
|
||||||
|
|
||||||
void checkNumRowsIsConsistent();
|
void checkNumRowsIsConsistent();
|
||||||
};
|
};
|
||||||
@ -117,11 +137,15 @@ using Chunks = std::vector<Chunk>;
|
|||||||
/// AsyncInsert needs two kinds of information:
|
/// AsyncInsert needs two kinds of information:
|
||||||
/// - offsets of different sub-chunks
|
/// - offsets of different sub-chunks
|
||||||
/// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`.
|
/// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`.
|
||||||
class AsyncInsertInfo : public ChunkInfo
|
class AsyncInsertInfo : public ChunkInfoCloneable<AsyncInsertInfo>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
AsyncInsertInfo() = default;
|
AsyncInsertInfo() = default;
|
||||||
explicit AsyncInsertInfo(const std::vector<size_t> & offsets_, const std::vector<String> & tokens_) : offsets(offsets_), tokens(tokens_) {}
|
AsyncInsertInfo(const AsyncInsertInfo & other) = default;
|
||||||
|
AsyncInsertInfo(const std::vector<size_t> & offsets_, const std::vector<String> & tokens_)
|
||||||
|
: offsets(offsets_)
|
||||||
|
, tokens(tokens_)
|
||||||
|
{}
|
||||||
|
|
||||||
std::vector<size_t> offsets;
|
std::vector<size_t> offsets;
|
||||||
std::vector<String> tokens;
|
std::vector<String> tokens;
|
||||||
@ -130,9 +154,11 @@ public:
|
|||||||
using AsyncInsertInfoPtr = std::shared_ptr<AsyncInsertInfo>;
|
using AsyncInsertInfoPtr = std::shared_ptr<AsyncInsertInfo>;
|
||||||
|
|
||||||
/// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults.
|
/// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults.
|
||||||
class ChunkMissingValues : public ChunkInfo
|
class ChunkMissingValues : public ChunkInfoCloneable<ChunkMissingValues>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
ChunkMissingValues(const ChunkMissingValues & other) = default;
|
||||||
|
|
||||||
using RowsBitMask = std::vector<bool>; /// a bit per row for a column
|
using RowsBitMask = std::vector<bool>; /// a bit per row for a column
|
||||||
|
|
||||||
const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
|
const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
|
||||||
|
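To illustrate the new chunk-info model in one place (the `ExampleOffsetsInfo` type and the snippet around it are illustrative, not part of this change): an info type derives from `ChunkInfoCloneable<Derived>` to inherit `clone()`, and it is attached to and looked up on a chunk through the `ChunkInfoCollection` accessors.

```cpp
#include <Processors/Chunk.h>

// Illustrative info type: the copy constructor is what ChunkInfoCloneable::clone() relies on.
class ExampleOffsetsInfo : public DB::ChunkInfoCloneable<ExampleOffsetsInfo>
{
public:
    ExampleOffsetsInfo() = default;
    ExampleOffsetsInfo(const ExampleOffsetsInfo & other) = default;

    std::vector<size_t> offsets;
};

void attachAndRead(DB::Chunk & chunk)
{
    chunk.getChunkInfos().add(std::make_shared<ExampleOffsetsInfo>());   // attach

    if (auto info = chunk.getChunkInfos().get<ExampleOffsetsInfo>())     // typed lookup, may return nullptr
        info->offsets.push_back(0);
}
```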
@ -147,13 +147,10 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds)
|
|||||||
|
|
||||||
block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
|
block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
|
||||||
|
|
||||||
if (auto chunk_info = chunk.getChunkInfo())
|
if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
|
||||||
{
|
{
|
||||||
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(chunk_info.get()))
|
block.info.bucket_num = agg_info->bucket_num;
|
||||||
{
|
block.info.is_overflows = agg_info->is_overflows;
|
||||||
block.info.bucket_num = agg_info->bucket_num;
|
|
||||||
block.info.is_overflows = agg_info->is_overflows;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -73,13 +73,10 @@ bool PullingPipelineExecutor::pull(Block & block)
|
|||||||
}
|
}
|
||||||
|
|
||||||
block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
|
block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
|
||||||
if (auto chunk_info = chunk.getChunkInfo())
|
if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
|
||||||
{
|
{
|
||||||
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(chunk_info.get()))
|
block.info.bucket_num = agg_info->bucket_num;
|
||||||
{
|
block.info.is_overflows = agg_info->is_overflows;
|
||||||
block.info.bucket_num = agg_info->bucket_num;
|
|
||||||
block.info.is_overflows = agg_info->is_overflows;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -179,7 +179,9 @@ void ParquetBlockOutputFormat::consume(Chunk chunk)
|
|||||||
columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count);
|
columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count);
|
||||||
|
|
||||||
Chunks piece;
|
Chunks piece;
|
||||||
piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo());
|
piece.emplace_back(std::move(columns), count);
|
||||||
|
piece.back().setChunkInfos(concatenated.getChunkInfos());
|
||||||
|
|
||||||
writeRowGroup(std::move(piece));
|
writeRowGroup(std::move(piece));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8,8 +8,9 @@ namespace ErrorCodes
|
|||||||
}
|
}
|
||||||
|
|
||||||
IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header)
|
IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header)
|
||||||
: IProcessor({std::move(input_header)}, {std::move(output_header)}),
|
: IProcessor({std::move(input_header)}, {std::move(output_header)})
|
||||||
input(inputs.front()), output(outputs.front())
|
, input(inputs.front())
|
||||||
|
, output(outputs.front())
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,13 +53,11 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num
|
|||||||
if (!input.chunk.hasRows())
|
if (!input.chunk.hasRows())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const auto & info = input.chunk.getChunkInfo();
|
if (input.chunk.getChunkInfos().empty())
|
||||||
if (!info)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm");
|
||||||
|
|
||||||
Int64 allocated_bytes = 0;
|
Int64 allocated_bytes = 0;
|
||||||
/// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator.
|
if (auto arenas_info = input.chunk.getChunkInfos().get<ChunkInfoWithAllocatedBytes>())
|
||||||
if (const auto * arenas_info = typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get()))
|
|
||||||
allocated_bytes = arenas_info->allocated_bytes;
|
allocated_bytes = arenas_info->allocated_bytes;
|
||||||
|
|
||||||
states[source_num] = State{input.chunk, description, allocated_bytes};
|
states[source_num] = State{input.chunk, description, allocated_bytes};
|
||||||
@ -136,7 +134,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge()
|
|||||||
info->chunk_num = chunk_num++;
|
info->chunk_num = chunk_num++;
|
||||||
|
|
||||||
Chunk chunk;
|
Chunk chunk;
|
||||||
chunk.setChunkInfo(std::move(info));
|
chunk.getChunkInfos().add(std::move(info));
|
||||||
return chunk;
|
return chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -163,7 +161,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation()
|
|||||||
chunks.emplace_back(std::move(new_columns), current_rows);
|
chunks.emplace_back(std::move(new_columns), current_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
chunks.back().setChunkInfo(std::make_shared<AggregatedChunkInfo>());
|
chunks.back().getChunkInfos().add(std::make_shared<AggregatedChunkInfo>());
|
||||||
states[i].current_row = states[i].to_row;
|
states[i].current_row = states[i].to_row;
|
||||||
|
|
||||||
/// We assume that sizes in bytes of rows are almost the same.
|
/// We assume that sizes in bytes of rows are almost the same.
|
||||||
|
@ -6,18 +6,22 @@ namespace DB
|
|||||||
{
|
{
|
||||||
|
|
||||||
/// To carry part level if chunk is produced by a merge tree source
|
/// To carry part level if chunk is produced by a merge tree source
|
||||||
class MergeTreePartLevelInfo : public ChunkInfo
|
class MergeTreePartLevelInfo : public ChunkInfoCloneable<MergeTreePartLevelInfo>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
MergeTreePartLevelInfo() = delete;
|
MergeTreePartLevelInfo() = delete;
|
||||||
explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { }
|
explicit MergeTreePartLevelInfo(ssize_t part_level)
|
||||||
|
: origin_merge_tree_part_level(part_level)
|
||||||
|
{ }
|
||||||
|
MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default;
|
||||||
|
|
||||||
size_t origin_merge_tree_part_level = 0;
|
size_t origin_merge_tree_part_level = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline size_t getPartLevelFromChunk(const Chunk & chunk)
|
inline size_t getPartLevelFromChunk(const Chunk & chunk)
|
||||||
{
|
{
|
||||||
const auto & info = chunk.getChunkInfo();
|
const auto part_level_info = chunk.getChunkInfos().get<MergeTreePartLevelInfo>();
|
||||||
if (const auto * part_level_info = typeid_cast<const MergeTreePartLevelInfo *>(info.get()))
|
if (part_level_info)
|
||||||
return part_level_info->origin_merge_tree_part_level;
|
return part_level_info->origin_merge_tree_part_level;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false)
|
static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false)
|
||||||
{
|
{
|
||||||
chunk->setChunkInfo(std::make_shared<ChunkSelectFinalIndices>(std::move(chunk->replace_final_selection)));
|
chunk->getChunkInfos().add(std::make_shared<ChunkSelectFinalIndices>(std::move(chunk->replace_final_selection)));
|
||||||
return IMergingAlgorithm::Status(std::move(*chunk), finished);
|
return IMergingAlgorithm::Status(std::move(*chunk), finished);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
#include <Processors/Merges/Algorithms/MergedData.h>
|
#include <Processors/Merges/Algorithms/MergedData.h>
|
||||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||||
#include <Processors/Merges/Algorithms/RowRef.h>
|
#include <Processors/Merges/Algorithms/RowRef.h>
|
||||||
|
#include <Processors/Chunk.h>
|
||||||
|
|
||||||
namespace Poco
|
namespace Poco
|
||||||
{
|
{
|
||||||
@ -14,11 +15,13 @@ namespace DB
|
|||||||
|
|
||||||
/** Use in skipping final to keep list of indices of selected row after merging final
|
/** Use in skipping final to keep list of indices of selected row after merging final
|
||||||
*/
|
*/
|
||||||
struct ChunkSelectFinalIndices : public ChunkInfo
|
struct ChunkSelectFinalIndices : public ChunkInfoCloneable<ChunkSelectFinalIndices>
|
||||||
{
|
{
|
||||||
|
explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_);
|
||||||
|
ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default;
|
||||||
|
|
||||||
const ColumnPtr column_holder;
|
const ColumnPtr column_holder;
|
||||||
const ColumnUInt64 * select_final_indices = nullptr;
|
const ColumnUInt64 * select_final_indices = nullptr;
|
||||||
explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Merges several sorted inputs into one.
|
/** Merges several sorted inputs into one.
|
||||||
|
@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare()
|
|||||||
bool is_port_full = !output.canPush();
|
bool is_port_full = !output.canPush();
|
||||||
|
|
||||||
/// Push if has data.
|
/// Push if has data.
|
||||||
if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full)
|
if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full)
|
||||||
output.push(std::move(state.output_chunk));
|
output.push(std::move(state.output_chunk));
|
||||||
|
|
||||||
if (!is_initialized)
|
if (!is_initialized)
|
||||||
|
@ -129,7 +129,7 @@ public:
|
|||||||
|
|
||||||
IMergingAlgorithm::Status status = algorithm.merge();
|
IMergingAlgorithm::Status status = algorithm.merge();
|
||||||
|
|
||||||
if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo())
|
if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty())
|
||||||
{
|
{
|
||||||
// std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl;
|
// std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl;
|
||||||
state.output_chunk = std::move(status.chunk);
|
state.output_chunk = std::move(status.chunk);
|
||||||
|
@ -20,7 +20,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
String getName() const override { return "RemoteSink"; }
|
String getName() const override { return "RemoteSink"; }
|
||||||
void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); }
|
void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); }
|
||||||
void onFinish() override { RemoteInserter::onFinish(); }
|
void onFinish() override { RemoteInserter::onFinish(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -15,9 +15,8 @@ void SinkToStorage::onConsume(Chunk chunk)
|
|||||||
*/
|
*/
|
||||||
Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns()));
|
Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns()));
|
||||||
|
|
||||||
consume(chunk.clone());
|
consume(chunk);
|
||||||
if (!lastBlockIsDuplicate())
|
cur_chunk = std::move(chunk);
|
||||||
cur_chunk = std::move(chunk);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SinkToStorage::GenerateResult SinkToStorage::onGenerate()
|
SinkToStorage::GenerateResult SinkToStorage::onGenerate()
|
||||||
|
@ -18,8 +18,7 @@ public:
|
|||||||
void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }
|
void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void consume(Chunk chunk) = 0;
|
virtual void consume(Chunk & chunk) = 0;
|
||||||
virtual bool lastBlockIsDuplicate() const { return false; }
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<TableLockHolder> table_locks;
|
std::vector<TableLockHolder> table_locks;
|
||||||
@ -38,7 +37,7 @@ class NullSinkToStorage : public SinkToStorage
|
|||||||
public:
|
public:
|
||||||
using SinkToStorage::SinkToStorage;
|
using SinkToStorage::SinkToStorage;
|
||||||
std::string getName() const override { return "NullSinkToStorage"; }
|
std::string getName() const override { return "NullSinkToStorage"; }
|
||||||
void consume(Chunk) override {}
|
void consume(Chunk &) override {}
|
||||||
};
|
};
|
||||||
|
|
||||||
using SinkPtr = std::shared_ptr<SinkToStorage>;
|
using SinkPtr = std::shared_ptr<SinkToStorage>;
|
||||||
|
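A sketch of what a concrete sink looks like under the reference-based `consume(Chunk &)` signature shown above; `ExampleSink` and `writeBlock` are placeholders rather than code from this change. Since the chunk is now passed by reference and reused by the caller, the sink reads the columns instead of detaching them.

```cpp
#include <Processors/Sinks/SinkToStorage.h>

// Placeholder for whatever actually persists the block.
void writeBlock(const DB::Block & block);

class ExampleSink : public DB::SinkToStorage
{
public:
    using SinkToStorage::SinkToStorage;

    std::string getName() const override { return "ExampleSink"; }

protected:
    void consume(DB::Chunk & chunk) override
    {
        // The caller keeps ownership of the chunk, so copy the columns instead of detaching them.
        writeBlock(getHeader().cloneWithColumns(chunk.getColumns()));
    }
};
```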
@ -43,7 +43,10 @@ protected:
|
|||||||
info->bucket_num = res.info.bucket_num;
|
info->bucket_num = res.info.bucket_num;
|
||||||
info->is_overflows = res.info.is_overflows;
|
info->is_overflows = res.info.is_overflows;
|
||||||
|
|
||||||
return Chunk(res.getColumns(), res.rows(), std::move(info));
|
auto chunk = Chunk(res.getColumns(), res.rows());
|
||||||
|
chunk.getChunkInfos().add(std::move(info));
|
||||||
|
|
||||||
|
return chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -176,7 +176,7 @@ std::optional<Chunk> RemoteSource::tryGenerate()
|
|||||||
auto info = std::make_shared<AggregatedChunkInfo>();
|
auto info = std::make_shared<AggregatedChunkInfo>();
|
||||||
info->bucket_num = block.info.bucket_num;
|
info->bucket_num = block.info.bucket_num;
|
||||||
info->is_overflows = block.info.is_overflows;
|
info->is_overflows = block.info.is_overflows;
|
||||||
chunk.setChunkInfo(std::move(info));
|
chunk.getChunkInfos().add(std::move(info));
|
||||||
}
|
}
|
||||||
|
|
||||||
return chunk;
|
return chunk;
|
||||||
|
@ -5,7 +5,9 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {}
|
SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows())
|
SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows())
|
||||||
{
|
{
|
||||||
@ -20,7 +22,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp
|
|||||||
auto info = std::make_shared<AggregatedChunkInfo>();
|
auto info = std::make_shared<AggregatedChunkInfo>();
|
||||||
info->bucket_num = data.info.bucket_num;
|
info->bucket_num = data.info.bucket_num;
|
||||||
info->is_overflows = data.info.is_overflows;
|
info->is_overflows = data.info.is_overflows;
|
||||||
chunk.setChunkInfo(std::move(info));
|
chunk.getChunkInfos().add(std::move(info));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate()
|
|||||||
variants.aggregates_pool = variants.aggregates_pools.at(0).get();
|
variants.aggregates_pool = variants.aggregates_pools.at(0).get();
|
||||||
|
|
||||||
/// Pass info about used memory by aggregate functions further.
|
/// Pass info about used memory by aggregate functions further.
|
||||||
to_push_chunk.setChunkInfo(std::make_shared<ChunkInfoWithAllocatedBytes>(cur_block_bytes));
|
to_push_chunk.getChunkInfos().add(std::make_shared<ChunkInfoWithAllocatedBytes>(cur_block_bytes));
|
||||||
|
|
||||||
cur_block_bytes = 0;
|
cur_block_bytes = 0;
|
||||||
cur_block_size = 0;
|
cur_block_size = 0;
|
||||||
@ -351,11 +351,12 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati
|
|||||||
void FinalizeAggregatedTransform::transform(Chunk & chunk)
|
void FinalizeAggregatedTransform::transform(Chunk & chunk)
|
||||||
{
|
{
|
||||||
if (params->final)
|
if (params->final)
|
||||||
finalizeChunk(chunk, aggregates_mask);
|
|
||||||
else if (!chunk.getChunkInfo())
|
|
||||||
{
|
{
|
||||||
auto info = std::make_shared<AggregatedChunkInfo>();
|
finalizeChunk(chunk, aggregates_mask);
|
||||||
chunk.setChunkInfo(std::move(info));
|
}
|
||||||
|
else if (!chunk.getChunkInfos().get<AggregatedChunkInfo>())
|
||||||
|
{
|
||||||
|
chunk.getChunkInfos().add(std::make_shared<AggregatedChunkInfo>());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include <Processors/ISimpleTransform.h>
|
#include <Processors/ISimpleTransform.h>
|
||||||
#include <Processors/Transforms/AggregatingTransform.h>
|
#include <Processors/Transforms/AggregatingTransform.h>
|
||||||
#include <Processors/Transforms/finalizeChunk.h>
|
#include <Processors/Transforms/finalizeChunk.h>
|
||||||
|
#include <Processors/Chunk.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -12,10 +13,12 @@ namespace DB
|
|||||||
struct InputOrderInfo;
|
struct InputOrderInfo;
|
||||||
using InputOrderInfoPtr = std::shared_ptr<const InputOrderInfo>;
|
using InputOrderInfoPtr = std::shared_ptr<const InputOrderInfo>;
|
||||||
|
|
||||||
struct ChunkInfoWithAllocatedBytes : public ChunkInfo
|
struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable<ChunkInfoWithAllocatedBytes>
|
||||||
{
|
{
|
||||||
|
ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default;
|
||||||
explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_)
|
explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_)
|
||||||
: allocated_bytes(allocated_bytes_) {}
|
: allocated_bytes(allocated_bytes_) {}
|
||||||
|
|
||||||
Int64 allocated_bytes;
|
Int64 allocated_bytes;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block)
|
|||||||
|
|
||||||
UInt64 num_rows = block.rows();
|
UInt64 num_rows = block.rows();
|
||||||
Chunk chunk(block.getColumns(), num_rows);
|
Chunk chunk(block.getColumns(), num_rows);
|
||||||
chunk.setChunkInfo(std::move(info));
|
chunk.getChunkInfos().add(std::move(info));
|
||||||
|
|
||||||
return chunk;
|
return chunk;
|
||||||
}
|
}
|
||||||
@ -44,15 +44,11 @@ namespace
|
|||||||
{
|
{
|
||||||
const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk)
|
const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk)
|
||||||
{
|
{
|
||||||
const auto & info = chunk.getChunkInfo();
|
auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>();
|
||||||
if (!info)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk.");
|
|
||||||
|
|
||||||
const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
|
|
||||||
if (!agg_info)
|
if (!agg_info)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo.");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo.");
|
||||||
|
|
||||||
return agg_info;
|
return agg_info.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reads chunks from file in native format. Provide chunks with aggregation info.
|
/// Reads chunks from file in native format. Provide chunks with aggregation info.
|
||||||
@ -210,11 +206,7 @@ private:
|
|||||||
|
|
||||||
void process(Chunk && chunk)
|
void process(Chunk && chunk)
|
||||||
{
|
{
|
||||||
if (!chunk.hasChunkInfo())
|
auto chunks_to_merge = chunk.getChunkInfos().get<ChunksToMerge>();
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName());
|
|
||||||
|
|
||||||
const auto & info = chunk.getChunkInfo();
|
|
||||||
const auto * chunks_to_merge = typeid_cast<const ChunksToMerge *>(info.get());
|
|
||||||
if (!chunks_to_merge)
|
if (!chunks_to_merge)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName());
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName());
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
#include <Compression/CompressedReadBuffer.h>
|
#include <Compression/CompressedReadBuffer.h>
|
||||||
#include <IO/ReadBufferFromFile.h>
|
#include <IO/ReadBufferFromFile.h>
|
||||||
#include <Interpreters/Aggregator.h>
|
#include <Interpreters/Aggregator.h>
|
||||||
|
#include <Processors/Chunk.h>
|
||||||
#include <Processors/IAccumulatingTransform.h>
|
#include <Processors/IAccumulatingTransform.h>
|
||||||
#include <Common/Stopwatch.h>
|
#include <Common/Stopwatch.h>
|
||||||
#include <Common/setThreadName.h>
|
#include <Common/setThreadName.h>
|
||||||
@ -19,7 +20,7 @@ namespace CurrentMetrics
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
class AggregatedChunkInfo : public ChunkInfo
|
class AggregatedChunkInfo : public ChunkInfoCloneable<AggregatedChunkInfo>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
bool is_overflows = false;
|
bool is_overflows = false;
|
||||||
|
@ -27,18 +27,12 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
ExceptionKeepingTransform::work();
|
ExceptionKeepingTransform::work();
|
||||||
if (finish_chunk)
|
|
||||||
{
|
|
||||||
data.chunk = std::move(finish_chunk);
|
|
||||||
ready_output = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void onConsume(Chunk chunk) override
|
void onConsume(Chunk chunk) override
|
||||||
{
|
{
|
||||||
if (auto res_chunk = DB::Squashing::squash(std::move(chunk)))
|
cur_chunk = Squashing::squash(std::move(chunk));
|
||||||
cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
GenerateResult onGenerate() override
|
GenerateResult onGenerate() override
|
||||||
@ -48,16 +42,10 @@ protected:
|
|||||||
res.is_done = true;
|
res.is_done = true;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
void onFinish() override
|
|
||||||
{
|
|
||||||
auto chunk = DB::Squashing::squash({});
|
|
||||||
finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows());
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Squashing squashing;
|
Squashing squashing;
|
||||||
Chunk cur_chunk;
|
Chunk cur_chunk;
|
||||||
Chunk finish_chunk;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
|
|
||||||
#include <Interpreters/ProcessList.h>
|
|
||||||
#include <Processors/Transforms/CountingTransform.h>
|
#include <Processors/Transforms/CountingTransform.h>
|
||||||
|
|
||||||
|
#include <IO/Progress.h>
|
||||||
|
#include <Interpreters/ProcessList.h>
|
||||||
#include <Common/ProfileEvents.h>
|
#include <Common/ProfileEvents.h>
|
||||||
#include <Common/ThreadStatus.h>
|
#include <Common/ThreadStatus.h>
|
||||||
|
|
||||||
|
238
src/Processors/Transforms/DeduplicationTokenTransforms.cpp
Normal file
238
src/Processors/Transforms/DeduplicationTokenTransforms.cpp
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
#include <Processors/Transforms/DeduplicationTokenTransforms.h>
|
||||||
|
|
||||||
|
#include <IO/WriteHelpers.h>
|
||||||
|
|
||||||
|
#include <Common/logger_useful.h>
|
||||||
|
#include <Common/Exception.h>
|
||||||
|
#include <Common/SipHash.h>
|
||||||
|
|
||||||
|
|
||||||
|
#include <fmt/core.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int LOGICAL_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
void RestoreChunkInfosTransform::transform(Chunk & chunk)
|
||||||
|
{
|
||||||
|
chunk.getChunkInfos().append(chunk_infos.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace DeduplicationToken
|
||||||
|
{
|
||||||
|
|
||||||
|
String TokenInfo::getToken() const
|
||||||
|
{
|
||||||
|
if (!isDefined())
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, stage {}, token {}", stage, debugToken());
|
||||||
|
|
||||||
|
return getTokenImpl();
|
||||||
|
}
|
||||||
|
|
||||||
|
String TokenInfo::getTokenImpl() const
|
||||||
|
{
|
||||||
|
String result;
|
||||||
|
result.reserve(getTotalSize());
|
||||||
|
|
||||||
|
for (const auto & part : parts)
|
||||||
|
{
|
||||||
|
if (!result.empty())
|
||||||
|
result.append(":");
|
||||||
|
result.append(part);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
String TokenInfo::debugToken() const
|
||||||
|
{
|
||||||
|
return getTokenImpl();
|
||||||
|
}
|
||||||
|
|
||||||
|
void TokenInfo::addChunkHash(String part)
|
||||||
|
{
|
||||||
|
if (stage == UNDEFINED && empty())
|
||||||
|
stage = DEFINE_SOURCE_WITH_HASHES;
|
||||||
|
|
||||||
|
if (stage != DEFINE_SOURCE_WITH_HASHES)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
|
||||||
|
|
||||||
|
addTokenPart(std::move(part));
|
||||||
|
}
|
||||||
|
|
||||||
|
void TokenInfo::finishChunkHashes()
|
||||||
|
{
|
||||||
|
if (stage == UNDEFINED && empty())
|
||||||
|
stage = DEFINE_SOURCE_WITH_HASHES;
|
||||||
|
|
||||||
|
if (stage != DEFINE_SOURCE_WITH_HASHES)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
|
||||||
|
|
||||||
|
stage = DEFINED;
|
||||||
|
}
|
||||||
|
|
||||||
|
void TokenInfo::setUserToken(const String & token)
|
||||||
|
{
|
||||||
|
if (stage == UNDEFINED && empty())
|
||||||
|
stage = DEFINE_SOURCE_USER_TOKEN;
|
||||||
|
|
||||||
|
if (stage != DEFINE_SOURCE_USER_TOKEN)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
|
||||||
|
|
||||||
|
addTokenPart(fmt::format("user-token-{}", token));
|
||||||
|
}
|
||||||
|
|
||||||
|
void TokenInfo::setSourceWithUserToken(size_t block_number)
|
||||||
|
{
|
||||||
|
if (stage != DEFINE_SOURCE_USER_TOKEN)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
|
||||||
|
|
||||||
|
addTokenPart(fmt::format("source-number-{}", block_number));
|
||||||
|
|
||||||
|
stage = DEFINED;
|
||||||
|
}
|
||||||
|
|
||||||
|
void TokenInfo::setViewID(const String & id)
|
||||||
|
{
|
||||||
|
if (stage == DEFINED)
|
||||||
|
stage = DEFINE_VIEW;
|
||||||
|
|
||||||
|
if (stage != DEFINE_VIEW)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
|
||||||
|
|
||||||
|
addTokenPart(fmt::format("view-id-{}", id));
|
||||||
|
}
|
||||||
|
|
||||||
|
void TokenInfo::setViewBlockNumber(size_t block_number)
|
||||||
|
{
|
||||||
|
if (stage != DEFINE_VIEW)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken());
|
||||||
|
|
||||||
|
addTokenPart(fmt::format("view-block-{}", block_number));
|
||||||
|
|
||||||
|
stage = DEFINED;
|
||||||
|
}
|
||||||
|
|
||||||
|
void TokenInfo::reset()
|
||||||
|
{
|
||||||
|
stage = UNDEFINED;
|
||||||
|
parts.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void TokenInfo::addTokenPart(String part)
|
||||||
|
{
|
||||||
|
parts.push_back(std::move(part));
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t TokenInfo::getTotalSize() const
|
||||||
|
{
|
||||||
|
if (parts.empty())
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
size_t size = 0;
|
||||||
|
for (const auto & part : parts)
|
||||||
|
size += part.size();
|
||||||
|
|
||||||
|
// We reserve extra space here to be able to insert a delimiter between the parts.
|
||||||
|
return size + parts.size() - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||||
|
void CheckTokenTransform::transform(Chunk & chunk)
|
||||||
|
{
|
||||||
|
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
|
||||||
|
|
||||||
|
if (!token_info)
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug);
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_TEST(log, "debug: {}, token: {}, columns {} rows {}", debug, token_info->debugToken(), chunk.getNumColumns(), chunk.getNumRows());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk)
|
||||||
|
{
|
||||||
|
SipHash hash;
|
||||||
|
for (const auto & colunm : chunk.getColumns())
|
||||||
|
colunm->updateHashFast(hash);
|
||||||
|
|
||||||
|
const auto hash_value = hash.get128();
|
||||||
|
return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DefineSourceWithChunkHashTransform::transform(Chunk & chunk)
|
||||||
|
{
|
||||||
|
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
|
||||||
|
|
||||||
|
if (!token_info)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform");
|
||||||
|
|
||||||
|
if (token_info->isDefined())
|
||||||
|
return;
|
||||||
|
|
||||||
|
token_info->addChunkHash(getChunkHash(chunk));
|
||||||
|
token_info->finishChunkHashes();
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetUserTokenTransform::transform(Chunk & chunk)
|
||||||
|
{
|
||||||
|
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
|
||||||
|
if (!token_info)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"TokenInfo is expected for consumed chunk in SetUserTokenTransform");
|
||||||
|
token_info->setUserToken(user_token);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetSourceBlockNumberTransform::transform(Chunk & chunk)
|
||||||
|
{
|
||||||
|
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
|
||||||
|
if (!token_info)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform");
|
||||||
|
token_info->setSourceWithUserToken(block_number++);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetViewIDTransform::transform(Chunk & chunk)
|
||||||
|
{
|
||||||
|
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
|
||||||
|
if (!token_info)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"TokenInfo is expected for consumed chunk in SetViewIDTransform");
|
||||||
|
token_info->setViewID(view_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetViewBlockNumberTransform::transform(Chunk & chunk)
|
||||||
|
{
|
||||||
|
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
|
||||||
|
if (!token_info)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform");
|
||||||
|
token_info->setViewBlockNumber(block_number++);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ResetTokenTransform::transform(Chunk & chunk)
|
||||||
|
{
|
||||||
|
auto token_info = chunk.getChunkInfos().get<TokenInfo>();
|
||||||
|
if (!token_info)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"TokenInfo is expected for consumed chunk in ResetTokenTransform");
|
||||||
|
|
||||||
|
token_info->reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
237
src/Processors/Transforms/DeduplicationTokenTransforms.h
Normal file
237
src/Processors/Transforms/DeduplicationTokenTransforms.h
Normal file
@ -0,0 +1,237 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Processors/Chunk.h>
|
||||||
|
#include <Processors/ISimpleTransform.h>
|
||||||
|
|
||||||
|
#include <base/defines.h>
|
||||||
|
#include "Common/Logger.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
class RestoreChunkInfosTransform : public ISimpleTransform
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_)
|
||||||
|
: ISimpleTransform(header_, header_, true)
|
||||||
|
, chunk_infos(std::move(chunk_infos_))
|
||||||
|
{}
|
||||||
|
|
||||||
|
String getName() const override { return "RestoreChunkInfosTransform"; }
|
||||||
|
|
||||||
|
void transform(Chunk & chunk) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Chunk::ChunkInfoCollection chunk_infos;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
namespace DeduplicationToken
|
||||||
|
{
|
||||||
|
class TokenInfo : public ChunkInfoCloneable<TokenInfo>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TokenInfo() = default;
|
||||||
|
TokenInfo(const TokenInfo & other) = default;
|
||||||
|
|
||||||
|
String getToken() const;
|
||||||
|
String debugToken() const;
|
||||||
|
|
||||||
|
bool empty() const { return parts.empty(); }
|
||||||
|
|
||||||
|
bool isDefined() const { return stage == DEFINED; }
|
||||||
|
|
||||||
|
void addChunkHash(String part);
|
||||||
|
void finishChunkHashes();
|
||||||
|
|
||||||
|
void setUserToken(const String & token);
|
||||||
|
void setSourceWithUserToken(size_t block_number);
|
||||||
|
|
||||||
|
void setViewID(const String & id);
|
||||||
|
void setViewBlockNumber(size_t block_number);
|
||||||
|
|
||||||
|
void reset();
|
||||||
|
|
||||||
|
private:
|
||||||
|
String getTokenImpl() const;
|
||||||
|
|
||||||
|
void addTokenPart(String part);
|
||||||
|
size_t getTotalSize() const;
|
||||||
|
|
||||||
|
/* Token has to be prepared in a particular order.
|
||||||
|
* BuildingStage ensures that the token is expanded in the following order.
|
||||||
|
* First the token is expanded with information about the source.
|
||||||
|
* It can be done in two ways: add several hash sums from the source chunks, or provide a user-defined deduplication token and its sequential block number.
|
||||||
|
*
|
||||||
|
* transition // method
|
||||||
|
* UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash
|
||||||
|
* DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash
|
||||||
|
* DEFINE_SOURCE_WITH_HASHES -> DEFINED // finishChunkHashes
|
||||||
|
*
|
||||||
|
* transition // method
|
||||||
|
* UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken
|
||||||
|
* DEFINE_SOURCE_USER_TOKEN -> DEFINED // setSourceWithUserToken
|
||||||
|
*
|
||||||
|
* After the token is defined, it can be extended with a view id and a view block number. It has to be expanded with view details if there are one or more views.
|
||||||
|
*
|
||||||
|
* transition // method
|
||||||
|
* DEFINED -> DEFINE_VIEW // setViewID
|
||||||
|
* DEFINE_VIEW -> DEFINED // setViewBlockNumber
|
||||||
|
*/
|
||||||
|
|
||||||
|
enum BuildingStage
|
||||||
|
{
|
||||||
|
UNDEFINED,
|
||||||
|
DEFINE_SOURCE_WITH_HASHES,
|
||||||
|
DEFINE_SOURCE_USER_TOKEN,
|
||||||
|
DEFINE_VIEW,
|
||||||
|
DEFINED,
|
||||||
|
};
|
||||||
|
|
||||||
|
BuildingStage stage = UNDEFINED;
|
||||||
|
std::vector<String> parts;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||||
|
/// Use this class only in debug builds (e.g. in CI) for introspection.
|
||||||
|
class CheckTokenTransform : public ISimpleTransform
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CheckTokenTransform(String debug_, const Block & header_)
|
||||||
|
: ISimpleTransform(header_, header_, true)
|
||||||
|
, debug(std::move(debug_))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override { return "DeduplicationToken::CheckTokenTransform"; }
|
||||||
|
|
||||||
|
void transform(Chunk & chunk) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
String debug;
|
||||||
|
LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform");
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
class AddTokenInfoTransform : public ISimpleTransform
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit AddTokenInfoTransform(const Block & header_)
|
||||||
|
: ISimpleTransform(header_, header_, true)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; }
|
||||||
|
|
||||||
|
void transform(Chunk & chunk) override
|
||||||
|
{
|
||||||
|
chunk.getChunkInfos().add(std::make_shared<TokenInfo>());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class DefineSourceWithChunkHashTransform : public ISimpleTransform
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit DefineSourceWithChunkHashTransform(const Block & header_)
|
||||||
|
: ISimpleTransform(header_, header_, true)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; }
|
||||||
|
|
||||||
|
// Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts.
|
||||||
|
// But if there is a table with a different engine, we still need to define the source of the data in the deduplication token.
|
||||||
|
// We use this transform to define the source as a hash of the entire block in the deduplication token.
|
||||||
|
void transform(Chunk & chunk) override;
|
||||||
|
|
||||||
|
static String getChunkHash(const Chunk & chunk);
|
||||||
|
};
|
||||||
|
|
||||||
|
class ResetTokenTransform : public ISimpleTransform
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit ResetTokenTransform(const Block & header_)
|
||||||
|
: ISimpleTransform(header_, header_, true)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override { return "DeduplicationToken::ResetTokenTransform"; }
|
||||||
|
|
||||||
|
void transform(Chunk & chunk) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class SetUserTokenTransform : public ISimpleTransform
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SetUserTokenTransform(String user_token_, const Block & header_)
|
||||||
|
: ISimpleTransform(header_, header_, true)
|
||||||
|
, user_token(std::move(user_token_))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; }
|
||||||
|
|
||||||
|
void transform(Chunk & chunk) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
String user_token;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class SetSourceBlockNumberTransform : public ISimpleTransform
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit SetSourceBlockNumberTransform(const Block & header_)
|
||||||
|
: ISimpleTransform(header_, header_, true)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; }
|
||||||
|
|
||||||
|
void transform(Chunk & chunk) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
size_t block_number = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class SetViewIDTransform : public ISimpleTransform
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SetViewIDTransform(String view_id_, const Block & header_)
|
||||||
|
: ISimpleTransform(header_, header_, true)
|
||||||
|
, view_id(std::move(view_id_))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override { return "DeduplicationToken::SetViewIDTransform"; }
|
||||||
|
|
||||||
|
void transform(Chunk & chunk) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
String view_id;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class SetViewBlockNumberTransform : public ISimpleTransform
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit SetViewBlockNumberTransform(const Block & header_)
|
||||||
|
: ISimpleTransform(header_, header_, true)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; }
|
||||||
|
|
||||||
|
void transform(Chunk & chunk) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
size_t block_number = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
@ -1,5 +1,7 @@
|
|||||||
#include <Processors/Transforms/ExpressionTransform.h>
|
#include <Processors/Transforms/ExpressionTransform.h>
|
||||||
#include <Interpreters/ExpressionActions.h>
|
#include <Interpreters/ExpressionActions.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -365,10 +365,9 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare()
|
|||||||
return Status::Finished;
|
return Status::Finished;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!data.chunk.hasChunkInfo())
|
task = data.chunk.getChunkInfos().get<DelayedBlocksTask>();
|
||||||
|
if (!task)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info");
|
||||||
|
|
||||||
task = std::dynamic_pointer_cast<const DelayedBlocksTask>(data.chunk.getChunkInfo());
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -479,7 +478,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare()
|
|||||||
if (output.isFinished())
|
if (output.isFinished())
|
||||||
continue;
|
continue;
|
||||||
Chunk chunk;
|
Chunk chunk;
|
||||||
chunk.setChunkInfo(std::make_shared<DelayedBlocksTask>());
|
chunk.getChunkInfos().add(std::make_shared<DelayedBlocksTask>());
|
||||||
output.push(std::move(chunk));
|
output.push(std::move(chunk));
|
||||||
output.finish();
|
output.finish();
|
||||||
}
|
}
|
||||||
@ -496,7 +495,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare()
|
|||||||
{
|
{
|
||||||
Chunk chunk;
|
Chunk chunk;
|
||||||
auto task = std::make_shared<DelayedBlocksTask>(delayed_blocks, left_delayed_stream_finished_counter);
|
auto task = std::make_shared<DelayedBlocksTask>(delayed_blocks, left_delayed_stream_finished_counter);
|
||||||
chunk.setChunkInfo(task);
|
chunk.getChunkInfos().add(std::move(task));
|
||||||
output.push(std::move(chunk));
|
output.push(std::move(chunk));
|
||||||
}
|
}
|
||||||
delayed_blocks = nullptr;
|
delayed_blocks = nullptr;
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include <Processors/IProcessor.h>
|
#include <Processors/IProcessor.h>
|
||||||
|
#include <Processors/Chunk.h>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -111,11 +112,12 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class DelayedBlocksTask : public ChunkInfo
|
class DelayedBlocksTask : public ChunkInfoCloneable<DelayedBlocksTask>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
DelayedBlocksTask() = default;
|
DelayedBlocksTask() = default;
|
||||||
|
DelayedBlocksTask(const DelayedBlocksTask & other) = default;
|
||||||
explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_)
|
explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_)
|
||||||
: delayed_blocks(std::move(delayed_blocks_))
|
: delayed_blocks(std::move(delayed_blocks_))
|
||||||
, left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_)
|
, left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#include <Processors/Transforms/MaterializingTransform.h>
|
#include <Processors/Transforms/MaterializingTransform.h>
|
||||||
#include <Columns/ColumnSparse.h>
|
#include <Columns/ColumnSparse.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@@ -150,11 +150,7 @@ private:
         if (!chunk.hasRows())
             return;

-        const auto & info = chunk.getChunkInfo();
-        if (!info)
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform.");
-
-        const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
+        const auto & agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>();
         if (!agg_info)
             throw Exception(
                 ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform.");
@@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_
     auto info = std::make_shared<ChunksToMerge>();
     info->bucket_num = bucket;
     info->is_overflows = is_overflows;
-    info->chunks = std::make_unique<Chunks>(std::move(chunks));
+    info->chunks = std::make_shared<Chunks>(std::move(chunks));

     Chunk chunk;
-    chunk.setChunkInfo(std::move(info));
+    chunk.getChunkInfos().add(std::move(info));
     output.push(std::move(chunk));
 }

@@ -255,11 +255,10 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input)
     if (!chunk.hasRows())
         return;

-    const auto & info = chunk.getChunkInfo();
-    if (!info)
+    if (chunk.getChunkInfos().empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform.");

-    if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get()))
+    if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
     {
         Int32 bucket = agg_info->bucket_num;
         bool is_overflows = agg_info->is_overflows;
@@ -275,7 +274,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input)
             last_bucket_number[input] = bucket;
         }
     }
-    else if (typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get()))
+    else if (chunk.getChunkInfos().get<ChunkInfoWithAllocatedBytes>())
     {
         single_level_chunks.emplace_back(std::move(chunk));
     }
@@ -304,7 +303,11 @@ void GroupingAggregatedTransform::work()
             Int32 bucket = cur_block.info.bucket_num;
             auto chunk_info = std::make_shared<AggregatedChunkInfo>();
             chunk_info->bucket_num = bucket;
-            chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info)));
+
+            auto chunk = Chunk(cur_block.getColumns(), cur_block.rows());
+            chunk.getChunkInfos().add(std::move(chunk_info));
+
+            chunks_map[bucket].emplace_back(std::move(chunk));
         }
     }
 }
|
|||||||
|
|
||||||
void MergingAggregatedBucketTransform::transform(Chunk & chunk)
|
void MergingAggregatedBucketTransform::transform(Chunk & chunk)
|
||||||
{
|
{
|
||||||
const auto & info = chunk.getChunkInfo();
|
auto chunks_to_merge = chunk.getChunkInfos().get<ChunksToMerge>();
|
||||||
const auto * chunks_to_merge = typeid_cast<const ChunksToMerge *>(info.get());
|
|
||||||
|
|
||||||
if (!chunks_to_merge)
|
if (!chunks_to_merge)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge.");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge.");
|
||||||
|
|
||||||
@ -330,11 +331,10 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk)
|
|||||||
BlocksList blocks_list;
|
BlocksList blocks_list;
|
||||||
for (auto & cur_chunk : *chunks_to_merge->chunks)
|
for (auto & cur_chunk : *chunks_to_merge->chunks)
|
||||||
{
|
{
|
||||||
const auto & cur_info = cur_chunk.getChunkInfo();
|
if (cur_chunk.getChunkInfos().empty())
|
||||||
if (!cur_info)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform.");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform.");
|
||||||
|
|
||||||
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(cur_info.get()))
|
if (auto agg_info = cur_chunk.getChunkInfos().get<AggregatedChunkInfo>())
|
||||||
{
|
{
|
||||||
Block block = header.cloneWithColumns(cur_chunk.detachColumns());
|
Block block = header.cloneWithColumns(cur_chunk.detachColumns());
|
||||||
block.info.is_overflows = agg_info->is_overflows;
|
block.info.is_overflows = agg_info->is_overflows;
|
||||||
@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk)
|
|||||||
|
|
||||||
blocks_list.emplace_back(std::move(block));
|
blocks_list.emplace_back(std::move(block));
|
||||||
}
|
}
|
||||||
else if (typeid_cast<const ChunkInfoWithAllocatedBytes *>(cur_info.get()))
|
else if (cur_chunk.getChunkInfos().get<ChunkInfoWithAllocatedBytes>())
|
||||||
{
|
{
|
||||||
Block block = header.cloneWithColumns(cur_chunk.detachColumns());
|
Block block = header.cloneWithColumns(cur_chunk.detachColumns());
|
||||||
block.info.is_overflows = false;
|
block.info.is_overflows = false;
|
||||||
@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk)
|
|||||||
res_info->is_overflows = chunks_to_merge->is_overflows;
|
res_info->is_overflows = chunks_to_merge->is_overflows;
|
||||||
res_info->bucket_num = chunks_to_merge->bucket_num;
|
res_info->bucket_num = chunks_to_merge->bucket_num;
|
||||||
res_info->chunk_num = chunks_to_merge->chunk_num;
|
res_info->chunk_num = chunks_to_merge->chunk_num;
|
||||||
chunk.setChunkInfo(std::move(res_info));
|
chunk.getChunkInfos().add(std::move(res_info));
|
||||||
|
|
||||||
auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled);
|
auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled);
|
||||||
|
|
||||||
@ -405,11 +405,7 @@ bool SortingAggregatedTransform::tryPushChunk()
|
|||||||
|
|
||||||
void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input)
|
void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input)
|
||||||
{
|
{
|
||||||
const auto & info = chunk.getChunkInfo();
|
auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>();
|
||||||
if (!info)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform.");
|
|
||||||
|
|
||||||
const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
|
|
||||||
if (!agg_info)
|
if (!agg_info)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||||
"Chunk should have AggregatedChunkInfo in SortingAggregatedTransform.");
|
"Chunk should have AggregatedChunkInfo in SortingAggregatedTransform.");
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
#include <Core/SortDescription.h>
|
#include <Core/SortDescription.h>
|
||||||
#include <Common/HashTable/HashSet.h>
|
#include <Common/HashTable/HashSet.h>
|
||||||
#include <Interpreters/Aggregator.h>
|
#include <Interpreters/Aggregator.h>
|
||||||
|
#include <Processors/Chunk.h>
|
||||||
#include <Processors/IProcessor.h>
|
#include <Processors/IProcessor.h>
|
||||||
#include <Processors/ISimpleTransform.h>
|
#include <Processors/ISimpleTransform.h>
|
||||||
#include <Processors/ResizeProcessor.h>
|
#include <Processors/ResizeProcessor.h>
|
||||||
@ -142,9 +143,9 @@ private:
|
|||||||
void addChunk(Chunk chunk, size_t from_input);
|
void addChunk(Chunk chunk, size_t from_input);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ChunksToMerge : public ChunkInfo
|
struct ChunksToMerge : public ChunkInfoCloneable<ChunksToMerge>
|
||||||
{
|
{
|
||||||
std::unique_ptr<Chunks> chunks;
|
std::shared_ptr<Chunks> chunks;
|
||||||
Int32 bucket_num = -1;
|
Int32 bucket_num = -1;
|
||||||
bool is_overflows = false;
|
bool is_overflows = false;
|
||||||
UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order
|
UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order
|
||||||
|
@@ -32,11 +32,10 @@ void MergingAggregatedTransform::consume(Chunk chunk)
     total_input_rows += input_rows;
     ++total_input_blocks;

-    const auto & info = chunk.getChunkInfo();
-    if (!info)
+    if (chunk.getChunkInfos().empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform.");

-    if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get()))
+    if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
     {
         /** If the remote servers used a two-level aggregation method,
           * then blocks will contain information about the number of the bucket.
@@ -49,7 +48,7 @@ void MergingAggregatedTransform::consume(Chunk chunk)

         bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block));
     }
-    else if (typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get()))
+    else if (chunk.getChunkInfos().get<ChunkInfoWithAllocatedBytes>())
     {
         auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns());
         block.info.is_overflows = false;
@@ -89,7 +88,8 @@ Chunk MergingAggregatedTransform::generate()

     UInt64 num_rows = block.rows();
     Chunk chunk(block.getColumns(), num_rows);
-    chunk.setChunkInfo(std::move(info));
+
+    chunk.getChunkInfos().add(std::move(info));

     return chunk;
 }
@@ -10,20 +10,20 @@ namespace ErrorCodes
 }

 PlanSquashingTransform::PlanSquashingTransform(
-    const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes)
-    : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes)
+    Block header_, size_t min_block_size_rows, size_t min_block_size_bytes)
+    : IInflatingTransform(header_, header_)
+    , squashing(header_, min_block_size_rows, min_block_size_bytes)
 {
 }

 void PlanSquashingTransform::consume(Chunk chunk)
 {
-    if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo())
-        squashed_chunk.swap(current_chunk);
+    squashed_chunk = squashing.add(std::move(chunk));
 }

 Chunk PlanSquashingTransform::generate()
 {
-    if (!squashed_chunk.hasChunkInfo())
+    if (!squashed_chunk)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform");

     Chunk result_chunk;
@@ -33,12 +33,11 @@ Chunk PlanSquashingTransform::generate()

 bool PlanSquashingTransform::canGenerate()
 {
-    return squashed_chunk.hasChunkInfo();
+    return bool(squashed_chunk);
 }

 Chunk PlanSquashingTransform::getRemaining()
 {
-    Chunk current_chunk = squashing.flush();
-    return current_chunk;
+    return squashing.flush();
 }
 }
@@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform
 {
 public:
     PlanSquashingTransform(
-        const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes);
+        Block header_, size_t min_block_size_rows, size_t min_block_size_bytes);

     String getName() const override { return "PlanSquashingTransform"; }

@@ -23,7 +23,6 @@ protected:
 private:
     Squashing squashing;
     Chunk squashed_chunk;
-    Chunk finish_chunk;
 };
 }

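These hunks move the squashing transforms to a two-phase flow: add() only accumulates and decides when a batch is ready, a separate squash step materializes it, and flush() drains whatever remains at the end. A tiny self-contained model of that flow, using an invented ToySquashing type rather than the real Squashing interface:

#include <iostream>
#include <optional>
#include <vector>

struct ToySquashing
{
    size_t min_rows;
    std::vector<int> accumulated;

    // Returns a ready batch once enough rows were accumulated, otherwise nothing.
    std::optional<std::vector<int>> add(std::vector<int> rows)
    {
        accumulated.insert(accumulated.end(), rows.begin(), rows.end());
        if (accumulated.size() < min_rows)
            return std::nullopt;
        std::vector<int> ready;
        ready.swap(accumulated);
        return ready;
    }

    // Drain the remainder, mirroring getRemaining()/flush() in the hunks above.
    std::vector<int> flush()
    {
        std::vector<int> rest;
        rest.swap(accumulated);
        return rest;
    }
};

int main()
{
    ToySquashing squashing{4, {}};
    for (auto chunk : {std::vector<int>{1, 2}, std::vector<int>{3, 4, 5}})
        if (auto ready = squashing.add(std::move(chunk)))
            std::cout << "squashed batch of " << ready->size() << " rows\n";
    std::cout << "remaining " << squashing.flush().size() << " rows\n";
}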
@@ -26,7 +26,7 @@ public:
     void transform(Chunk & chunk) override
     {
         size_t num_rows = chunk.getNumRows();
-        const auto * select_final_indices_info = typeid_cast<const ChunkSelectFinalIndices *>(chunk.getChunkInfo().get());
+        auto select_final_indices_info = chunk.getChunkInfos().extract<ChunkSelectFinalIndices>();

         if (!select_final_indices_info || !select_final_indices_info->select_final_indices)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column");
@@ -41,7 +41,6 @@ public:

             chunk.setColumns(std::move(columns), index_column->size());
         }
-        chunk.setChunkInfo(nullptr);
     }
 };

@@ -1,13 +1,14 @@
+#include <utility>
 #include <Processors/Transforms/SquashingTransform.h>
 #include <Interpreters/Squashing.h>
+#include "Processors/Chunk.h"

 namespace DB
 {

 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
-    extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
 }

 SquashingTransform::SquashingTransform(
@@ -19,9 +20,7 @@ SquashingTransform::SquashingTransform(

 void SquashingTransform::onConsume(Chunk chunk)
 {
-    Chunk planned_chunk = squashing.add(std::move(chunk));
-    if (planned_chunk.hasChunkInfo())
-        cur_chunk = DB::Squashing::squash(std::move(planned_chunk));
+    cur_chunk = Squashing::squash(squashing.add(std::move(chunk)));
 }

 SquashingTransform::GenerateResult SquashingTransform::onGenerate()
@@ -34,10 +33,7 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate()

 void SquashingTransform::onFinish()
 {
-    Chunk chunk = squashing.flush();
-    if (chunk.hasChunkInfo())
-        chunk = DB::Squashing::squash(std::move(chunk));
-    finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows());
+    finish_chunk = Squashing::squash(squashing.flush());
 }

 void SquashingTransform::work()
@@ -50,6 +46,7 @@ void SquashingTransform::work()
     }

     ExceptionKeepingTransform::work();
+
     if (finish_chunk)
     {
         data.chunk = std::move(finish_chunk);
@@ -59,14 +56,14 @@ void SquashingTransform::work()

 SimpleSquashingChunksTransform::SimpleSquashingChunksTransform(
     const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes)
-    : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes)
+    : IInflatingTransform(header, header)
+    , squashing(header, min_block_size_rows, min_block_size_bytes)
 {
 }

 void SimpleSquashingChunksTransform::consume(Chunk chunk)
 {
-    Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()));
-    squashed_chunk.setColumns(current_block.getColumns(), current_block.rows());
+    squashed_chunk = Squashing::squash(squashing.add(std::move(chunk)));
 }

 Chunk SimpleSquashingChunksTransform::generate()
@@ -74,7 +71,9 @@ Chunk SimpleSquashingChunksTransform::generate()
     if (squashed_chunk.empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform");

-    return std::move(squashed_chunk);
+    Chunk result;
+    result.swap(squashed_chunk);
+    return result;
 }

 bool SimpleSquashingChunksTransform::canGenerate()
@ -84,143 +83,7 @@ bool SimpleSquashingChunksTransform::canGenerate()
|
|||||||
|
|
||||||
Chunk SimpleSquashingChunksTransform::getRemaining()
|
Chunk SimpleSquashingChunksTransform::getRemaining()
|
||||||
{
|
{
|
||||||
Block current_block = squashing.add({});
|
return Squashing::squash(squashing.flush());
|
||||||
squashed_chunk.setColumns(current_block.getColumns(), current_block.rows());
|
|
||||||
return std::move(squashed_chunk);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SquashingLegacy::SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_)
|
|
||||||
: min_block_size_rows(min_block_size_rows_)
|
|
||||||
, min_block_size_bytes(min_block_size_bytes_)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
Block SquashingLegacy::add(Block && input_block)
|
|
||||||
{
|
|
||||||
return addImpl<Block &&>(std::move(input_block));
|
|
||||||
}
|
|
||||||
|
|
||||||
Block SquashingLegacy::add(const Block & input_block)
|
|
||||||
{
|
|
||||||
return addImpl<const Block &>(input_block);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* To minimize copying, accept two types of argument: const reference for output
|
|
||||||
* stream, and rvalue reference for input stream, and decide whether to copy
|
|
||||||
* inside this function. This allows us not to copy Block unless we absolutely
|
|
||||||
* have to.
|
|
||||||
*/
|
|
||||||
template <typename ReferenceType>
|
|
||||||
Block SquashingLegacy::addImpl(ReferenceType input_block)
|
|
||||||
{
|
|
||||||
/// End of input stream.
|
|
||||||
if (!input_block)
|
|
||||||
{
|
|
||||||
Block to_return;
|
|
||||||
std::swap(to_return, accumulated_block);
|
|
||||||
return to_return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Just read block is already enough.
|
|
||||||
if (isEnoughSize(input_block))
|
|
||||||
{
|
|
||||||
/// If no accumulated data, return just read block.
|
|
||||||
if (!accumulated_block)
|
|
||||||
{
|
|
||||||
return std::move(input_block);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return accumulated data (maybe it has small size) and place new block to accumulated data.
|
|
||||||
Block to_return = std::move(input_block);
|
|
||||||
std::swap(to_return, accumulated_block);
|
|
||||||
return to_return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Accumulated block is already enough.
|
|
||||||
if (isEnoughSize(accumulated_block))
|
|
||||||
{
|
|
||||||
/// Return accumulated data and place new block to accumulated data.
|
|
||||||
Block to_return = std::move(input_block);
|
|
||||||
std::swap(to_return, accumulated_block);
|
|
||||||
return to_return;
|
|
||||||
}
|
|
||||||
|
|
||||||
append<ReferenceType>(std::move(input_block));
|
|
||||||
if (isEnoughSize(accumulated_block))
|
|
||||||
{
|
|
||||||
Block to_return;
|
|
||||||
std::swap(to_return, accumulated_block);
|
|
||||||
return to_return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Squashed block is not ready.
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename ReferenceType>
|
|
||||||
void SquashingLegacy::append(ReferenceType input_block)
|
|
||||||
{
|
|
||||||
if (!accumulated_block)
|
|
||||||
{
|
|
||||||
accumulated_block = std::move(input_block);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(blocksHaveEqualStructure(input_block, accumulated_block));
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i)
|
|
||||||
{
|
|
||||||
const auto source_column = input_block.getByPosition(i).column;
|
|
||||||
|
|
||||||
auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column));
|
|
||||||
mutable_column->insertRangeFrom(*source_column, 0, source_column->size());
|
|
||||||
accumulated_block.getByPosition(i).column = std::move(mutable_column);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
/// add() may be called again even after a previous add() threw an exception.
|
|
||||||
/// Keep accumulated_block in a valid state.
|
|
||||||
/// Seems ok to discard accumulated data because we're throwing an exception, which the caller will
|
|
||||||
/// hopefully interpret to mean "this block and all *previous* blocks are potentially lost".
|
|
||||||
accumulated_block.clear();
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool SquashingLegacy::isEnoughSize(const Block & block)
|
|
||||||
{
|
|
||||||
size_t rows = 0;
|
|
||||||
size_t bytes = 0;
|
|
||||||
|
|
||||||
for (const auto & [column, type, name] : block)
|
|
||||||
{
|
|
||||||
if (!column)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid column in block.");
|
|
||||||
|
|
||||||
if (!rows)
|
|
||||||
rows = column->size();
|
|
||||||
else if (rows != column->size())
|
|
||||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match");
|
|
||||||
|
|
||||||
bytes += column->byteSize();
|
|
||||||
}
|
|
||||||
|
|
||||||
return isEnoughSize(rows, bytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool SquashingLegacy::isEnoughSize(size_t rows, size_t bytes) const
|
|
||||||
{
|
|
||||||
return (!min_block_size_rows && !min_block_size_bytes)
|
|
||||||
|| (min_block_size_rows && rows >= min_block_size_rows)
|
|
||||||
|| (min_block_size_bytes && bytes >= min_block_size_bytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -30,35 +30,6 @@ private:
|
|||||||
Chunk finish_chunk;
|
Chunk finish_chunk;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class SquashingLegacy
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
/// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored.
|
|
||||||
SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_);
|
|
||||||
|
|
||||||
/** Add next block and possibly returns squashed block.
|
|
||||||
* At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true.
|
|
||||||
*/
|
|
||||||
Block add(Block && block);
|
|
||||||
Block add(const Block & block);
|
|
||||||
|
|
||||||
private:
|
|
||||||
size_t min_block_size_rows;
|
|
||||||
size_t min_block_size_bytes;
|
|
||||||
|
|
||||||
Block accumulated_block;
|
|
||||||
|
|
||||||
template <typename ReferenceType>
|
|
||||||
Block addImpl(ReferenceType block);
|
|
||||||
|
|
||||||
template <typename ReferenceType>
|
|
||||||
void append(ReferenceType block);
|
|
||||||
|
|
||||||
bool isEnoughSize(const Block & block);
|
|
||||||
bool isEnoughSize(size_t rows, size_t bytes) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
class SimpleSquashingChunksTransform : public IInflatingTransform
|
class SimpleSquashingChunksTransform : public IInflatingTransform
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -73,7 +44,7 @@ protected:
|
|||||||
Chunk getRemaining() override;
|
Chunk getRemaining() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
SquashingLegacy squashing;
|
Squashing squashing;
|
||||||
Chunk squashed_chunk;
|
Chunk squashed_chunk;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -151,11 +151,7 @@ void TotalsHavingTransform::transform(Chunk & chunk)
     /// Block with values not included in `max_rows_to_group_by`. We'll postpone it.
     if (overflow_row)
     {
-        const auto & info = chunk.getChunkInfo();
-        if (!info)
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform.");
-
-        const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
+        const auto & agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>();
         if (!agg_info)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform.");

@ -5,7 +5,9 @@
|
|||||||
#include <Interpreters/InterpreterSelectQuery.h>
|
#include <Interpreters/InterpreterSelectQuery.h>
|
||||||
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
|
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
|
||||||
#include <Parsers/ASTInsertQuery.h>
|
#include <Parsers/ASTInsertQuery.h>
|
||||||
|
#include <Processors/Chunk.h>
|
||||||
#include <Processors/Transforms/CountingTransform.h>
|
#include <Processors/Transforms/CountingTransform.h>
|
||||||
|
#include <Processors/Transforms/DeduplicationTokenTransforms.h>
|
||||||
#include <Processors/Transforms/PlanSquashingTransform.h>
|
#include <Processors/Transforms/PlanSquashingTransform.h>
|
||||||
#include <Processors/Transforms/SquashingTransform.h>
|
#include <Processors/Transforms/SquashingTransform.h>
|
||||||
#include <Processors/Transforms/ExpressionTransform.h>
|
#include <Processors/Transforms/ExpressionTransform.h>
|
||||||
@ -16,6 +18,7 @@
|
|||||||
#include <Storages/StorageMaterializedView.h>
|
#include <Storages/StorageMaterializedView.h>
|
||||||
#include <Storages/StorageValues.h>
|
#include <Storages/StorageValues.h>
|
||||||
#include <QueryPipeline/QueryPipelineBuilder.h>
|
#include <QueryPipeline/QueryPipelineBuilder.h>
|
||||||
|
#include <Common/Logger.h>
|
||||||
#include <Common/Exception.h>
|
#include <Common/Exception.h>
|
||||||
#include <Common/CurrentThread.h>
|
#include <Common/CurrentThread.h>
|
||||||
#include <Common/MemoryTracker.h>
|
#include <Common/MemoryTracker.h>
|
||||||
@ -24,10 +27,13 @@
|
|||||||
#include <Common/ThreadStatus.h>
|
#include <Common/ThreadStatus.h>
|
||||||
#include <Common/checkStackSize.h>
|
#include <Common/checkStackSize.h>
|
||||||
#include <Common/logger_useful.h>
|
#include <Common/logger_useful.h>
|
||||||
|
#include <Core/Field.h>
|
||||||
#include <Core/Settings.h>
|
#include <Core/Settings.h>
|
||||||
|
#include <base/defines.h>
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
|
||||||
namespace ProfileEvents
|
namespace ProfileEvents
|
||||||
@ -106,7 +112,7 @@ private:
|
|||||||
class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform
|
class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_);
|
ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_);
|
||||||
|
|
||||||
String getName() const override { return "ExecutingInnerQueryFromView"; }
|
String getName() const override { return "ExecutingInnerQueryFromView"; }
|
||||||
|
|
||||||
@ -117,6 +123,7 @@ protected:
|
|||||||
private:
|
private:
|
||||||
ViewsDataPtr views_data;
|
ViewsDataPtr views_data;
|
||||||
ViewRuntimeData & view;
|
ViewRuntimeData & view;
|
||||||
|
bool disable_deduplication_for_children;
|
||||||
|
|
||||||
struct State
|
struct State
|
||||||
{
|
{
|
||||||
@ -139,7 +146,7 @@ class PushingToLiveViewSink final : public SinkToStorage
|
|||||||
public:
|
public:
|
||||||
PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_);
|
PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_);
|
||||||
String getName() const override { return "PushingToLiveViewSink"; }
|
String getName() const override { return "PushingToLiveViewSink"; }
|
||||||
void consume(Chunk chunk) override;
|
void consume(Chunk & chunk) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
StorageLiveView & live_view;
|
StorageLiveView & live_view;
|
||||||
@ -153,7 +160,7 @@ class PushingToWindowViewSink final : public SinkToStorage
|
|||||||
public:
|
public:
|
||||||
PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_);
|
PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_);
|
||||||
String getName() const override { return "PushingToWindowViewSink"; }
|
String getName() const override { return "PushingToWindowViewSink"; }
|
||||||
void consume(Chunk chunk) override;
|
void consume(Chunk & chunk) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
StorageWindowView & window_view;
|
StorageWindowView & window_view;
|
||||||
@ -217,45 +224,10 @@ std::optional<Chain> generateViewChain(
|
|||||||
|
|
||||||
const auto & insert_settings = insert_context->getSettingsRef();
|
const auto & insert_settings = insert_context->getSettingsRef();
|
||||||
|
|
||||||
// Do not deduplicate insertions into MV if the main insertion is Ok
|
|
||||||
if (disable_deduplication_for_children)
|
if (disable_deduplication_for_children)
|
||||||
{
|
{
|
||||||
insert_context->setSetting("insert_deduplicate", Field{false});
|
insert_context->setSetting("insert_deduplicate", Field{false});
|
||||||
}
|
}
|
||||||
else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views &&
|
|
||||||
!insert_settings.insert_deduplication_token.value.empty())
|
|
||||||
{
|
|
||||||
/** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle
|
|
||||||
* deduplication in complex INSERT flows.
|
|
||||||
*
|
|
||||||
* Example:
|
|
||||||
*
|
|
||||||
* landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1
|
|
||||||
* | |
|
|
||||||
* └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘
|
|
||||||
*
|
|
||||||
* Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will
|
|
||||||
* be inserted into `ds_2_1`.
|
|
||||||
*
|
|
||||||
* We are forced to use view id instead of table id because there are some possible INSERT flows where no tables
|
|
||||||
* are involved.
|
|
||||||
*
|
|
||||||
* Example:
|
|
||||||
*
|
|
||||||
* landing -┬--> mv_1_1 --┬-> ds_1_1
|
|
||||||
* | |
|
|
||||||
* └--> mv_1_2 --┘
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
auto insert_deduplication_token = insert_settings.insert_deduplication_token.value;
|
|
||||||
|
|
||||||
if (view_id.hasUUID())
|
|
||||||
insert_deduplication_token += "_" + toString(view_id.uuid);
|
|
||||||
else
|
|
||||||
insert_deduplication_token += "_" + view_id.getFullNameNotQuoted();
|
|
||||||
|
|
||||||
insert_context->setSetting("insert_deduplication_token", insert_deduplication_token);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Processing of blocks for MVs is done block by block, and there will
|
// Processing of blocks for MVs is done block by block, and there will
|
||||||
// be no parallel reading after (plus it is not a costless operation)
|
// be no parallel reading after (plus it is not a costless operation)
|
||||||
@ -362,7 +334,13 @@ std::optional<Chain> generateViewChain(
|
|||||||
insert_columns.emplace_back(column.name);
|
insert_columns.emplace_back(column.name);
|
||||||
}
|
}
|
||||||
|
|
||||||
InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false);
|
InterpreterInsertQuery interpreter(
|
||||||
|
nullptr,
|
||||||
|
insert_context,
|
||||||
|
/* allow_materialized */ false,
|
||||||
|
/* no_squash */ false,
|
||||||
|
/* no_destination */ false,
|
||||||
|
/* async_isnert */ false);
|
||||||
|
|
||||||
/// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false`
|
/// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false`
|
||||||
bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type;
|
bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type;
|
||||||
@ -379,6 +357,10 @@ std::optional<Chain> generateViewChain(
|
|||||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL));
|
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||||
|
out.addSource(std::make_shared<DeduplicationToken::CheckTokenTransform>("Before squashing", out.getInputHeader()));
|
||||||
|
#endif
|
||||||
|
|
||||||
auto counting = std::make_shared<CountingTransform>(out.getInputHeader(), current_thread, insert_context->getQuota());
|
auto counting = std::make_shared<CountingTransform>(out.getInputHeader(), current_thread, insert_context->getQuota());
|
||||||
counting->setProcessListElement(insert_context->getProcessListElement());
|
counting->setProcessListElement(insert_context->getProcessListElement());
|
||||||
counting->setProgressCallback(insert_context->getProgressCallback());
|
counting->setProgressCallback(insert_context->getProgressCallback());
|
||||||
@ -421,11 +403,19 @@ std::optional<Chain> generateViewChain(
|
|||||||
|
|
||||||
if (type == QueryViewsLogElement::ViewType::MATERIALIZED)
|
if (type == QueryViewsLogElement::ViewType::MATERIALIZED)
|
||||||
{
|
{
|
||||||
|
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||||
|
out.addSource(std::make_shared<DeduplicationToken::CheckTokenTransform>("Right after Inner query", out.getInputHeader()));
|
||||||
|
#endif
|
||||||
|
|
||||||
auto executing_inner_query = std::make_shared<ExecutingInnerQueryFromViewTransform>(
|
auto executing_inner_query = std::make_shared<ExecutingInnerQueryFromViewTransform>(
|
||||||
storage_header, views_data->views.back(), views_data);
|
storage_header, views_data->views.back(), views_data, disable_deduplication_for_children);
|
||||||
executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms);
|
executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms);
|
||||||
|
|
||||||
out.addSource(std::move(executing_inner_query));
|
out.addSource(std::move(executing_inner_query));
|
||||||
|
|
||||||
|
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||||
|
out.addSource(std::make_shared<DeduplicationToken::CheckTokenTransform>("Right before Inner query", out.getInputHeader()));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
@ -466,11 +456,7 @@ Chain buildPushingToViewsChain(
|
|||||||
*/
|
*/
|
||||||
result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout));
|
result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout));
|
||||||
|
|
||||||
/// If the "root" table deduplicates blocks, there are no need to make deduplication for children
|
bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views;
|
||||||
/// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks
|
|
||||||
bool disable_deduplication_for_children = false;
|
|
||||||
if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views)
|
|
||||||
disable_deduplication_for_children = !no_destination && storage->supportsDeduplication();
|
|
||||||
|
|
||||||
auto table_id = storage->getStorageID();
|
auto table_id = storage->getStorageID();
|
||||||
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
|
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
|
||||||
@ -561,12 +547,25 @@ Chain buildPushingToViewsChain(
|
|||||||
auto sink = std::make_shared<PushingToLiveViewSink>(live_view_header, *live_view, storage, context);
|
auto sink = std::make_shared<PushingToLiveViewSink>(live_view_header, *live_view, storage, context);
|
||||||
sink->setRuntimeData(thread_status, elapsed_counter_ms);
|
sink->setRuntimeData(thread_status, elapsed_counter_ms);
|
||||||
result_chain.addSource(std::move(sink));
|
result_chain.addSource(std::move(sink));
|
||||||
|
|
||||||
|
result_chain.addSource(std::make_shared<DeduplicationToken::DefineSourceWithChunkHashTransform>(result_chain.getInputHeader()));
|
||||||
}
|
}
|
||||||
else if (auto * window_view = dynamic_cast<StorageWindowView *>(storage.get()))
|
else if (auto * window_view = dynamic_cast<StorageWindowView *>(storage.get()))
|
||||||
{
|
{
|
||||||
auto sink = std::make_shared<PushingToWindowViewSink>(window_view->getInputHeader(), *window_view, storage, context);
|
auto sink = std::make_shared<PushingToWindowViewSink>(window_view->getInputHeader(), *window_view, storage, context);
|
||||||
sink->setRuntimeData(thread_status, elapsed_counter_ms);
|
sink->setRuntimeData(thread_status, elapsed_counter_ms);
|
||||||
result_chain.addSource(std::move(sink));
|
result_chain.addSource(std::move(sink));
|
||||||
|
|
||||||
|
result_chain.addSource(std::make_shared<DeduplicationToken::DefineSourceWithChunkHashTransform>(result_chain.getInputHeader()));
|
||||||
|
}
|
||||||
|
else if (dynamic_cast<StorageMaterializedView *>(storage.get()))
|
||||||
|
{
|
||||||
|
auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert);
|
||||||
|
metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName());
|
||||||
|
sink->setRuntimeData(thread_status, elapsed_counter_ms);
|
||||||
|
result_chain.addSource(std::move(sink));
|
||||||
|
|
||||||
|
result_chain.addSource(std::make_shared<DeduplicationToken::DefineSourceWithChunkHashTransform>(result_chain.getInputHeader()));
|
||||||
}
|
}
|
||||||
/// Do not push to destination table if the flag is set
|
/// Do not push to destination table if the flag is set
|
||||||
else if (!no_destination)
|
else if (!no_destination)
|
||||||
@ -574,8 +573,15 @@ Chain buildPushingToViewsChain(
|
|||||||
auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert);
|
auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert);
|
||||||
metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName());
|
metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName());
|
||||||
sink->setRuntimeData(thread_status, elapsed_counter_ms);
|
sink->setRuntimeData(thread_status, elapsed_counter_ms);
|
||||||
|
|
||||||
|
result_chain.addSource(std::make_shared<DeduplicationToken::DefineSourceWithChunkHashTransform>(sink->getHeader()));
|
||||||
|
|
||||||
result_chain.addSource(std::move(sink));
|
result_chain.addSource(std::move(sink));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result_chain.addSource(std::make_shared<DeduplicationToken::DefineSourceWithChunkHashTransform>(storage_header));
|
||||||
|
}
|
||||||
|
|
||||||
if (result_chain.empty())
|
if (result_chain.empty())
|
||||||
result_chain.addSink(std::make_shared<NullSinkToStorage>(storage_header));
|
result_chain.addSink(std::make_shared<NullSinkToStorage>(storage_header));
|
||||||
@ -591,7 +597,7 @@ Chain buildPushingToViewsChain(
|
|||||||
return result_chain;
|
return result_chain;
|
||||||
}
|
}
|
||||||
|
|
||||||
static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data)
|
static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children)
|
||||||
{
|
{
|
||||||
const auto & context = view.context;
|
const auto & context = view.context;
|
||||||
|
|
||||||
@ -638,6 +644,19 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat
|
|||||||
pipeline.getHeader(),
|
pipeline.getHeader(),
|
||||||
std::make_shared<ExpressionActions>(std::move(converting))));
|
std::make_shared<ExpressionActions>(std::move(converting))));
|
||||||
|
|
||||||
|
pipeline.addTransform(std::make_shared<RestoreChunkInfosTransform>(std::move(chunk_infos), pipeline.getHeader()));
|
||||||
|
|
||||||
|
if (!disable_deduplication_for_children)
|
||||||
|
{
|
||||||
|
String materialize_view_id = view.table_id.hasUUID() ? toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted();
|
||||||
|
pipeline.addTransform(std::make_shared<DeduplicationToken::SetViewIDTransform>(std::move(materialize_view_id), pipeline.getHeader()));
|
||||||
|
pipeline.addTransform(std::make_shared<DeduplicationToken::SetViewBlockNumberTransform>(pipeline.getHeader()));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pipeline.addTransform(std::make_shared<DeduplicationToken::ResetTokenTransform>(pipeline.getHeader()));
|
||||||
|
}
|
||||||
|
|
||||||
return QueryPipelineBuilder::getPipeline(std::move(pipeline));
|
return QueryPipelineBuilder::getPipeline(std::move(pipeline));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -729,17 +748,19 @@ IProcessor::Status CopyingDataToViewsTransform::prepare()
|
|||||||
ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform(
|
ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform(
|
||||||
const Block & header,
|
const Block & header,
|
||||||
ViewRuntimeData & view_,
|
ViewRuntimeData & view_,
|
||||||
std::shared_ptr<ViewsData> views_data_)
|
std::shared_ptr<ViewsData> views_data_,
|
||||||
|
bool disable_deduplication_for_children_)
|
||||||
: ExceptionKeepingTransform(header, view_.sample_block)
|
: ExceptionKeepingTransform(header, view_.sample_block)
|
||||||
, views_data(std::move(views_data_))
|
, views_data(std::move(views_data_))
|
||||||
, view(view_)
|
, view(view_)
|
||||||
|
, disable_deduplication_for_children(disable_deduplication_for_children_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk)
|
void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk)
|
||||||
{
|
{
|
||||||
auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns());
|
auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns());
|
||||||
state.emplace(process(block, view, *views_data));
|
state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -771,10 +792,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void PushingToLiveViewSink::consume(Chunk chunk)
|
void PushingToLiveViewSink::consume(Chunk & chunk)
|
||||||
{
|
{
|
||||||
Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0);
|
Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0);
|
||||||
live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context);
|
live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.getColumns()), std::move(chunk.getChunkInfos()), context);
|
||||||
|
|
||||||
if (auto process = context->getProcessListElement())
|
if (auto process = context->getProcessListElement())
|
||||||
process->updateProgressIn(local_progress);
|
process->updateProgressIn(local_progress);
|
||||||
@ -794,11 +815,11 @@ PushingToWindowViewSink::PushingToWindowViewSink(
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void PushingToWindowViewSink::consume(Chunk chunk)
|
void PushingToWindowViewSink::consume(Chunk & chunk)
|
||||||
{
|
{
|
||||||
Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0);
|
Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0);
|
||||||
StorageWindowView::writeIntoWindowView(
|
StorageWindowView::writeIntoWindowView(
|
||||||
window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context);
|
window_view, getHeader().cloneWithColumns(chunk.getColumns()), std::move(chunk.getChunkInfos()), context);
|
||||||
|
|
||||||
if (auto process = context->getProcessListElement())
|
if (auto process = context->getProcessListElement())
|
||||||
process->updateProgressIn(local_progress);
|
process->updateProgressIn(local_progress);
|
||||||
|
@@ -193,7 +193,7 @@ public:
         return concurrency_control;
     }

-    void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); }
+    void addResources(QueryPlanResourceHolder resources_) { resources.append(std::move(resources_)); }
    void setQueryIdHolder(std::shared_ptr<QueryIdHolder> query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); }
    void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); }

@@ -5,7 +5,7 @@
 namespace DB
 {

-QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept
+QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept
 {
     table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end());
     storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end());
@@ -16,6 +16,12 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHo
     return *this;
 }

+QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept
+{
+    append(std::move(rhs));
+    return *this;
+}
+
 QueryPlanResourceHolder::QueryPlanResourceHolder() = default;
 QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default;
 QueryPlanResourceHolder::~QueryPlanResourceHolder() = default;
@@ -20,8 +20,11 @@ struct QueryPlanResourceHolder
     QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept;
     ~QueryPlanResourceHolder();

+    QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete;
+
     /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs.
     QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept;
+    QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept;

     /// Some processors may implicitly use Context or temporary Storage created by Interpreter.
     /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here,
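The holder's move assignment is redefined in terms of append(), so assigning a second holder accumulates its resources instead of discarding the ones already held. A toy, self-contained illustration of that design choice with invented names (ResourceHolder here is not the real QueryPlanResourceHolder):

#include <iterator>
#include <string>
#include <utility>
#include <vector>

struct ResourceHolder
{
    std::vector<std::string> locks;

    ResourceHolder & append(ResourceHolder && rhs)
    {
        locks.insert(locks.end(),
                     std::make_move_iterator(rhs.locks.begin()),
                     std::make_move_iterator(rhs.locks.end()));
        return *this;
    }

    // "a = std::move(b)" now keeps everything `a` already owned and adds `b`'s resources.
    ResourceHolder & operator=(ResourceHolder && rhs) noexcept { return append(std::move(rhs)); }
};

int main()
{
    ResourceHolder a{{"lock_on_t1"}};
    ResourceHolder b{{"lock_on_t2"}};
    a = std::move(b);                       // `a` now holds both locks
    return a.locks.size() == 2 ? 0 : 1;
}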
@@ -30,15 +30,8 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe
         response.setContentType("text/plain; version=0.0.4; charset=UTF-8");

         WriteBufferFromHTTPServerResponse wb(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout, write_event);
-        try
-        {
-            metrics_writer->write(wb);
-            wb.finalize();
-        }
-        catch (...)
-        {
-            wb.finalize();
-        }
+        metrics_writer->write(wb);
+        wb.finalize();
     }
     catch (...)
     {
@@ -888,12 +888,11 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro

     while (readDataNext())
     {
-        squashing.header = state.block_for_insert;
-        auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()});
-        if (planned_chunk.hasChunkInfo())
+        squashing.setHeader(state.block_for_insert.cloneEmpty());
+        auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}));
+        if (result_chunk)
         {
-            Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk));
-            auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns());
+            auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns());
             return PushResult
             {
                 .status = PushResult::TOO_MUCH_DATA,
@@ -902,12 +901,13 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro
         }
     }

-    auto planned_chunk = squashing.flush();
-    Chunk result_chunk;
-    if (planned_chunk.hasChunkInfo())
-        result_chunk = DB::Squashing::squash(std::move(planned_chunk));
+    Chunk result_chunk = Squashing::squash(squashing.flush());
+    if (!result_chunk)
+    {
+        return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context);
+    }

-    auto result = squashing.header.cloneWithColumns(result_chunk.getColumns());
+    auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns());
     return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context);
 }

@ -135,7 +135,7 @@ DistributedSink::DistributedSink(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void DistributedSink::consume(Chunk chunk)
|
void DistributedSink::consume(Chunk & chunk)
|
||||||
{
|
{
|
||||||
if (is_first_chunk)
|
if (is_first_chunk)
|
||||||
{
|
{
|
||||||
@ -143,7 +143,7 @@ void DistributedSink::consume(Chunk chunk)
|
|||||||
is_first_chunk = false;
|
is_first_chunk = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns());
|
auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns());
|
||||||
|
|
||||||
if (insert_sync)
|
if (insert_sync)
|
||||||
writeSync(ordinary_block);
|
writeSync(ordinary_block);
|
||||||
@ -421,7 +421,13 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si
|
|||||||
/// to resolve tables (in InterpreterInsertQuery::getTable())
|
/// to resolve tables (in InterpreterInsertQuery::getTable())
|
||||||
auto copy_query_ast = query_ast->clone();
|
auto copy_query_ast = query_ast->clone();
|
||||||
|
|
||||||
InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized);
|
InterpreterInsertQuery interp(
|
||||||
|
copy_query_ast,
|
||||||
|
job.local_context,
|
||||||
|
allow_materialized,
|
||||||
|
/* no_squash */ false,
|
||||||
|
/* no_destination */ false,
|
||||||
|
/* async_isnert */ false);
|
||||||
auto block_io = interp.execute();
|
auto block_io = interp.execute();
|
||||||
|
|
||||||
job.pipeline = std::move(block_io.pipeline);
|
job.pipeline = std::move(block_io.pipeline);
|
||||||
@ -716,7 +722,13 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const
|
|||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
InterpreterInsertQuery interp(query_ast, context, allow_materialized);
|
InterpreterInsertQuery interp(
|
||||||
|
query_ast,
|
||||||
|
context,
|
||||||
|
allow_materialized,
|
||||||
|
/* no_squash */ false,
|
||||||
|
/* no_destination */ false,
|
||||||
|
/* async_isnert */ false);
|
||||||
|
|
||||||
auto block_io = interp.execute();
|
auto block_io = interp.execute();
|
||||||
PushingPipelineExecutor executor(block_io.pipeline);
|
PushingPipelineExecutor executor(block_io.pipeline);
|
||||||
|
@@ -49,7 +49,7 @@ public:
         const Names & columns_to_send_);

     String getName() const override { return "DistributedSink"; }
-    void consume(Chunk chunk) override;
+    void consume(Chunk & chunk) override;
     void onFinish() override;

 private:
@@ -740,7 +740,14 @@ bool StorageFileLog::streamToViews()

     auto new_context = Context::createCopy(getContext());

-    InterpreterInsertQuery interpreter(insert, new_context, false, true, true);
+    InterpreterInsertQuery interpreter(
+        insert,
+        new_context,
+        /* allow_materialized */ false,
+        /* no_squash */ true,
+        /* no_destination */ true,
+        /* async_isnert */ false);
+
     auto block_io = interpreter.execute();

     /// Each stream responsible for closing it's files and store meta
@@ -1,5 +1,6 @@
 #pragma once

+#include <Core/Types_fwd.h>
 #include <base/types.h>

 #include <map>

@@ -9,9 +10,10 @@ namespace DB
 {

 class IStorage;
+struct SnapshotDetachedTable;

 using ConstStoragePtr = std::shared_ptr<const IStorage>;
 using StoragePtr = std::shared_ptr<IStorage>;
 using Tables = std::map<String, StoragePtr>;
+using SnapshotDetachedTables = std::map<String, SnapshotDetachedTable>;
 }
@@ -1099,7 +1099,13 @@ bool StorageKafka::streamToViews()

     // Create a stream for each consumer and join them in a union stream
     // Only insert into dependent views and expect that input blocks contain virtual columns
-    InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true);
+    InterpreterInsertQuery interpreter(
+        insert,
+        kafka_context,
+        /* allow_materialized */ false,
+        /* no_squash */ true,
+        /* no_destination */ true,
+        /* async_isnert */ false);
     auto block_io = interpreter.execute();

     // Create a stream for each consumer and join them in a union stream

@@ -71,9 +71,9 @@ public:
         new_hash.reset();
     }

-    void consume(Chunk chunk) override
+    void consume(Chunk & chunk) override
     {
-        auto block = getHeader().cloneWithColumns(chunk.detachColumns());
+        auto block = getHeader().cloneWithColumns(chunk.getColumns());
         block.updateHash(*new_hash);
         new_blocks->push_back(std::move(block));
     }
@@ -21,6 +21,7 @@ limitations under the License. */
 #include <Processors/Transforms/MaterializingTransform.h>
 #include <Processors/Executors/PullingAsyncPipelineExecutor.h>
 #include <Processors/Executors/PipelineExecutor.h>
+#include <Processors/Transforms/DeduplicationTokenTransforms.h>
 #include <Processors/Transforms/SquashingTransform.h>
 #include <QueryPipeline/QueryPipelineBuilder.h>
 #include <QueryPipeline/QueryPlanResourceHolder.h>

@@ -331,7 +332,7 @@ Pipe StorageLiveView::watch(
     return reader;
 }

-void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context)
+void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context)
 {
     auto output = std::make_shared<LiveViewSink>(*this);

@@ -408,6 +409,21 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context)
         builder = interpreter.buildQueryPipeline();
     }

+    builder.addSimpleTransform([&](const Block & cur_header)
+    {
+        return std::make_shared<RestoreChunkInfosTransform>(chunk_infos.clone(), cur_header);
+    });
+
+    String live_view_id = live_view.getStorageID().hasUUID() ? toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted();
+    builder.addSimpleTransform([&](const Block & stream_header)
+    {
+        return std::make_shared<DeduplicationToken::SetViewIDTransform>(live_view_id, stream_header);
+    });
+    builder.addSimpleTransform([&](const Block & stream_header)
+    {
+        return std::make_shared<DeduplicationToken::SetViewBlockNumberTransform>(stream_header);
+    });
+
     builder.addSimpleTransform([&](const Block & cur_header)
     {
         return std::make_shared<MaterializingTransform>(cur_header);
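The three transforms added above run in a fixed order: the chunk infos carried over from the source insert are restored first, then the token is scoped to the target view's identity, then to a per-view block number. Purely as an illustration of why that scoping matters for retries, the following self-contained sketch shows one way a view-scoped token could be composed from those three ingredients; it is a simplification and not the exact format produced by the `DeduplicationToken` transforms:

``` cpp
#include <cstdint>
#include <iostream>
#include <string>

// Illustrative only: same source token + same view + same block number => same token,
// so a replayed block maps to the token it had on the first attempt.
std::string viewScopedToken(const std::string & source_token, const std::string & view_id, uint64_t block_number)
{
    return source_token + ":view-" + view_id + ":block-" + std::to_string(block_number);
}

int main()
{
    std::cout << viewScopedToken("user-token-42", "d34db33f", 0) << '\n';
    std::cout << viewScopedToken("user-token-42", "d34db33f", 1) << '\n';
}
```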
@@ -118,7 +118,7 @@ public:
         return 0;
     }

-    void writeBlock(const Block & block, ContextPtr context);
+    void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context);

     void refresh();

@@ -378,7 +378,13 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr<StorageMaterializedView
 {
     CurrentThread::QueryScope query_scope(refresh_context); // create a thread group for the query

-    BlockIO block_io = InterpreterInsertQuery(refresh_query, refresh_context).execute();
+    BlockIO block_io = InterpreterInsertQuery(
+        refresh_query,
+        refresh_context,
+        /* allow_materialized */ false,
+        /* no_squash */ false,
+        /* no_destination */ false,
+        /* async_isnert */ false).execute();
     QueryPipeline & pipeline = block_io.pipeline;

     pipeline.setProgressCallback([this](const Progress & prog)
@@ -2340,21 +2340,26 @@ String IMergeTreeDataPart::getUniqueId() const
     return getDataPartStorage().getUniqueId();
 }

+UInt128 IMergeTreeDataPart::getPartBlockIDHash() const
+{
+    SipHash hash;
+    checksums.computeTotalChecksumDataOnly(hash);
+    return hash.get128();
+}
+
 String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const
 {
     if (info.level != 0)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get block id for non zero level part {}", name);

-    SipHash hash;
     if (token.empty())
     {
-        checksums.computeTotalChecksumDataOnly(hash);
+        const auto hash_value = getPartBlockIDHash();
-    }
+        return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]);
-    else
-    {
-        hash.update(token.data(), token.size());
     }

+    SipHash hash;
+    hash.update(token.data(), token.size());
     const auto hash_value = hash.get128();
     return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]);
 }
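The hunk above factors the checksum hash out into `getPartBlockIDHash()` but keeps the block-id format itself unchanged: partition id plus the two 64-bit halves of a 128-bit SipHash, joined with underscores. A self-contained sketch of just that string composition; the dummy struct and values stand in for ClickHouse's `UInt128` and `toString`, which are not reproduced here:

``` cpp
#include <cstdint>
#include <iostream>
#include <string>

struct UInt128Parts { uint64_t items[2]; };   // stand-in for the two halves of a 128-bit hash

std::string makeZeroLevelBlockID(const std::string & partition_id, UInt128Parts hash_value)
{
    return partition_id + "_" + std::to_string(hash_value.items[0]) + "_" + std::to_string(hash_value.items[1]);
}

int main()
{
    // e.g. partition "202407" and an arbitrary 128-bit value
    std::cout << makeZeroLevelBlockID("202407", {{0x1234, 0x5678}}) << '\n';   // 202407_4660_22136
}
```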
@@ -210,6 +210,7 @@ public:

     /// Compute part block id for zero level part. Otherwise throws an exception.
     /// If token is not empty, block id is calculated based on it instead of block data
+    UInt128 getPartBlockIDHash() const;
     String getZeroLevelPartBlockID(std::string_view token) const;

     void setName(const String & new_name);

@@ -145,8 +145,12 @@ ChunkAndProgress MergeTreeSelectProcessor::read()
                 ordered_columns.push_back(res.block.getByName(name).column);
             }

+            auto chunk = Chunk(ordered_columns, res.row_count);
+            if (add_part_level)
+                chunk.getChunkInfos().add(std::make_shared<MergeTreePartLevelInfo>(task->getInfo().data_part->info.level));
+
             return ChunkAndProgress{
-                .chunk = Chunk(ordered_columns, res.row_count, add_part_level ? std::make_shared<MergeTreePartLevelInfo>(task->getInfo().data_part->info.level) : nullptr),
+                .chunk = std::move(chunk),
                 .num_read_rows = res.num_read_rows,
                 .num_read_bytes = res.num_read_bytes,
                 .is_finished = false};

@@ -265,7 +265,10 @@ try
             ++it;
         }

-        return Chunk(std::move(res_columns), rows_read, add_part_level ? std::make_shared<MergeTreePartLevelInfo>(data_part->info.level) : nullptr);
+        auto result = Chunk(std::move(res_columns), rows_read);
+        if (add_part_level)
+            result.getChunkInfos().add(std::make_shared<MergeTreePartLevelInfo>(data_part->info.level));
+        return result;
     }
 }
 else
@@ -1,15 +1,22 @@
 #include <Storages/MergeTree/MergeTreeSink.h>
 #include <Storages/StorageMergeTree.h>
 #include <Interpreters/PartLog.h>
+#include <Processors/Transforms/DeduplicationTokenTransforms.h>
 #include <DataTypes/ObjectUtils.h>
 #include <Common/ProfileEventsScope.h>
 #include <Core/Settings.h>


 namespace ProfileEvents
 {
     extern const Event DuplicatedInsertedBlocks;
 }

+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
 namespace DB
 {


@@ -59,12 +66,12 @@ void MergeTreeSink::onCancel()
 {
 }

-void MergeTreeSink::consume(Chunk chunk)
+void MergeTreeSink::consume(Chunk & chunk)
 {
     if (num_blocks_processed > 0)
         storage.delayInsertOrThrowIfNeeded(nullptr, context, false);

-    auto block = getHeader().cloneWithColumns(chunk.detachColumns());
+    auto block = getHeader().cloneWithColumns(chunk.getColumns());
     if (!storage_snapshot->object_columns.empty())
         convertDynamicColumnsToTuples(block, storage_snapshot);

@@ -77,6 +84,18 @@ void MergeTreeSink::consume(Chunk chunk)
     size_t streams = 0;
     bool support_parallel_write = false;

+    auto token_info = chunk.getChunkInfos().get<DeduplicationToken::TokenInfo>();
+    if (!token_info)
+        throw Exception(ErrorCodes::LOGICAL_ERROR,
+            "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}",
+            storage.getStorageID().getNameForLogs());
+
+    const bool need_to_define_dedup_token = !token_info->isDefined();
+
+    String block_dedup_token;
+    if (token_info->isDefined())
+        block_dedup_token = token_info->getToken();
+
     for (auto & current_block : part_blocks)
     {
         ProfileEvents::Counters part_counters;

@@ -101,22 +120,16 @@ void MergeTreeSink::consume(Chunk chunk)
         if (!temp_part.part)
             continue;

+        if (need_to_define_dedup_token)
+        {
+            chassert(temp_part.part);
+            const auto hash_value = temp_part.part->getPartBlockIDHash();
+            token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]));
+        }
+
         if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite())
             support_parallel_write = true;

-        String block_dedup_token;
-        if (storage.getDeduplicationLog())
-        {
-            const String & dedup_token = settings.insert_deduplication_token;
-            if (!dedup_token.empty())
-            {
-                /// multiple blocks can be inserted within the same insert query
-                /// an ordinal number is added to dedup token to generate a distinctive block id for each block
-                block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum);
-                ++chunk_dedup_seqnum;
-            }
-        }
-
         size_t max_insert_delayed_streams_for_parallel_write;

         if (settings.max_insert_delayed_streams_for_parallel_write.changed)
|
|||||||
|
|
||||||
/// In case of too much columns/parts in block, flush explicitly.
|
/// In case of too much columns/parts in block, flush explicitly.
|
||||||
streams += temp_part.streams.size();
|
streams += temp_part.streams.size();
|
||||||
|
|
||||||
if (streams > max_insert_delayed_streams_for_parallel_write)
|
if (streams > max_insert_delayed_streams_for_parallel_write)
|
||||||
{
|
{
|
||||||
finishDelayedChunk();
|
finishDelayedChunk();
|
||||||
@ -144,11 +158,16 @@ void MergeTreeSink::consume(Chunk chunk)
|
|||||||
{
|
{
|
||||||
.temp_part = std::move(temp_part),
|
.temp_part = std::move(temp_part),
|
||||||
.elapsed_ns = elapsed_ns,
|
.elapsed_ns = elapsed_ns,
|
||||||
.block_dedup_token = std::move(block_dedup_token),
|
.block_dedup_token = block_dedup_token,
|
||||||
.part_counters = std::move(part_counters),
|
.part_counters = std::move(part_counters),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (need_to_define_dedup_token)
|
||||||
|
{
|
||||||
|
token_info->finishChunkHashes();
|
||||||
|
}
|
||||||
|
|
||||||
finishDelayedChunk();
|
finishDelayedChunk();
|
||||||
delayed_chunk = std::make_unique<MergeTreeSink::DelayedChunk>();
|
delayed_chunk = std::make_unique<MergeTreeSink::DelayedChunk>();
|
||||||
delayed_chunk->partitions = std::move(partitions);
|
delayed_chunk->partitions = std::move(partitions);
|
||||||
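The two hunks above define the token lifecycle inside `MergeTreeSink::consume`: if the chunk arrives without a user-defined token, the sink builds one itself by adding the hash of each written part and then sealing the token once all parts are delayed. The self-contained mock below only illustrates that call sequence; the real `DeduplicationToken::TokenInfo` lives in `DeduplicationTokenTransforms.h` (included by this commit) and its internals are not reproduced here:

``` cpp
#include <iostream>
#include <string>
#include <vector>

// Mock with the same call sequence: isDefined() -> addChunkHash() per part -> finishChunkHashes().
struct MockTokenInfo
{
    bool defined = false;
    std::vector<std::string> parts;

    bool isDefined() const { return defined; }
    void addChunkHash(std::string hash) { parts.push_back(std::move(hash)); }
    void finishChunkHashes() { defined = true; }
    std::string getToken() const { std::string t; for (const auto & p : parts) t += p + ";"; return t; }
};

int main()
{
    MockTokenInfo token_info;                               // attached to the consumed chunk
    const bool need_to_define = !token_info.isDefined();    // no user token was supplied

    for (const std::string & part_hash : {std::string("111_222"), std::string("333_444")})
        if (need_to_define)
            token_info.addChunkHash(part_hash);             // one hash per written part

    if (need_to_define)
        token_info.finishChunkHashes();                     // token is now fixed for this chunk

    std::cout << token_info.getToken() << '\n';             // 111_222;333_444;
}
```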
@@ -161,6 +180,8 @@ void MergeTreeSink::finishDelayedChunk()
     if (!delayed_chunk)
         return;

+    const Settings & settings = context->getSettingsRef();
+
     for (auto & partition : delayed_chunk->partitions)
     {
         ProfileEventsScope scoped_attach(&partition.part_counters);

@@ -179,7 +200,8 @@ void MergeTreeSink::finishDelayedChunk()
         storage.fillNewPartName(part, lock);

         auto * deduplication_log = storage.getDeduplicationLog();
-        if (deduplication_log)
+
+        if (settings.insert_deduplicate && deduplication_log)
         {
             const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token);
             auto res = deduplication_log->addPart(block_id, part->info);
@@ -25,7 +25,7 @@ public:
     ~MergeTreeSink() override;

     String getName() const override { return "MergeTreeSink"; }
-    void consume(Chunk chunk) override;
+    void consume(Chunk & chunk) override;
     void onStart() override;
     void onFinish() override;
     void onCancel() override;

@@ -36,7 +36,6 @@ private:
     size_t max_parts_per_block;
     ContextPtr context;
     StorageSnapshotPtr storage_snapshot;
-    UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token
     UInt64 num_blocks_processed = 0;

     /// We can delay processing for previous chunk and start writing a new one.
@@ -1301,6 +1301,7 @@ void PartMergerWriter::prepare()
 bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()
 {
     Block cur_block;
+    Block projection_header;
     if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block))
     {
         if (ctx->minmax_idx)

@@ -1318,14 +1319,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()

             ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds);
             Block block_to_squash = projection.calculate(cur_block, ctx->context);
-            projection_squashes[i].header = block_to_squash;
+            projection_squashes[i].setHeader(block_to_squash.cloneEmpty());
-            Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()});

-            if (planned_chunk.hasChunkInfo())
+            Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}));
+            if (squashed_chunk)
             {
-                Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk));
+                auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns());

-                auto result = block_to_squash.cloneWithColumns(projection_chunk.getColumns());
                 auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart(
                     *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num);
                 tmp_part.finalize();

@@ -1346,12 +1345,10 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()
         {
             const auto & projection = *ctx->projections_to_build[i];
             auto & projection_squash_plan = projection_squashes[i];
-            auto planned_chunk = projection_squash_plan.flush();
+            auto squashed_chunk = Squashing::squash(projection_squash_plan.flush());
-            if (planned_chunk.hasChunkInfo())
+            if (squashed_chunk)
             {
-                Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk));
+                auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns());

-                auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns());
                 auto temp_part = MergeTreeDataWriter::writeTempProjectionPart(
                     *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num);
                 temp_part.finalize();
@@ -3,6 +3,7 @@
 #include <Storages/MergeTree/ReplicatedMergeTreeSink.h>
 #include <Storages/MergeTree/InsertBlockInfo.h>
 #include <Interpreters/PartLog.h>
+#include <Processors/Transforms/DeduplicationTokenTransforms.h>
 #include "Common/Exception.h"
 #include <Common/FailPoint.h>
 #include <Common/ProfileEventsScope.h>

@@ -19,6 +20,7 @@
 #include <IO/Operators.h>
 #include <fmt/core.h>


 namespace ProfileEvents
 {
     extern const Event DuplicatedInsertedBlocks;

@@ -255,12 +257,12 @@ size_t ReplicatedMergeTreeSinkImpl<async_insert>::checkQuorumPrecondition(const
 }

 template<bool async_insert>
-void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
+void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk & chunk)
 {
     if (num_blocks_processed > 0)
         storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false);

-    auto block = getHeader().cloneWithColumns(chunk.detachColumns());
+    auto block = getHeader().cloneWithColumns(chunk.getColumns());

     const auto & settings = context->getSettingsRef();

@@ -286,13 +288,25 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)

     if constexpr (async_insert)
     {
-        const auto & chunk_info = chunk.getChunkInfo();
-        if (const auto * async_insert_info_ptr = typeid_cast<const AsyncInsertInfo *>(chunk_info.get()))
+        const auto async_insert_info_ptr = chunk.getChunkInfos().get<AsyncInsertInfo>();
+        if (async_insert_info_ptr)
             async_insert_info = std::make_shared<AsyncInsertInfo>(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens);
         else
             throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts");
     }

+    String block_dedup_token;
+    auto token_info = chunk.getChunkInfos().get<DeduplicationToken::TokenInfo>();
+    if (!token_info)
+        throw Exception(ErrorCodes::LOGICAL_ERROR,
+            "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}",
+            storage.getStorageID().getNameForLogs());
+
+    const bool need_to_define_dedup_token = !token_info->isDefined();
+
+    if (token_info->isDefined())
+        block_dedup_token = token_info->getToken();
+
     auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info);

     using DelayedPartition = typename ReplicatedMergeTreeSinkImpl<async_insert>::DelayedChunk::Partition;

@@ -344,23 +358,10 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
         }
         else
         {

             if (deduplicate)
             {
-                String block_dedup_token;

                 /// We add the hash from the data and partition identifier to deduplication ID.
                 /// That is, do not insert the same data to the same partition twice.

-                const String & dedup_token = settings.insert_deduplication_token;
-                if (!dedup_token.empty())
-                {
-                    /// multiple blocks can be inserted within the same insert query
-                    /// an ordinal number is added to dedup token to generate a distinctive block id for each block
-                    block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum);
-                    ++chunk_dedup_seqnum;
-                }
-
                 block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token);
                 LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num));
             }

@@ -368,6 +369,13 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
         {
             LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num));
         }

+        if (need_to_define_dedup_token)
+        {
+            chassert(temp_part.part);
+            const auto hash_value = temp_part.part->getPartBlockIDHash();
+            token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]));
+        }
     }

     profile_events_scope.reset();
@@ -413,17 +421,15 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
         ));
     }

+    if (need_to_define_dedup_token)
+    {
+        token_info->finishChunkHashes();
+    }
+
     finishDelayedChunk(zookeeper);
     delayed_chunk = std::make_unique<ReplicatedMergeTreeSinkImpl::DelayedChunk>();
     delayed_chunk->partitions = std::move(partitions);

-    /// If deduplicated data should not be inserted into MV, we need to set proper
-    /// value for `last_block_is_duplicate`, which is possible only after the part is committed.
-    /// Othervide we can delay commit.
-    /// TODO: we can also delay commit if there is no MVs.
-    if (!settings.deduplicate_blocks_in_dependent_materialized_views)
-        finishDelayedChunk(zookeeper);
-
     ++num_blocks_processed;
 }

@@ -433,8 +439,6 @@ void ReplicatedMergeTreeSinkImpl<false>::finishDelayedChunk(const ZooKeeperWithF
     if (!delayed_chunk)
         return;

-    last_block_is_duplicate = false;
-
     for (auto & partition : delayed_chunk->partitions)
     {
         ProfileEventsScope scoped_attach(&partition.part_counters);

@@ -447,8 +451,6 @@ void ReplicatedMergeTreeSinkImpl<false>::finishDelayedChunk(const ZooKeeperWithF
         {
             bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second;

-            last_block_is_duplicate = last_block_is_duplicate || deduplicated;
-
             /// Set a special error code if the block is duplicate
             int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0;
             auto counters_snapshot = std::make_shared<ProfileEvents::Counters::Snapshot>(partition.part_counters.getPartiallyAtomicSnapshot());
@@ -537,7 +539,7 @@ bool ReplicatedMergeTreeSinkImpl<false>::writeExistingPart(MergeTreeData::Mutabl
     ProfileEventsScope profile_events_scope;

     String original_part_dir = part->getDataPartStorage().getPartDirectory();
-    auto try_rollback_part_rename = [this, &part, &original_part_dir]()
+    auto try_rollback_part_rename = [this, &part, &original_part_dir] ()
     {
         if (original_part_dir == part->getDataPartStorage().getPartDirectory())
             return;

@@ -576,16 +578,6 @@ bool ReplicatedMergeTreeSinkImpl<false>::writeExistingPart(MergeTreeData::Mutabl
     }
 }

-template<bool async_insert>
-bool ReplicatedMergeTreeSinkImpl<async_insert>::lastBlockIsDuplicate() const
-{
-    /// If MV is responsible for deduplication, block is not considered duplicating.
-    if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views)
-        return false;
-
-    return last_block_is_duplicate;
-}
-
 template<bool async_insert>
 std::vector<String> ReplicatedMergeTreeSinkImpl<async_insert>::detectConflictsInAsyncBlockIDs(const std::vector<String> & ids)
 {
@@ -1163,8 +1155,16 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::onStart()
 template<bool async_insert>
 void ReplicatedMergeTreeSinkImpl<async_insert>::onFinish()
 {
-    auto zookeeper = storage.getZooKeeper();
-    finishDelayedChunk(std::make_shared<ZooKeeperWithFaultInjection>(zookeeper));
+    const auto & settings = context->getSettingsRef();
+
+    ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance(
+        settings.insert_keeper_fault_injection_probability,
+        settings.insert_keeper_fault_injection_seed,
+        storage.getZooKeeper(),
+        "ReplicatedMergeTreeSink::onFinish",
+        log);
+
+    finishDelayedChunk(zookeeper);
 }

 template<bool async_insert>
@@ -51,7 +51,7 @@ public:
     ~ReplicatedMergeTreeSinkImpl() override;

     void onStart() override;
-    void consume(Chunk chunk) override;
+    void consume(Chunk & chunk) override;
     void onFinish() override;

     String getName() const override { return "ReplicatedMergeTreeSink"; }

@@ -59,9 +59,6 @@ public:
     /// For ATTACHing existing data on filesystem.
     bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part);

-    /// For proper deduplication in MaterializedViews
-    bool lastBlockIsDuplicate() const override;
-
     struct DelayedChunk;
 private:
     std::vector<String> detectConflictsInAsyncBlockIDs(const std::vector<String> & ids);

@@ -119,7 +116,6 @@ private:
     bool allow_attach_while_readonly = false;
     bool quorum_parallel = false;
     const bool deduplicate = true;
-    bool last_block_is_duplicate = false;
     UInt64 num_blocks_processed = 0;

     LoggerPtr log;
@@ -40,7 +40,7 @@ void MessageQueueSink::onFinish()
     producer->finish();
 }

-void MessageQueueSink::consume(Chunk chunk)
+void MessageQueueSink::consume(Chunk & chunk)
 {
     const auto & columns = chunk.getColumns();
     if (columns.empty())

@@ -35,7 +35,7 @@ public:

     String getName() const override { return storage_name + "Sink"; }

-    void consume(Chunk chunk) override;
+    void consume(Chunk & chunk) override;

     void onStart() override;
     void onFinish() override;
@@ -644,7 +644,13 @@ bool StorageNATS::streamToViews()
     insert->table_id = table_id;

     // Only insert into dependent views and expect that input blocks contain virtual columns
-    InterpreterInsertQuery interpreter(insert, nats_context, false, true, true);
+    InterpreterInsertQuery interpreter(
+        insert,
+        nats_context,
+        /* allow_materialized */ false,
+        /* no_squash */ true,
+        /* no_destination */ true,
+        /* async_isnert */ false);
     auto block_io = interpreter.execute();

     auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext());

@@ -40,12 +40,12 @@ StorageObjectStorageSink::StorageObjectStorageSink(
         configuration->format, *write_buf, sample_block, context, format_settings_);
 }

-void StorageObjectStorageSink::consume(Chunk chunk)
+void StorageObjectStorageSink::consume(Chunk & chunk)
 {
     std::lock_guard lock(cancel_mutex);
     if (cancelled)
         return;
-    writer->write(getHeader().cloneWithColumns(chunk.detachColumns()));
+    writer->write(getHeader().cloneWithColumns(chunk.getColumns()));
 }

 void StorageObjectStorageSink::onCancel()
@@ -20,7 +20,7 @@ public:

     String getName() const override { return "StorageObjectStorageSink"; }

-    void consume(Chunk chunk) override;
+    void consume(Chunk & chunk) override;

     void onCancel() override;

Some files were not shown because too many files have changed in this diff