mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Merge remote-tracking branch 'upstream/master' into group_by_all
# Conflicts: # src/Analyzer/QueryNode.h
This commit is contained in:
commit
c43dd96f14
1
.github/workflows/pull_request.yml
vendored
1
.github/workflows/pull_request.yml
vendored
@ -2023,6 +2023,7 @@ jobs:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
TestsBugfixCheck:
|
||||
needs: [CheckLabels, StyleCheck]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
|
@ -33,7 +33,7 @@ CREATE TABLE trips (
|
||||
tip_amount Float32,
|
||||
tolls_amount Float32,
|
||||
total_amount Float32,
|
||||
payment_type Enum('CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4),
|
||||
payment_type Enum('CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4, 'UNK' = 5),
|
||||
pickup_ntaname LowCardinality(String),
|
||||
dropoff_ntaname LowCardinality(String)
|
||||
)
|
||||
@ -63,7 +63,7 @@ SELECT
|
||||
payment_type,
|
||||
pickup_ntaname,
|
||||
dropoff_ntaname
|
||||
FROM url(
|
||||
FROM s3(
|
||||
'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_{0..2}.gz',
|
||||
'TabSeparatedWithNames'
|
||||
)
|
||||
|
@ -128,6 +128,24 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>Migration Method for installing the deb-packages</summary>
|
||||
|
||||
```bash
|
||||
sudo apt-key del E0C56BD4
|
||||
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
|
||||
echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \
|
||||
/etc/apt/sources.list.d/clickhouse.list
|
||||
sudo apt-get update
|
||||
|
||||
sudo apt-get install -y clickhouse-server clickhouse-client
|
||||
|
||||
sudo service clickhouse-server start
|
||||
clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
You can replace `stable` with `lts` to use a different [release kind](/docs/en/faq/operations/production.md), depending on your needs.
|
||||
|
||||
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/main/c/).
|
||||
|
@ -1627,34 +1627,7 @@ void QueryAnalyzer::validateTableExpressionModifiers(const QueryTreeNodePtr & ta
|
||||
table_expression_node->formatASTForErrorMessage(),
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
|
||||
if (query_node || union_node)
|
||||
{
|
||||
auto table_expression_modifiers = query_node ? query_node->getTableExpressionModifiers() : union_node->getTableExpressionModifiers();
|
||||
|
||||
if (table_expression_modifiers.has_value())
|
||||
{
|
||||
String table_expression_modifiers_error_message;
|
||||
|
||||
if (table_expression_modifiers->hasFinal())
|
||||
{
|
||||
table_expression_modifiers_error_message += "FINAL";
|
||||
|
||||
if (table_expression_modifiers->hasSampleSizeRatio())
|
||||
table_expression_modifiers_error_message += ", SAMPLE";
|
||||
}
|
||||
else if (table_expression_modifiers->hasSampleSizeRatio())
|
||||
{
|
||||
table_expression_modifiers_error_message += "SAMPLE";
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"Table expression modifiers {} are not supported for subquery {}. In scope {}",
|
||||
table_expression_modifiers_error_message,
|
||||
table_expression_node->formatASTForErrorMessage(),
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
}
|
||||
}
|
||||
else if (table_node || table_function_node)
|
||||
if (table_node || table_function_node)
|
||||
{
|
||||
auto table_expression_modifiers = table_node ? table_node->getTableExpressionModifiers() : table_function_node->getTableExpressionModifiers();
|
||||
|
||||
@ -4729,17 +4702,23 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod
|
||||
|
||||
auto table_expression_modifiers = from_table_identifier.getTableExpressionModifiers();
|
||||
|
||||
if (auto * resolved_identifier_query_node = resolved_identifier->as<QueryNode>())
|
||||
auto * resolved_identifier_query_node = resolved_identifier->as<QueryNode>();
|
||||
auto * resolved_identifier_union_node = resolved_identifier->as<UnionNode>();
|
||||
|
||||
if (resolved_identifier_query_node || resolved_identifier_union_node)
|
||||
{
|
||||
resolved_identifier_query_node->setIsCTE(false);
|
||||
if (resolved_identifier_query_node)
|
||||
resolved_identifier_query_node->setIsCTE(false);
|
||||
else
|
||||
resolved_identifier_union_node->setIsCTE(false);
|
||||
|
||||
if (table_expression_modifiers.has_value())
|
||||
resolved_identifier_query_node->setTableExpressionModifiers(*table_expression_modifiers);
|
||||
}
|
||||
else if (auto * resolved_identifier_union_node = resolved_identifier->as<UnionNode>())
|
||||
{
|
||||
resolved_identifier_union_node->setIsCTE(false);
|
||||
if (table_expression_modifiers.has_value())
|
||||
resolved_identifier_union_node->setTableExpressionModifiers(*table_expression_modifiers);
|
||||
{
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"Table expression modifiers {} are not supported for subquery {}",
|
||||
table_expression_modifiers->formatForErrorMessage(),
|
||||
resolved_identifier->formatASTForErrorMessage());
|
||||
}
|
||||
}
|
||||
else if (auto * resolved_identifier_table_node = resolved_identifier->as<TableNode>())
|
||||
{
|
||||
|
@ -77,12 +77,6 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
|
||||
buffer << ", constant_value_type: " << constant_value->getType()->getName();
|
||||
}
|
||||
|
||||
if (table_expression_modifiers)
|
||||
{
|
||||
buffer << ", ";
|
||||
table_expression_modifiers->dump(buffer);
|
||||
}
|
||||
|
||||
if (hasWith())
|
||||
{
|
||||
buffer << '\n' << std::string(indent + 2, ' ') << "WITH\n";
|
||||
@ -198,13 +192,6 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
|
||||
else if (!constant_value && rhs_typed.constant_value)
|
||||
return false;
|
||||
|
||||
if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers)
|
||||
return false;
|
||||
else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers)
|
||||
return false;
|
||||
else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers)
|
||||
return false;
|
||||
|
||||
return is_subquery == rhs_typed.is_subquery &&
|
||||
is_cte == rhs_typed.is_cte &&
|
||||
cte_name == rhs_typed.cte_name &&
|
||||
@ -255,9 +242,6 @@ void QueryNode::updateTreeHashImpl(HashState & state) const
|
||||
state.update(constant_value_type_name.size());
|
||||
state.update(constant_value_type_name);
|
||||
}
|
||||
|
||||
if (table_expression_modifiers)
|
||||
table_expression_modifiers->updateTreeHash(state);
|
||||
}
|
||||
|
||||
QueryTreeNodePtr QueryNode::cloneImpl() const
|
||||
@ -276,7 +260,6 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
|
||||
result_query_node->cte_name = cte_name;
|
||||
result_query_node->projection_columns = projection_columns;
|
||||
result_query_node->constant_value = constant_value;
|
||||
result_query_node->table_expression_modifiers = table_expression_modifiers;
|
||||
|
||||
return result_query_node;
|
||||
}
|
||||
|
@ -188,24 +188,6 @@ public:
|
||||
is_group_by_all = is_group_by_all_value;
|
||||
}
|
||||
|
||||
/// Return true if query node has table expression modifiers, false otherwise
|
||||
bool hasTableExpressionModifiers() const
|
||||
{
|
||||
return table_expression_modifiers.has_value();
|
||||
}
|
||||
|
||||
/// Get table expression modifiers
|
||||
const std::optional<TableExpressionModifiers> & getTableExpressionModifiers() const
|
||||
{
|
||||
return table_expression_modifiers;
|
||||
}
|
||||
|
||||
/// Set table expression modifiers
|
||||
void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value)
|
||||
{
|
||||
table_expression_modifiers = std::move(table_expression_modifiers_value);
|
||||
}
|
||||
|
||||
/// Returns true if query node WITH section is not empty, false otherwise
|
||||
bool hasWith() const
|
||||
{
|
||||
@ -615,7 +597,6 @@ private:
|
||||
std::string cte_name;
|
||||
NamesAndTypes projection_columns;
|
||||
ConstantValuePtr constant_value;
|
||||
std::optional<TableExpressionModifiers> table_expression_modifiers;
|
||||
SettingsChanges settings_changes;
|
||||
|
||||
static constexpr size_t with_child_index = 0;
|
||||
|
@ -145,12 +145,10 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectWithUnionExpression(const ASTPtr &
|
||||
if (select_lists.children.size() == 1)
|
||||
return buildSelectOrUnionExpression(select_lists.children[0], is_subquery, cte_name);
|
||||
|
||||
auto union_node = std::make_shared<UnionNode>();
|
||||
auto union_node = std::make_shared<UnionNode>(select_with_union_query_typed.union_mode);
|
||||
union_node->setIsSubquery(is_subquery);
|
||||
union_node->setIsCTE(!cte_name.empty());
|
||||
union_node->setCTEName(cte_name);
|
||||
union_node->setUnionMode(select_with_union_query_typed.union_mode);
|
||||
union_node->setUnionModes(select_with_union_query_typed.list_of_modes);
|
||||
union_node->setOriginalAST(select_with_union_query);
|
||||
|
||||
size_t select_lists_children_size = select_lists.children.size();
|
||||
@ -173,23 +171,22 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectIntersectExceptQuery(const ASTPtr
|
||||
if (select_lists.size() == 1)
|
||||
return buildSelectExpression(select_lists[0], is_subquery, cte_name);
|
||||
|
||||
auto union_node = std::make_shared<UnionNode>();
|
||||
union_node->setIsSubquery(is_subquery);
|
||||
union_node->setIsCTE(!cte_name.empty());
|
||||
union_node->setCTEName(cte_name);
|
||||
|
||||
SelectUnionMode union_mode;
|
||||
if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT_ALL)
|
||||
union_node->setUnionMode(SelectUnionMode::INTERSECT_ALL);
|
||||
union_mode = SelectUnionMode::INTERSECT_ALL;
|
||||
else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT_DISTINCT)
|
||||
union_node->setUnionMode(SelectUnionMode::INTERSECT_DISTINCT);
|
||||
union_mode = SelectUnionMode::INTERSECT_DISTINCT;
|
||||
else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT_ALL)
|
||||
union_node->setUnionMode(SelectUnionMode::EXCEPT_ALL);
|
||||
union_mode = SelectUnionMode::EXCEPT_ALL;
|
||||
else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT_DISTINCT)
|
||||
union_node->setUnionMode(SelectUnionMode::EXCEPT_DISTINCT);
|
||||
union_mode = SelectUnionMode::EXCEPT_DISTINCT;
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION type is not initialized");
|
||||
|
||||
union_node->setUnionModes(SelectUnionModes(select_lists.size() - 1, union_node->getUnionMode()));
|
||||
auto union_node = std::make_shared<UnionNode>(union_mode);
|
||||
union_node->setIsSubquery(is_subquery);
|
||||
union_node->setIsCTE(!cte_name.empty());
|
||||
union_node->setCTEName(cte_name);
|
||||
union_node->setOriginalAST(select_intersect_except_query);
|
||||
|
||||
size_t select_lists_size = select_lists.size();
|
||||
@ -677,14 +674,10 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
|
||||
|
||||
if (table_expression_modifiers)
|
||||
{
|
||||
if (auto * query_node = node->as<QueryNode>())
|
||||
query_node->setTableExpressionModifiers(*table_expression_modifiers);
|
||||
else if (auto * union_node = node->as<UnionNode>())
|
||||
union_node->setTableExpressionModifiers(*table_expression_modifiers);
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Unexpected table expression subquery node. Expected union or query. Actual {}",
|
||||
node->formatASTForErrorMessage());
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"Table expression modifiers {} are not supported for subquery {}",
|
||||
table_expression_modifiers->formatForErrorMessage(),
|
||||
node->formatASTForErrorMessage());
|
||||
}
|
||||
|
||||
table_expressions.push_back(std::move(node));
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -39,4 +40,27 @@ void TableExpressionModifiers::updateTreeHash(SipHash & hash_state) const
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a human-readable description of the modifiers that are set, for use in error messages.
/// The parts are emitted in the fixed order FINAL, SAMPLE <ratio>, OFFSET <ratio> and are
/// separated by a single space. If no modifier is set the result is an empty string.
String TableExpressionModifiers::formatForErrorMessage() const
{
    WriteBufferFromOwnString out;

    /// Becomes true once any part has been written, so the next part is preceded by a space.
    bool need_separator = false;

    if (has_final)
    {
        out << "FINAL";
        need_separator = true;
    }

    if (sample_size_ratio)
    {
        if (need_separator)
            out << ' ';

        out << "SAMPLE " << ASTSampleRatio::toString(*sample_size_ratio);
        need_separator = true;
    }

    if (sample_offset_ratio)
    {
        if (need_separator)
            out << ' ';

        out << "OFFSET " << ASTSampleRatio::toString(*sample_offset_ratio);
    }

    return out.str();
}
|
||||
|
||||
}
|
||||
|
@ -58,6 +58,9 @@ public:
|
||||
/// Update tree hash
|
||||
void updateTreeHash(SipHash & hash_state) const;
|
||||
|
||||
/// Format for error message
|
||||
String formatForErrorMessage() const;
|
||||
|
||||
private:
|
||||
bool has_final = false;
|
||||
std::optional<Rational> sample_size_ratio;
|
||||
|
@ -30,11 +30,18 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TYPE_MISMATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
UnionNode::UnionNode()
|
||||
UnionNode::UnionNode(SelectUnionMode union_mode_)
|
||||
: IQueryTreeNode(children_size)
|
||||
, union_mode(union_mode_)
|
||||
{
|
||||
if (union_mode == SelectUnionMode::UNION_DEFAULT ||
|
||||
union_mode == SelectUnionMode::EXCEPT_DEFAULT ||
|
||||
union_mode == SelectUnionMode::INTERSECT_DEFAULT)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION mode {} must be normalized", toString(union_mode));
|
||||
|
||||
children[queries_child_index] = std::make_shared<ListNode>();
|
||||
}
|
||||
|
||||
@ -101,28 +108,8 @@ void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
|
||||
buffer << ", constant_value_type: " << constant_value->getType()->getName();
|
||||
}
|
||||
|
||||
if (table_expression_modifiers)
|
||||
{
|
||||
buffer << ", ";
|
||||
table_expression_modifiers->dump(buffer);
|
||||
}
|
||||
|
||||
buffer << ", union_mode: " << toString(union_mode);
|
||||
|
||||
size_t union_modes_size = union_modes.size();
|
||||
buffer << '\n' << std::string(indent + 2, ' ') << "UNION MODES " << union_modes_size << '\n';
|
||||
|
||||
for (size_t i = 0; i < union_modes_size; ++i)
|
||||
{
|
||||
buffer << std::string(indent + 4, ' ');
|
||||
|
||||
auto query_union_mode = union_modes[i];
|
||||
buffer << toString(query_union_mode);
|
||||
|
||||
if (i + 1 != union_modes_size)
|
||||
buffer << '\n';
|
||||
}
|
||||
|
||||
buffer << '\n' << std::string(indent + 2, ' ') << "QUERIES\n";
|
||||
getQueriesNode()->dumpTreeImpl(buffer, format_state, indent + 4);
|
||||
}
|
||||
@ -137,15 +124,8 @@ bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs) const
|
||||
else if (!constant_value && rhs_typed.constant_value)
|
||||
return false;
|
||||
|
||||
if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers)
|
||||
return false;
|
||||
else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers)
|
||||
return false;
|
||||
else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers)
|
||||
return false;
|
||||
|
||||
return is_subquery == rhs_typed.is_subquery && is_cte == rhs_typed.is_cte && cte_name == rhs_typed.cte_name &&
|
||||
union_mode == rhs_typed.union_mode && union_modes == rhs_typed.union_modes;
|
||||
union_mode == rhs_typed.union_mode;
|
||||
}
|
||||
|
||||
void UnionNode::updateTreeHashImpl(HashState & state) const
|
||||
@ -158,10 +138,6 @@ void UnionNode::updateTreeHashImpl(HashState & state) const
|
||||
|
||||
state.update(static_cast<size_t>(union_mode));
|
||||
|
||||
state.update(union_modes.size());
|
||||
for (const auto & query_union_mode : union_modes)
|
||||
state.update(static_cast<size_t>(query_union_mode));
|
||||
|
||||
if (constant_value)
|
||||
{
|
||||
auto constant_dump = applyVisitor(FieldVisitorToString(), constant_value->getValue());
|
||||
@ -172,23 +148,16 @@ void UnionNode::updateTreeHashImpl(HashState & state) const
|
||||
state.update(constant_value_type_name.size());
|
||||
state.update(constant_value_type_name);
|
||||
}
|
||||
|
||||
if (table_expression_modifiers)
|
||||
table_expression_modifiers->updateTreeHash(state);
|
||||
}
|
||||
|
||||
QueryTreeNodePtr UnionNode::cloneImpl() const
|
||||
{
|
||||
auto result_union_node = std::make_shared<UnionNode>();
|
||||
auto result_union_node = std::make_shared<UnionNode>(union_mode);
|
||||
|
||||
result_union_node->is_subquery = is_subquery;
|
||||
result_union_node->is_cte = is_cte;
|
||||
result_union_node->cte_name = cte_name;
|
||||
result_union_node->union_mode = union_mode;
|
||||
result_union_node->union_modes = union_modes;
|
||||
result_union_node->union_modes_set = union_modes_set;
|
||||
result_union_node->constant_value = constant_value;
|
||||
result_union_node->table_expression_modifiers = table_expression_modifiers;
|
||||
|
||||
return result_union_node;
|
||||
}
|
||||
@ -197,14 +166,7 @@ ASTPtr UnionNode::toASTImpl() const
|
||||
{
|
||||
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
|
||||
select_with_union_query->union_mode = union_mode;
|
||||
|
||||
if (union_mode != SelectUnionMode::UNION_DEFAULT &&
|
||||
union_mode != SelectUnionMode::EXCEPT_DEFAULT &&
|
||||
union_mode != SelectUnionMode::INTERSECT_DEFAULT)
|
||||
select_with_union_query->is_normalized = true;
|
||||
|
||||
select_with_union_query->list_of_modes = union_modes;
|
||||
select_with_union_query->set_of_modes = union_modes_set;
|
||||
select_with_union_query->is_normalized = true;
|
||||
select_with_union_query->children.push_back(getQueriesNode()->toAST());
|
||||
select_with_union_query->list_of_selects = select_with_union_query->children.back();
|
||||
|
||||
|
@ -19,6 +19,7 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
/** Union node represents union of queries in query tree.
|
||||
* Union node must be initialized with normalized union mode.
|
||||
*
|
||||
* Example: (SELECT id FROM test_table) UNION ALL (SELECT id FROM test_table_2);
|
||||
* Example: (SELECT id FROM test_table) UNION DISTINCT (SELECT id FROM test_table_2);
|
||||
@ -41,7 +42,8 @@ using UnionNodePtr = std::shared_ptr<UnionNode>;
|
||||
class UnionNode final : public IQueryTreeNode
|
||||
{
|
||||
public:
|
||||
explicit UnionNode();
|
||||
/// Construct union node with normalized union mode
|
||||
explicit UnionNode(SelectUnionMode union_mode_);
|
||||
|
||||
/// Returns true if union node is subquery, false otherwise
|
||||
bool isSubquery() const
|
||||
@ -85,25 +87,6 @@ public:
|
||||
return union_mode;
|
||||
}
|
||||
|
||||
/// Set union mode value
|
||||
void setUnionMode(SelectUnionMode union_mode_value)
|
||||
{
|
||||
union_mode = union_mode_value;
|
||||
}
|
||||
|
||||
/// Get union modes
|
||||
const SelectUnionModes & getUnionModes() const
|
||||
{
|
||||
return union_modes;
|
||||
}
|
||||
|
||||
/// Set union modes value
|
||||
void setUnionModes(const SelectUnionModes & union_modes_value)
|
||||
{
|
||||
union_modes = union_modes_value;
|
||||
union_modes_set = SelectUnionModesSet(union_modes.begin(), union_modes.end());
|
||||
}
|
||||
|
||||
/// Get union node queries
|
||||
const ListNode & getQueries() const
|
||||
{
|
||||
@ -128,24 +111,6 @@ public:
|
||||
return children[queries_child_index];
|
||||
}
|
||||
|
||||
/// Return true if union node has table expression modifiers, false otherwise
|
||||
bool hasTableExpressionModifiers() const
|
||||
{
|
||||
return table_expression_modifiers.has_value();
|
||||
}
|
||||
|
||||
/// Get table expression modifiers
|
||||
const std::optional<TableExpressionModifiers> & getTableExpressionModifiers() const
|
||||
{
|
||||
return table_expression_modifiers;
|
||||
}
|
||||
|
||||
/// Set table expression modifiers
|
||||
void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value)
|
||||
{
|
||||
table_expression_modifiers = std::move(table_expression_modifiers_value);
|
||||
}
|
||||
|
||||
/// Compute union node projection columns
|
||||
NamesAndTypes computeProjectionColumns() const;
|
||||
|
||||
@ -189,10 +154,7 @@ private:
|
||||
bool is_cte = false;
|
||||
std::string cte_name;
|
||||
SelectUnionMode union_mode;
|
||||
SelectUnionModes union_modes;
|
||||
SelectUnionModesSet union_modes_set;
|
||||
ConstantValuePtr constant_value;
|
||||
std::optional<TableExpressionModifiers> table_expression_modifiers;
|
||||
|
||||
static constexpr size_t queries_child_index = 0;
|
||||
static constexpr size_t children_size = queries_child_index + 1;
|
||||
|
@ -98,11 +98,6 @@ static ASTPtr convertIntoTableExpressionAST(const QueryTreeNodePtr & table_expre
|
||||
|
||||
if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION)
|
||||
{
|
||||
if (auto * query_node = table_expression_node->as<QueryNode>())
|
||||
table_expression_modifiers = query_node->getTableExpressionModifiers();
|
||||
else if (auto * union_node = table_expression_node->as<UnionNode>())
|
||||
table_expression_modifiers = union_node->getTableExpressionModifiers();
|
||||
|
||||
result_table_expression->subquery = result_table_expression->children.back();
|
||||
}
|
||||
else if (node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::IDENTIFIER)
|
||||
|
@ -176,6 +176,9 @@ public:
|
||||
|
||||
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
|
||||
|
||||
void finalize() override { data->finalize(); }
|
||||
bool isFinalized() const override { return data->isFinalized(); }
|
||||
|
||||
bool isCollationSupported() const override { return getData().isCollationSupported(); }
|
||||
|
||||
size_t getNumberOfDimensions() const;
|
||||
|
@ -93,6 +93,8 @@ public:
|
||||
bool structureEquals(const IColumn & rhs) const override;
|
||||
double getRatioOfDefaultRows(double sample_ratio) const override;
|
||||
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
|
||||
void finalize() override { nested->finalize(); }
|
||||
bool isFinalized() const override { return nested->isFinalized(); }
|
||||
|
||||
const ColumnArray & getNestedColumn() const { return assert_cast<const ColumnArray &>(*nested); }
|
||||
ColumnArray & getNestedColumn() { return assert_cast<ColumnArray &>(*nested); }
|
||||
|
@ -732,8 +732,8 @@ void ColumnObject::get(size_t n, Field & res) const
|
||||
{
|
||||
assert(n < size());
|
||||
res = Object();
|
||||
|
||||
auto & object = res.get<Object &>();
|
||||
|
||||
for (const auto & entry : subcolumns)
|
||||
{
|
||||
auto it = object.try_emplace(entry->path.getPath()).first;
|
||||
@ -744,7 +744,6 @@ void ColumnObject::get(size_t n, Field & res) const
|
||||
void ColumnObject::insertFrom(const IColumn & src, size_t n)
|
||||
{
|
||||
insert(src[n]);
|
||||
finalize();
|
||||
}
|
||||
|
||||
void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
||||
@ -792,9 +791,8 @@ MutableColumnPtr ColumnObject::applyForSubcolumns(Func && func) const
|
||||
{
|
||||
if (!isFinalized())
|
||||
{
|
||||
auto finalized = IColumn::mutate(getPtr());
|
||||
auto finalized = cloneFinalized();
|
||||
auto & finalized_object = assert_cast<ColumnObject &>(*finalized);
|
||||
finalized_object.finalize();
|
||||
return finalized_object.applyForSubcolumns(std::forward<Func>(func));
|
||||
}
|
||||
|
||||
|
@ -198,10 +198,6 @@ public:
|
||||
Subcolumns & getSubcolumns() { return subcolumns; }
|
||||
PathsInData getKeys() const;
|
||||
|
||||
/// Finalizes all subcolumns.
|
||||
void finalize();
|
||||
bool isFinalized() const;
|
||||
|
||||
/// Part of interface
|
||||
|
||||
const char * getFamilyName() const override { return "Object"; }
|
||||
@ -219,12 +215,17 @@ public:
|
||||
void popBack(size_t length) override;
|
||||
Field operator[](size_t n) const override;
|
||||
void get(size_t n, Field & res) const override;
|
||||
|
||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
||||
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||
MutableColumnPtr cloneResized(size_t new_size) const override;
|
||||
|
||||
/// Finalizes all subcolumns.
|
||||
void finalize() override;
|
||||
bool isFinalized() const override;
|
||||
|
||||
/// Order of rows in ColumnObject is undefined.
|
||||
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
|
||||
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
@ -264,9 +265,7 @@ private:
|
||||
template <typename Func>
|
||||
MutableColumnPtr applyForSubcolumns(Func && func) const;
|
||||
|
||||
/// For given subcolumn return subcolumn from the same Nested type.
|
||||
/// It's used to get shared sized of Nested to insert correct default values.
|
||||
const Subcolumns::Node * getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -570,4 +570,15 @@ void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, siz
|
||||
return getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);
|
||||
}
|
||||
|
||||
void ColumnTuple::finalize()
|
||||
{
|
||||
for (auto & column : columns)
|
||||
column->finalize();
|
||||
}
|
||||
|
||||
bool ColumnTuple::isFinalized() const
|
||||
{
|
||||
return std::all_of(columns.begin(), columns.end(), [](const auto & column) { return column->isFinalized(); });
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -103,6 +103,8 @@ public:
|
||||
ColumnPtr compress() const override;
|
||||
double getRatioOfDefaultRows(double sample_ratio) const override;
|
||||
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
|
||||
void finalize() override;
|
||||
bool isFinalized() const override;
|
||||
|
||||
size_t tupleSize() const { return columns.size(); }
|
||||
|
||||
|
@ -453,6 +453,16 @@ public:
|
||||
return getPtr();
|
||||
}
|
||||
|
||||
/// Some columns may require finalization before other operations can be used on them.
|
||||
virtual void finalize() {}
|
||||
virtual bool isFinalized() const { return true; }
|
||||
|
||||
/// Returns a mutable column obtained through IColumn::mutate(getPtr()) on which finalize()
/// has already been called.
MutablePtr cloneFinalized() const
{
    MutablePtr result = IColumn::mutate(getPtr());
    result->finalize();
    return result;
}
|
||||
|
||||
[[nodiscard]] static MutablePtr mutate(Ptr ptr)
|
||||
{
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <base/types.h>
|
||||
#include <base/defines.h>
|
||||
#include "ElementTypes.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -25,6 +26,7 @@ struct DummyJSONParser
|
||||
{
|
||||
public:
|
||||
Element() = default;
|
||||
static ElementType type() { return ElementType::NULL_VALUE; }
|
||||
static bool isInt64() { return false; }
|
||||
static bool isUInt64() { return false; }
|
||||
static bool isDouble() { return false; }
|
||||
|
17
src/Common/JSONParsers/ElementTypes.h
Normal file
17
src/Common/JSONParsers/ElementTypes.h
Normal file
@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Parser-independent kind of a JSON element.
/// Enum values match simdjson's for fast conversion, so the numeric values must not be changed.
enum class ElementType
{
    /// Containers
    ARRAY = '[',
    OBJECT = '{',

    /// Numbers
    INT64 = 'l',
    UINT64 = 'u',
    DOUBLE = 'd',

    /// Other scalars
    STRING = '"',
    BOOL = 't',
    NULL_VALUE = 'n'
};
|
||||
}
|
@ -6,7 +6,7 @@
|
||||
# include <base/types.h>
|
||||
# include <base/defines.h>
|
||||
# include <rapidjson/document.h>
|
||||
|
||||
# include "ElementTypes.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -26,6 +26,20 @@ struct RapidJSONParser
|
||||
ALWAYS_INLINE Element() = default;
|
||||
ALWAYS_INLINE Element(const rapidjson::Value & value_) : ptr(&value_) {} /// NOLINT
|
||||
|
||||
/// Maps the rapidjson value type onto the parser-independent ElementType.
/// rapidjson has a single number type, so numbers are classified further:
/// DOUBLE if IsDouble(), otherwise UINT64 if IsUint64(), otherwise INT64.
/// Both kTrueType and kFalseType are reported as BOOL.
ALWAYS_INLINE ElementType type() const
{
    switch (ptr->GetType())
    {
        case rapidjson::kNullType:
            return ElementType::NULL_VALUE;

        case rapidjson::kFalseType:
        case rapidjson::kTrueType:
            return ElementType::BOOL;

        case rapidjson::kObjectType:
            return ElementType::OBJECT;

        case rapidjson::kArrayType:
            return ElementType::ARRAY;

        case rapidjson::kStringType:
            return ElementType::STRING;

        case rapidjson::kNumberType:
        {
            if (ptr->IsDouble())
                return ElementType::DOUBLE;
            if (ptr->IsUint64())
                return ElementType::UINT64;
            return ElementType::INT64;
        }
    }
}
|
||||
|
||||
ALWAYS_INLINE bool isInt64() const { return ptr->IsInt64(); }
|
||||
ALWAYS_INLINE bool isUInt64() const { return ptr->IsUint64(); }
|
||||
ALWAYS_INLINE bool isDouble() const { return ptr->IsDouble(); }
|
||||
|
@ -7,7 +7,7 @@
|
||||
# include <Common/Exception.h>
|
||||
# include <base/defines.h>
|
||||
# include <simdjson.h>
|
||||
|
||||
# include "ElementTypes.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -31,6 +31,21 @@ struct SimdJSONParser
|
||||
ALWAYS_INLINE Element() {} /// NOLINT
|
||||
ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {} /// NOLINT
|
||||
|
||||
/// Maps the simdjson DOM element type onto the parser-independent ElementType, one to one.
ALWAYS_INLINE ElementType type() const
{
    using SimdType = simdjson::dom::element_type;

    switch (element.type())
    {
        /// Containers
        case SimdType::ARRAY:
            return ElementType::ARRAY;
        case SimdType::OBJECT:
            return ElementType::OBJECT;

        /// Numbers
        case SimdType::INT64:
            return ElementType::INT64;
        case SimdType::UINT64:
            return ElementType::UINT64;
        case SimdType::DOUBLE:
            return ElementType::DOUBLE;

        /// Other scalars
        case SimdType::STRING:
            return ElementType::STRING;
        case SimdType::BOOL:
            return ElementType::BOOL;
        case SimdType::NULL_VALUE:
            return ElementType::NULL_VALUE;
    }
}
|
||||
|
||||
ALWAYS_INLINE bool isInt64() const { return element.type() == simdjson::dom::element_type::INT64; }
|
||||
ALWAYS_INLINE bool isUInt64() const { return element.type() == simdjson::dom::element_type::UINT64; }
|
||||
ALWAYS_INLINE bool isDouble() const { return element.type() == simdjson::dom::element_type::DOUBLE; }
|
||||
|
@ -48,6 +48,7 @@ public:
|
||||
bool textCanContainOnlyValidUTF8() const override { return nested->textCanContainOnlyValidUTF8(); }
|
||||
bool isComparable() const override { return nested->isComparable(); }
|
||||
bool canBeComparedWithCollation() const override { return nested->canBeComparedWithCollation(); }
|
||||
bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); }
|
||||
|
||||
bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override
|
||||
{
|
||||
|
@ -22,6 +22,27 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
/// Constructs a Map type from its nested representation.
/// The nested type must be an Array whose element type is a Tuple of exactly two elements;
/// the first tuple element becomes the key type and the second the value type.
/// Throws BAD_ARGUMENTS if the nested type does not have that shape. After the types are
/// extracted, assertKeyType() is called.
DataTypeMap::DataTypeMap(const DataTypePtr & nested_)
    : nested(nested_)
{
    const auto * array_type = typeid_cast<const DataTypeArray *>(nested.get());

    /// Only look inside the array if the outer cast succeeded.
    const auto * tuple_type = array_type
        ? typeid_cast<const DataTypeTuple *>(array_type->getNestedType().get())
        : nullptr;

    const bool is_array_of_pairs = tuple_type && tuple_type->getElements().size() == 2;
    if (!is_array_of_pairs)
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Expected Array(Tuple(key, value)) type, got {}", nested->getName());

    key_type = tuple_type->getElement(0);
    value_type = tuple_type->getElement(1);
    assertKeyType();
}
|
||||
|
||||
DataTypeMap::DataTypeMap(const DataTypes & elems_)
|
||||
{
|
||||
|
@ -23,6 +23,7 @@ private:
|
||||
public:
|
||||
static constexpr bool is_parametric = true;
|
||||
|
||||
explicit DataTypeMap(const DataTypePtr & nested_);
|
||||
explicit DataTypeMap(const DataTypes & elems);
|
||||
DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & value_type_);
|
||||
|
||||
@ -40,6 +41,7 @@ public:
|
||||
bool isComparable() const override { return key_type->isComparable() && value_type->isComparable(); }
|
||||
bool isParametric() const override { return true; }
|
||||
bool haveSubtypes() const override { return true; }
|
||||
/// Reports dynamic subcolumns exactly when the nested type does.
bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); }
|
||||
|
||||
const DataTypePtr & getKeyType() const { return key_type; }
|
||||
const DataTypePtr & getValueType() const { return value_type; }
|
||||
|
@ -36,6 +36,7 @@ public:
|
||||
bool haveSubtypes() const override { return false; }
|
||||
bool equals(const IDataType & rhs) const override;
|
||||
bool isParametric() const override { return true; }
|
||||
/// This type always reports dynamic subcolumns.
bool hasDynamicSubcolumns() const override { return true; }
|
||||
|
||||
SerializationPtr doGetDefaultSerialization() const override;
|
||||
|
||||
|
@ -247,6 +247,11 @@ bool DataTypeTuple::haveMaximumSizeOfValue() const
|
||||
return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); });
|
||||
}
|
||||
|
||||
bool DataTypeTuple::hasDynamicSubcolumns() const
|
||||
{
|
||||
return std::any_of(elems.begin(), elems.end(), [](auto && elem) { return elem->hasDynamicSubcolumns(); });
|
||||
}
|
||||
|
||||
bool DataTypeTuple::isComparable() const
|
||||
{
|
||||
return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->isComparable(); });
|
||||
|
@ -50,6 +50,7 @@ public:
|
||||
bool isComparable() const override;
|
||||
bool textCanContainOnlyValidUTF8() const override;
|
||||
bool haveMaximumSizeOfValue() const override;
|
||||
bool hasDynamicSubcolumns() const override;
|
||||
size_t getMaximumSizeOfValueInMemory() const override;
|
||||
size_t getSizeOfValueInMemory() const override;
|
||||
|
||||
|
@ -291,6 +291,9 @@ public:
|
||||
/// Strings, Numbers, Date, DateTime, Nullable
|
||||
virtual bool canBeInsideLowCardinality() const { return false; }
|
||||
|
||||
/// Object, Array(Object), Tuple(..., Object, ...)
|
||||
virtual bool hasDynamicSubcolumns() const { return false; }
|
||||
|
||||
/// Updates avg_value_size_hint for newly read column. Used to optimize deserialization. Zero expected for first column.
|
||||
static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint);
|
||||
|
||||
|
@ -1,17 +1,19 @@
|
||||
#include <Storages/StorageSnapshot.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
#include <DataTypes/DataTypeObject.h>
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Storages/StorageSnapshot.h>
|
||||
#include <Columns/ColumnObject.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
@ -105,10 +107,11 @@ Array createEmptyArrayField(size_t num_dimensions)
|
||||
DataTypePtr getDataTypeByColumn(const IColumn & column)
|
||||
{
|
||||
auto idx = column.getDataType();
|
||||
if (WhichDataType(idx).isSimple())
|
||||
WhichDataType which(idx);
|
||||
if (which.isSimple())
|
||||
return DataTypeFactory::instance().get(String(magic_enum::enum_name(idx)));
|
||||
|
||||
if (WhichDataType(idx).isNothing())
|
||||
if (which.isNothing())
|
||||
return std::make_shared<DataTypeNothing>();
|
||||
|
||||
if (const auto * column_array = checkAndGetColumn<ColumnArray>(&column))
|
||||
@ -132,41 +135,124 @@ static auto extractVector(const std::vector<Tuple> & vec)
|
||||
return res;
|
||||
}
|
||||
|
||||
void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_storage_columns)
|
||||
static DataTypePtr recreateTupleWithElements(const DataTypeTuple & type_tuple, const DataTypes & elements)
|
||||
{
|
||||
std::unordered_map<String, DataTypePtr> storage_columns_map;
|
||||
for (const auto & [name, type] : extended_storage_columns)
|
||||
storage_columns_map[name] = type;
|
||||
|
||||
for (auto & column : block)
|
||||
{
|
||||
if (!isObject(column.type))
|
||||
continue;
|
||||
|
||||
const auto & column_object = assert_cast<const ColumnObject &>(*column.column);
|
||||
if (!column_object.isFinalized())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot convert to tuple column '{}' from type {}. Column should be finalized first",
|
||||
column.name, column.type->getName());
|
||||
|
||||
std::tie(column.column, column.type) = unflattenObjectToTuple(column_object);
|
||||
|
||||
auto it = storage_columns_map.find(column.name);
|
||||
if (it == storage_columns_map.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", column.name);
|
||||
|
||||
/// Check that constructed Tuple type and type in storage are compatible.
|
||||
getLeastCommonTypeForObject({column.type, it->second}, true);
|
||||
}
|
||||
return type_tuple.haveExplicitNames()
|
||||
? std::make_shared<DataTypeTuple>(elements, type_tuple.getElementNames())
|
||||
: std::make_shared<DataTypeTuple>(elements);
|
||||
}
|
||||
|
||||
void deduceTypesOfObjectColumns(const StorageSnapshotPtr & storage_snapshot, Block & block)
|
||||
static std::pair<ColumnPtr, DataTypePtr> convertObjectColumnToTuple(
|
||||
const ColumnObject & column_object, const DataTypeObject & type_object)
|
||||
{
|
||||
if (!storage_snapshot->object_columns.empty())
|
||||
if (!column_object.isFinalized())
|
||||
{
|
||||
auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects();
|
||||
auto storage_columns = storage_snapshot->getColumns(options);
|
||||
convertObjectsToTuples(block, storage_columns);
|
||||
auto finalized = column_object.cloneFinalized();
|
||||
const auto & finalized_object = assert_cast<const ColumnObject &>(*finalized);
|
||||
return convertObjectColumnToTuple(finalized_object, type_object);
|
||||
}
|
||||
|
||||
const auto & subcolumns = column_object.getSubcolumns();
|
||||
|
||||
PathsInData tuple_paths;
|
||||
DataTypes tuple_types;
|
||||
Columns tuple_columns;
|
||||
|
||||
for (const auto & entry : subcolumns)
|
||||
{
|
||||
tuple_paths.emplace_back(entry->path);
|
||||
tuple_types.emplace_back(entry->data.getLeastCommonType());
|
||||
tuple_columns.emplace_back(entry->data.getFinalizedColumnPtr());
|
||||
}
|
||||
|
||||
return unflattenTuple(tuple_paths, tuple_types, tuple_columns);
|
||||
}
|
||||
|
||||
static std::pair<ColumnPtr, DataTypePtr> recursivlyConvertDynamicColumnToTuple(
|
||||
const ColumnPtr & column, const DataTypePtr & type)
|
||||
{
|
||||
if (!type->hasDynamicSubcolumns())
|
||||
return {column, type};
|
||||
|
||||
if (const auto * type_object = typeid_cast<const DataTypeObject *>(type.get()))
|
||||
{
|
||||
const auto & column_object = assert_cast<const ColumnObject &>(*column);
|
||||
return convertObjectColumnToTuple(column_object, *type_object);
|
||||
}
|
||||
|
||||
if (const auto * type_array = typeid_cast<const DataTypeArray *>(type.get()))
|
||||
{
|
||||
const auto & column_array = assert_cast<const ColumnArray &>(*column);
|
||||
auto [new_column, new_type] = recursivlyConvertDynamicColumnToTuple(
|
||||
column_array.getDataPtr(), type_array->getNestedType());
|
||||
|
||||
return
|
||||
{
|
||||
ColumnArray::create(new_column, column_array.getOffsetsPtr()),
|
||||
std::make_shared<DataTypeArray>(std::move(new_type)),
|
||||
};
|
||||
}
|
||||
|
||||
if (const auto * type_map = typeid_cast<const DataTypeMap *>(type.get()))
|
||||
{
|
||||
const auto & column_map = assert_cast<const ColumnMap &>(*column);
|
||||
auto [new_column, new_type] = recursivlyConvertDynamicColumnToTuple(
|
||||
column_map.getNestedColumnPtr(), type_map->getNestedType());
|
||||
|
||||
return
|
||||
{
|
||||
ColumnMap::create(new_column),
|
||||
std::make_shared<DataTypeMap>(std::move(new_type)),
|
||||
};
|
||||
}
|
||||
|
||||
if (const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
|
||||
{
|
||||
const auto & tuple_columns = assert_cast<const ColumnTuple &>(*column).getColumns();
|
||||
const auto & tuple_types = type_tuple->getElements();
|
||||
|
||||
assert(tuple_columns.size() == tuple_types.size());
|
||||
const size_t tuple_size = tuple_types.size();
|
||||
|
||||
Columns new_tuple_columns(tuple_size);
|
||||
DataTypes new_tuple_types(tuple_size);
|
||||
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
{
|
||||
std::tie(new_tuple_columns[i], new_tuple_types[i])
|
||||
= recursivlyConvertDynamicColumnToTuple(tuple_columns[i], tuple_types[i]);
|
||||
}
|
||||
|
||||
return
|
||||
{
|
||||
ColumnTuple::create(new_tuple_columns),
|
||||
recreateTupleWithElements(*type_tuple, new_tuple_types)
|
||||
};
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type->getName());
|
||||
}
|
||||
|
||||
/// Replaces every column of @block whose type has dynamic subcolumns with its
/// Tuple-based representation, then checks the resulting type against the types
/// known to @storage_snapshot.
/// Throws LOGICAL_ERROR if such a column is not found in the storage snapshot.
void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & storage_snapshot)
{
    for (auto & entry : block)
    {
        if (!entry.type->hasDynamicSubcolumns())
            continue;

        auto converted = recursivlyConvertDynamicColumnToTuple(entry.column, entry.type);
        entry.column = std::move(converted.first);
        entry.type = std::move(converted.second);

        /// The options object is deliberately created per column: the plain lookup below
        /// must happen before withExtendedObjects() is applied to it.
        GetColumnsOptions options(GetColumnsOptions::AllPhysical);

        auto declared_column = storage_snapshot->tryGetColumn(options, entry.name);
        if (!declared_column)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", entry.name);

        auto concrete_column = storage_snapshot->getColumn(options.withExtendedObjects(), entry.name);

        /// Only the compatibility check matters here; the deduced type itself is discarded.
        getLeastCommonTypeForDynamicColumns(
            declared_column->type, {entry.type, concrete_column.type}, true);
    }
}
|
||||
|
||||
@ -217,24 +303,8 @@ void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
|
||||
}
|
||||
}
|
||||
|
||||
DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambiguos_paths)
|
||||
static DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambiguos_paths)
|
||||
{
|
||||
if (types.empty())
|
||||
return nullptr;
|
||||
|
||||
bool all_equal = true;
|
||||
for (size_t i = 1; i < types.size(); ++i)
|
||||
{
|
||||
if (!types[i]->equals(*types[0]))
|
||||
{
|
||||
all_equal = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (all_equal)
|
||||
return types[0];
|
||||
|
||||
/// Types of subcolumns by path from all tuples.
|
||||
std::unordered_map<PathInData, DataTypes, PathInData::Hash> subcolumns_types;
|
||||
|
||||
@ -287,19 +357,139 @@ DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambi
|
||||
return unflattenTuple(tuple_paths, tuple_types);
|
||||
}
|
||||
|
||||
NameSet getNamesOfObjectColumns(const NamesAndTypesList & columns_list)
|
||||
{
|
||||
NameSet res;
|
||||
for (const auto & [name, type] : columns_list)
|
||||
if (isObject(type))
|
||||
res.insert(name);
|
||||
static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl(
|
||||
const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths);
|
||||
|
||||
return res;
|
||||
template<typename Type>
|
||||
static DataTypePtr getLeastCommonTypeForColumnWithNestedType(
|
||||
const Type & type, const DataTypes & concrete_types, bool check_ambiguos_paths)
|
||||
{
|
||||
DataTypes nested_types;
|
||||
nested_types.reserve(concrete_types.size());
|
||||
|
||||
for (const auto & concrete_type : concrete_types)
|
||||
{
|
||||
const auto * type_with_nested_conctete = typeid_cast<const Type *>(concrete_type.get());
|
||||
if (!type_with_nested_conctete)
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected {} type, got {}", demangle(typeid(Type).name()), concrete_type->getName());
|
||||
|
||||
nested_types.push_back(type_with_nested_conctete->getNestedType());
|
||||
}
|
||||
|
||||
return std::make_shared<Type>(
|
||||
getLeastCommonTypeForDynamicColumnsImpl(
|
||||
type.getNestedType(), nested_types, check_ambiguos_paths));
|
||||
}
|
||||
|
||||
bool hasObjectColumns(const ColumnsDescription & columns)
|
||||
static DataTypePtr getLeastCommonTypeForTuple(
|
||||
const DataTypeTuple & type, const DataTypes & concrete_types, bool check_ambiguos_paths)
|
||||
{
|
||||
return std::any_of(columns.begin(), columns.end(), [](const auto & column) { return isObject(column.type); });
|
||||
const auto & element_types = type.getElements();
|
||||
DataTypes new_element_types(element_types.size());
|
||||
|
||||
for (size_t i = 0; i < element_types.size(); ++i)
|
||||
{
|
||||
DataTypes concrete_element_types;
|
||||
concrete_element_types.reserve(concrete_types.size());
|
||||
|
||||
for (const auto & type_concrete : concrete_types)
|
||||
{
|
||||
const auto * type_tuple_conctete = typeid_cast<const DataTypeTuple *>(type_concrete.get());
|
||||
if (!type_tuple_conctete)
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected Tuple type, got {}", type_concrete->getName());
|
||||
|
||||
concrete_element_types.push_back(type_tuple_conctete->getElement(i));
|
||||
}
|
||||
|
||||
new_element_types[i] = getLeastCommonTypeForDynamicColumnsImpl(
|
||||
element_types[i], concrete_element_types, check_ambiguos_paths);
|
||||
}
|
||||
|
||||
return recreateTupleWithElements(type, new_element_types);
|
||||
}
|
||||
|
||||
/// Dispatches on the kind of @type_in_storage to deduce a common type for @concrete_types:
///  - a type without dynamic subcolumns is returned unchanged;
///  - Object is handled by getLeastCommonTypeForObject;
///  - Array and Map are handled through their nested type;
///  - Tuple is handled element by element.
/// Throws LOGICAL_ERROR for any other type that reports dynamic subcolumns.
static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl(
    const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths)
{
    const bool is_dynamic = type_in_storage->hasDynamicSubcolumns();
    if (!is_dynamic)
        return type_in_storage;

    if (isObject(type_in_storage))
        return getLeastCommonTypeForObject(concrete_types, check_ambiguos_paths);

    const auto * storage_type = type_in_storage.get();

    if (const auto * array_type = typeid_cast<const DataTypeArray *>(storage_type))
        return getLeastCommonTypeForColumnWithNestedType(*array_type, concrete_types, check_ambiguos_paths);

    if (const auto * map_type = typeid_cast<const DataTypeMap *>(storage_type))
        return getLeastCommonTypeForColumnWithNestedType(*map_type, concrete_types, check_ambiguos_paths);

    if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(storage_type))
        return getLeastCommonTypeForTuple(*tuple_type, concrete_types, check_ambiguos_paths);

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
}
|
||||
|
||||
/// Deduces the least common type among @concrete_types for a column declared as @type_in_storage.
/// Returns nullptr for an empty list, and the first type when all types are equal;
/// otherwise the work is delegated to getLeastCommonTypeForDynamicColumnsImpl.
DataTypePtr getLeastCommonTypeForDynamicColumns(
    const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths)
{
    if (concrete_types.empty())
        return nullptr;

    /// Shortcut: nothing to deduce when every type equals the first one.
    const auto & first_type = concrete_types[0];
    const bool all_same = std::all_of(
        concrete_types.begin() + 1, concrete_types.end(),
        [&first_type](const auto & candidate) { return candidate->equals(*first_type); });

    return all_same
        ? first_type
        : getLeastCommonTypeForDynamicColumnsImpl(type_in_storage, concrete_types, check_ambiguos_paths);
}
|
||||
|
||||
/// Builds the concrete counterpart of @type_in_storage for the case when no data is known yet:
/// every Object is replaced by a Tuple with a single UInt8 element named
/// ColumnObject::COLUMN_NAME_DUMMY, while Array, Map and Tuple wrappers are rebuilt
/// around their converted inner types. Types without dynamic subcolumns are returned unchanged.
/// Throws LOGICAL_ERROR for any other type that reports dynamic subcolumns.
DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage)
{
    const bool is_dynamic = type_in_storage->hasDynamicSubcolumns();
    if (!is_dynamic)
        return type_in_storage;

    if (isObject(type_in_storage))
    {
        return std::make_shared<DataTypeTuple>(
            DataTypes{std::make_shared<DataTypeUInt8>()}, Names{ColumnObject::COLUMN_NAME_DUMMY});
    }

    const auto * storage_type = type_in_storage.get();

    if (const auto * array_type = typeid_cast<const DataTypeArray *>(storage_type))
    {
        return std::make_shared<DataTypeArray>(
            createConcreteEmptyDynamicColumn(array_type->getNestedType()));
    }

    if (const auto * map_type = typeid_cast<const DataTypeMap *>(storage_type))
    {
        return std::make_shared<DataTypeMap>(
            createConcreteEmptyDynamicColumn(map_type->getNestedType()));
    }

    if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(storage_type))
    {
        const auto & source_elements = tuple_type->getElements();
        const size_t num_elements = source_elements.size();

        DataTypes concrete_elements(num_elements);
        for (size_t pos = 0; pos != num_elements; ++pos)
            concrete_elements[pos] = createConcreteEmptyDynamicColumn(source_elements[pos]);

        return recreateTupleWithElements(*tuple_type, concrete_elements);
    }

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
}
|
||||
|
||||
bool hasDynamicSubcolumns(const ColumnsDescription & columns)
|
||||
{
|
||||
return std::any_of(columns.begin(), columns.end(),
|
||||
[](const auto & column)
|
||||
{
|
||||
return column.type->hasDynamicSubcolumns();
|
||||
});
|
||||
}
|
||||
|
||||
void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescription & object_columns, bool with_subcolumns)
|
||||
@ -320,16 +510,20 @@ void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescript
|
||||
columns_list.splice(columns_list.end(), std::move(subcolumns_list));
|
||||
}
|
||||
|
||||
void updateObjectColumns(ColumnsDescription & object_columns, const NamesAndTypesList & new_columns)
|
||||
void updateObjectColumns(
|
||||
ColumnsDescription & object_columns,
|
||||
const ColumnsDescription & storage_columns,
|
||||
const NamesAndTypesList & new_columns)
|
||||
{
|
||||
for (const auto & new_column : new_columns)
|
||||
{
|
||||
auto object_column = object_columns.tryGetColumn(GetColumnsOptions::All, new_column.name);
|
||||
if (object_column && !object_column->type->equals(*new_column.type))
|
||||
{
|
||||
auto storage_column = storage_columns.getColumn(GetColumnsOptions::All, new_column.name);
|
||||
object_columns.modify(new_column.name, [&](auto & column)
|
||||
{
|
||||
column.type = getLeastCommonTypeForObject({object_column->type, new_column.type});
|
||||
column.type = getLeastCommonTypeForDynamicColumns(storage_column.type, {object_column->type, new_column.type});
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -745,13 +939,6 @@ void replaceMissedSubcolumnsByConstants(
|
||||
addConstantToWithClause(query, name, type);
|
||||
}
|
||||
|
||||
void finalizeObjectColumns(const MutableColumns & columns)
|
||||
{
|
||||
for (const auto & column : columns)
|
||||
if (auto * column_object = typeid_cast<ColumnObject *>(column.get()))
|
||||
column_object->finalize();
|
||||
}
|
||||
|
||||
Field FieldVisitorReplaceScalars::operator()(const Array & x) const
|
||||
{
|
||||
if (num_dimensions_to_keep == 0)
|
||||
@ -768,11 +955,13 @@ size_t FieldVisitorToNumberOfDimensions::operator()(const Array & x)
|
||||
{
|
||||
const size_t size = x.size();
|
||||
size_t dimensions = 0;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
size_t element_dimensions = applyVisitor(*this, x[i]);
|
||||
if (i > 0 && element_dimensions != dimensions)
|
||||
need_fold_dimension = true;
|
||||
|
||||
dimensions = std::max(dimensions, element_dimensions);
|
||||
}
|
||||
|
||||
@ -783,12 +972,13 @@ Field FieldVisitorFoldDimension::operator()(const Array & x) const
|
||||
{
|
||||
if (num_dimensions_to_fold == 0)
|
||||
return x;
|
||||
|
||||
const size_t size = x.size();
|
||||
Array res(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
res[i] = applyVisitor(FieldVisitorFoldDimension(num_dimensions_to_fold - 1), x[i]);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -39,27 +39,31 @@ Array createEmptyArrayField(size_t num_dimensions);
|
||||
DataTypePtr getDataTypeByColumn(const IColumn & column);
|
||||
|
||||
/// Converts Object types and columns to Tuples in @columns_list and @block
|
||||
/// and checks that types are consistent with types in @extended_storage_columns.
|
||||
void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_storage_columns);
|
||||
void deduceTypesOfObjectColumns(const StorageSnapshotPtr & storage_snapshot, Block & block);
|
||||
/// and checks that types are consistent with types in @storage_snapshot.
|
||||
void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & storage_snapshot);
|
||||
|
||||
/// Checks that each path is not the prefix of any other path.
|
||||
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths);
|
||||
|
||||
/// Receives several Tuple types and deduces the least common type among them.
|
||||
DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambiguos_paths = false);
|
||||
DataTypePtr getLeastCommonTypeForDynamicColumns(
|
||||
const DataTypePtr & type_in_storage, const DataTypes & types, bool check_ambiguos_paths = false);
|
||||
|
||||
DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage);
|
||||
|
||||
/// Converts types of object columns to tuples in @columns_list
|
||||
/// according to @object_columns and adds all tuple's subcolumns if needed.
|
||||
void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescription & object_columns, bool with_subcolumns);
|
||||
|
||||
NameSet getNamesOfObjectColumns(const NamesAndTypesList & columns_list);
|
||||
bool hasObjectColumns(const ColumnsDescription & columns);
|
||||
void finalizeObjectColumns(const MutableColumns & columns);
|
||||
/// Checks whether @columns contain any column with dynamic subcolumns.
|
||||
bool hasDynamicSubcolumns(const ColumnsDescription & columns);
|
||||
|
||||
/// Updates types of objects in @object_columns inplace
|
||||
/// according to types in new_columns.
|
||||
void updateObjectColumns(ColumnsDescription & object_columns, const NamesAndTypesList & new_columns);
|
||||
void updateObjectColumns(
|
||||
ColumnsDescription & object_columns,
|
||||
const ColumnsDescription & storage_columns,
|
||||
const NamesAndTypesList & new_columns);
|
||||
|
||||
using DataTypeTuplePtr = std::shared_ptr<DataTypeTuple>;
|
||||
|
||||
@ -142,13 +146,15 @@ public:
|
||||
{
|
||||
if (num_dimensions_to_fold == 0)
|
||||
return x;
|
||||
Array res(1,x);
|
||||
|
||||
Array res(1, x);
|
||||
for (size_t i = 1; i < num_dimensions_to_fold; ++i)
|
||||
{
|
||||
Array new_res;
|
||||
new_res.push_back(std::move(res));
|
||||
res = std::move(new_res);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -163,7 +169,7 @@ private:
|
||||
/// columns-like objects from entry to which Iterator points.
|
||||
/// columns-like object should have fields "name" and "type".
|
||||
template <typename Iterator, typename EntryColumnsGetter>
|
||||
ColumnsDescription getObjectColumns(
|
||||
ColumnsDescription getConcreteObjectColumns(
|
||||
Iterator begin, Iterator end,
|
||||
const ColumnsDescription & storage_columns,
|
||||
EntryColumnsGetter && entry_columns_getter)
|
||||
@ -176,14 +182,8 @@ ColumnsDescription getObjectColumns(
|
||||
/// dummy column will be removed.
|
||||
for (const auto & column : storage_columns)
|
||||
{
|
||||
if (isObject(column.type))
|
||||
{
|
||||
auto tuple_type = std::make_shared<DataTypeTuple>(
|
||||
DataTypes{std::make_shared<DataTypeUInt8>()},
|
||||
Names{ColumnObject::COLUMN_NAME_DUMMY});
|
||||
|
||||
types_in_entries[column.name].push_back(std::move(tuple_type));
|
||||
}
|
||||
if (column.type->hasDynamicSubcolumns())
|
||||
types_in_entries[column.name].push_back(createConcreteEmptyDynamicColumn(column.type));
|
||||
}
|
||||
|
||||
for (auto it = begin; it != end; ++it)
|
||||
@ -192,14 +192,17 @@ ColumnsDescription getObjectColumns(
|
||||
for (const auto & column : entry_columns)
|
||||
{
|
||||
auto storage_column = storage_columns.tryGetPhysical(column.name);
|
||||
if (storage_column && isObject(storage_column->type))
|
||||
if (storage_column && storage_column->type->hasDynamicSubcolumns())
|
||||
types_in_entries[column.name].push_back(column.type);
|
||||
}
|
||||
}
|
||||
|
||||
ColumnsDescription res;
|
||||
for (const auto & [name, types] : types_in_entries)
|
||||
res.add({name, getLeastCommonTypeForObject(types)});
|
||||
{
|
||||
auto storage_column = storage_columns.getPhysical(name);
|
||||
res.add({name, getLeastCommonTypeForDynamicColumns(storage_column.type, types)});
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -249,7 +249,9 @@ public:
|
||||
};
|
||||
|
||||
/// Call before serializeBinaryBulkWithMultipleStreams chain to write something before first mark.
|
||||
/// Column may be used only to retrieve the structure.
|
||||
virtual void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & /*column*/,
|
||||
SerializeBinaryBulkSettings & /*settings*/,
|
||||
SerializeBinaryBulkStatePtr & /*state*/) const {}
|
||||
|
||||
|
@ -246,11 +246,13 @@ void SerializationArray::enumerateStreams(
|
||||
}
|
||||
|
||||
void SerializationArray::serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
settings.path.push_back(Substream::ArrayElements);
|
||||
nested->serializeBinaryBulkStatePrefix(settings, state);
|
||||
const auto & column_array = assert_cast<const ColumnArray &>(column);
|
||||
nested->serializeBinaryBulkStatePrefix(column_array.getData(), settings, state);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
|
@ -41,6 +41,7 @@ public:
|
||||
const SubstreamData & data) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
|
@ -221,6 +221,7 @@ struct DeserializeStateLowCardinality : public ISerialization::DeserializeBinary
|
||||
};
|
||||
|
||||
void SerializationLowCardinality::serializeBinaryBulkStatePrefix(
|
||||
const IColumn & /*column*/,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
|
@ -23,6 +23,7 @@ public:
|
||||
const SubstreamData & data) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
|
@ -270,10 +270,11 @@ void SerializationMap::enumerateStreams(
|
||||
}
|
||||
|
||||
void SerializationMap::serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
nested->serializeBinaryBulkStatePrefix(settings, state);
|
||||
nested->serializeBinaryBulkStatePrefix(extractNestedColumn(column), settings, state);
|
||||
}
|
||||
|
||||
void SerializationMap::serializeBinaryBulkStateSuffix(
|
||||
|
@ -37,6 +37,7 @@ public:
|
||||
const SubstreamData & data) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
|
@ -17,11 +17,12 @@ void SerializationNamed::enumerateStreams(
|
||||
}
|
||||
|
||||
void SerializationNamed::serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
addToPath(settings.path);
|
||||
nested_serialization->serializeBinaryBulkStatePrefix(settings, state);
|
||||
nested_serialization->serializeBinaryBulkStatePrefix(column, settings, state);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
|
@ -31,6 +31,7 @@ public:
|
||||
const SubstreamData & data) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
|
@ -70,11 +70,13 @@ void SerializationNullable::enumerateStreams(
|
||||
}
|
||||
|
||||
void SerializationNullable::serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
settings.path.push_back(Substream::NullableElements);
|
||||
nested->serializeBinaryBulkStatePrefix(settings, state);
|
||||
const auto & column_nullable = assert_cast<const ColumnNullable &>(column);
|
||||
nested->serializeBinaryBulkStatePrefix(column_nullable.getNestedColumn(), settings, state);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
|
@ -19,6 +19,7 @@ public:
|
||||
const SubstreamData & data) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
|
@ -13,8 +13,6 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Functions/FunctionsConversion.h>
|
||||
|
||||
#include <Common/FieldVisitorToString.h>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/VarInt.h>
|
||||
@ -30,6 +28,7 @@ namespace ErrorCodes
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int INCORRECT_DATA;
|
||||
extern const int CANNOT_READ_ALL_DATA;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
@ -141,7 +140,6 @@ void SerializationObject<Parser>::checkSerializationIsSupported(const TSettings
|
||||
template <typename Parser>
|
||||
struct SerializationObject<Parser>::SerializeStateObject : public ISerialization::SerializeBinaryBulkState
|
||||
{
|
||||
bool is_first = true;
|
||||
DataTypePtr nested_type;
|
||||
SerializationPtr nested_serialization;
|
||||
SerializeBinaryBulkStatePtr nested_state;
|
||||
@ -158,6 +156,7 @@ struct SerializationObject<Parser>::DeserializeStateObject : public ISerializati
|
||||
|
||||
template <typename Parser>
|
||||
void SerializationObject<Parser>::serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -166,15 +165,34 @@ void SerializationObject<Parser>::serializeBinaryBulkStatePrefix(
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
|
||||
"DataTypeObject doesn't support serialization with non-trivial state");
|
||||
|
||||
const auto & column_object = assert_cast<const ColumnObject &>(column);
|
||||
if (!column_object.isFinalized())
|
||||
{
|
||||
auto finalized = column_object.cloneFinalized();
|
||||
serializeBinaryBulkStatePrefix(*finalized, settings, state);
|
||||
return;
|
||||
}
|
||||
|
||||
settings.path.push_back(Substream::ObjectStructure);
|
||||
auto * stream = settings.getter(settings.path);
|
||||
settings.path.pop_back();
|
||||
|
||||
if (!stream)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for kind of binary serialization");
|
||||
|
||||
auto [tuple_column, tuple_type] = unflattenObjectToTuple(column_object);
|
||||
|
||||
writeIntBinary(static_cast<UInt8>(BinarySerializationKind::TUPLE), *stream);
|
||||
state = std::make_shared<SerializeStateObject>();
|
||||
writeStringBinary(tuple_type->getName(), *stream);
|
||||
|
||||
auto state_object = std::make_shared<SerializeStateObject>();
|
||||
state_object->nested_type = tuple_type;
|
||||
state_object->nested_serialization = tuple_type->getDefaultSerialization();
|
||||
|
||||
settings.path.back() = Substream::ObjectData;
|
||||
state_object->nested_serialization->serializeBinaryBulkStatePrefix(*tuple_column, settings, state_object->nested_state);
|
||||
|
||||
state = std::move(state_object);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
template <typename Parser>
|
||||
@ -261,33 +279,14 @@ void SerializationObject<Parser>::serializeBinaryBulkWithMultipleStreams(
|
||||
|
||||
if (!column_object.isFinalized())
|
||||
{
|
||||
auto finalized_object = column_object.clone();
|
||||
assert_cast<ColumnObject &>(*finalized_object).finalize();
|
||||
serializeBinaryBulkWithMultipleStreams(*finalized_object, offset, limit, settings, state);
|
||||
auto finalized = column_object.cloneFinalized();
|
||||
serializeBinaryBulkWithMultipleStreams(*finalized, offset, limit, settings, state);
|
||||
return;
|
||||
}
|
||||
|
||||
auto [tuple_column, tuple_type] = unflattenObjectToTuple(column_object);
|
||||
|
||||
if (state_object->is_first)
|
||||
{
|
||||
/// Actually it's a part of serializeBinaryBulkStatePrefix,
|
||||
/// but it cannot be done there, because we have to know the
|
||||
/// structure of column.
|
||||
|
||||
settings.path.push_back(Substream::ObjectStructure);
|
||||
if (auto * stream = settings.getter(settings.path))
|
||||
writeStringBinary(tuple_type->getName(), *stream);
|
||||
|
||||
state_object->nested_type = tuple_type;
|
||||
state_object->nested_serialization = tuple_type->getDefaultSerialization();
|
||||
state_object->is_first = false;
|
||||
|
||||
settings.path.back() = Substream::ObjectData;
|
||||
state_object->nested_serialization->serializeBinaryBulkStatePrefix(settings, state_object->nested_state);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
else if (!state_object->nested_type->equals(*tuple_type))
|
||||
if (!state_object->nested_type->equals(*tuple_type))
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Types of internal column of Object mismatched. Expected: {}, Got: {}",
|
||||
@ -411,18 +410,63 @@ void SerializationObject<Parser>::serializeTextImpl(const IColumn & column, size
|
||||
writeChar('{', ostr);
|
||||
for (auto it = subcolumns.begin(); it != subcolumns.end(); ++it)
|
||||
{
|
||||
const auto & entry = *it;
|
||||
if (it != subcolumns.begin())
|
||||
writeCString(",", ostr);
|
||||
|
||||
writeDoubleQuoted((*it)->path.getPath(), ostr);
|
||||
writeDoubleQuoted(entry->path.getPath(), ostr);
|
||||
writeChar(':', ostr);
|
||||
|
||||
auto serialization = (*it)->data.getLeastCommonType()->getDefaultSerialization();
|
||||
serialization->serializeTextJSON((*it)->data.getFinalizedColumn(), row_num, ostr, settings);
|
||||
serializeTextFromSubcolumn(entry->data, row_num, ostr, settings);
|
||||
}
|
||||
writeChar('}', ostr);
|
||||
}
|
||||
|
||||
template <typename Parser>
|
||||
void SerializationObject<Parser>::serializeTextFromSubcolumn(
|
||||
const ColumnObject::Subcolumn & subcolumn, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const auto & least_common_type = subcolumn.getLeastCommonType();
|
||||
|
||||
if (subcolumn.isFinalized())
|
||||
{
|
||||
const auto & finalized_column = subcolumn.getFinalizedColumn();
|
||||
auto info = least_common_type->getSerializationInfo(finalized_column);
|
||||
auto serialization = least_common_type->getSerialization(*info);
|
||||
serialization->serializeTextJSON(finalized_column, row_num, ostr, settings);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t ind = row_num;
|
||||
if (ind < subcolumn.getNumberOfDefaultsInPrefix())
|
||||
{
|
||||
/// Suboptimal, but it should happen rarely.
|
||||
auto tmp_column = subcolumn.getLeastCommonType()->createColumn();
|
||||
tmp_column->insertDefault();
|
||||
|
||||
auto info = least_common_type->getSerializationInfo(*tmp_column);
|
||||
auto serialization = least_common_type->getSerialization(*info);
|
||||
serialization->serializeTextJSON(*tmp_column, 0, ostr, settings);
|
||||
return;
|
||||
}
|
||||
|
||||
ind -= subcolumn.getNumberOfDefaultsInPrefix();
|
||||
for (const auto & part : subcolumn.getData())
|
||||
{
|
||||
if (ind < part->size())
|
||||
{
|
||||
auto part_type = getDataTypeByColumn(*part);
|
||||
auto info = part_type->getSerializationInfo(*part);
|
||||
auto serialization = part_type->getSerialization(*info);
|
||||
serialization->serializeTextJSON(*part, ind, ostr, settings);
|
||||
return;
|
||||
}
|
||||
|
||||
ind -= part->size();
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index ({}) for text serialization is out of range", row_num);
|
||||
}
|
||||
|
||||
template <typename Parser>
|
||||
void SerializationObject<Parser>::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
|
@ -8,7 +8,7 @@ namespace DB
|
||||
{
|
||||
|
||||
/** Serialization for data type Object.
|
||||
* Supported only test serialization/deserialization.
|
||||
* Supported only text serialization/deserialization.
|
||||
* and binary bulk serialization/deserialization without position independent
|
||||
* encoding, i.e. serialization/deserialization into Native format.
|
||||
*/
|
||||
@ -31,6 +31,7 @@ public:
|
||||
*/
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
@ -104,6 +105,7 @@ private:
|
||||
void deserializeTextImpl(IColumn & column, Reader && reader) const;
|
||||
|
||||
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;
|
||||
void serializeTextFromSubcolumn(const ColumnObject::Subcolumn & subcolumn, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;
|
||||
|
||||
/// Pool of parser objects to make SerializationObject thread safe.
|
||||
mutable SimpleObjectPool<Parser> parsers_pool;
|
||||
|
@ -178,11 +178,16 @@ void SerializationSparse::enumerateStreams(
|
||||
}
|
||||
|
||||
void SerializationSparse::serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
settings.path.push_back(Substream::SparseElements);
|
||||
nested->serializeBinaryBulkStatePrefix(settings, state);
|
||||
if (const auto * column_sparse = typeid_cast<const ColumnSparse *>(&column))
|
||||
nested->serializeBinaryBulkStatePrefix(column_sparse->getValuesColumn(), settings, state);
|
||||
else
|
||||
nested->serializeBinaryBulkStatePrefix(column, settings, state);
|
||||
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
|
@ -33,6 +33,7 @@ public:
|
||||
const SubstreamData & data) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
|
@ -314,6 +314,7 @@ struct DeserializeBinaryBulkStateTuple : public ISerialization::DeserializeBinar
|
||||
|
||||
|
||||
void SerializationTuple::serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -321,7 +322,7 @@ void SerializationTuple::serializeBinaryBulkStatePrefix(
|
||||
tuple_state->states.resize(elems.size());
|
||||
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
elems[i]->serializeBinaryBulkStatePrefix(settings, tuple_state->states[i]);
|
||||
elems[i]->serializeBinaryBulkStatePrefix(extractElementColumn(column, i), settings, tuple_state->states[i]);
|
||||
|
||||
state = std::move(tuple_state);
|
||||
}
|
||||
|
@ -39,6 +39,7 @@ public:
|
||||
const SubstreamData & data) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
|
@ -13,10 +13,11 @@ void SerializationWrapper::enumerateStreams(
|
||||
}
|
||||
|
||||
void SerializationWrapper::serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
nested_serialization->serializeBinaryBulkStatePrefix(settings, state);
|
||||
nested_serialization->serializeBinaryBulkStatePrefix(column, settings, state);
|
||||
}
|
||||
|
||||
void SerializationWrapper::serializeBinaryBulkStateSuffix(
|
||||
|
@ -26,6 +26,7 @@ public:
|
||||
const SubstreamData & data) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
|
@ -31,7 +31,7 @@ TEST(SerializationObject, FromString)
|
||||
settings.getter = [&out](const auto &) { return &out; };
|
||||
|
||||
writeIntBinary(static_cast<UInt8>(1), out);
|
||||
serialization->serializeBinaryBulkStatePrefix(settings, state);
|
||||
serialization->serializeBinaryBulkStatePrefix(*column_string, settings, state);
|
||||
serialization->serializeBinaryBulkWithMultipleStreams(*column_string, 0, column_string->size(), settings, state);
|
||||
serialization->serializeBinaryBulkStateSuffix(settings, state);
|
||||
}
|
||||
|
@ -859,7 +859,7 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo
|
||||
result += fmt::format(
|
||||
", use_best_effort_in_schema_inference={}, bool_true_representation={}, bool_false_representation={},"
|
||||
" null_representation={}, delimiter={}, tuple_delimiter={}",
|
||||
settings.tsv.use_best_effort_in_schema_inference,
|
||||
settings.csv.use_best_effort_in_schema_inference,
|
||||
settings.bool_true_representation,
|
||||
settings.bool_false_representation,
|
||||
settings.csv.null_representation,
|
||||
|
@ -58,7 +58,7 @@ static void writeData(const ISerialization & serialization, const ColumnPtr & co
|
||||
settings.low_cardinality_max_dictionary_size = 0; //-V1048
|
||||
|
||||
ISerialization::SerializeBinaryBulkStatePtr state;
|
||||
serialization.serializeBinaryBulkStatePrefix(settings, state);
|
||||
serialization.serializeBinaryBulkStatePrefix(*full_column, settings, state);
|
||||
serialization.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
|
||||
serialization.serializeBinaryBulkStateSuffix(settings, state);
|
||||
}
|
||||
|
@ -3360,9 +3360,8 @@ private:
|
||||
{
|
||||
return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count)
|
||||
{
|
||||
auto res = ConvertImplGenericFromString<ColumnString>::execute(arguments, result_type, nullable_source, input_rows_count);
|
||||
auto & res_object = assert_cast<ColumnObject &>(res->assumeMutableRef());
|
||||
res_object.finalize();
|
||||
auto res = ConvertImplGenericFromString<ColumnString>::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable();
|
||||
res->finalize();
|
||||
return res;
|
||||
};
|
||||
}
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypeUUID.h>
|
||||
#include <DataTypes/DataTypeEnum.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
@ -40,6 +39,7 @@
|
||||
#include <Common/JSONParsers/RapidJSONParser.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
#include <IO/readDecimalText.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
|
||||
@ -191,7 +191,7 @@ private:
|
||||
for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments))
|
||||
{
|
||||
const auto & column = columns[i];
|
||||
if (!isString(column.type) && !isInteger(column.type))
|
||||
if (!isString(column.type) && !isNativeInteger(column.type))
|
||||
throw Exception{"The argument " + std::to_string(i + 1) + " of function " + String(function_name)
|
||||
+ " should be a string specifying key or an integer specifying index, illegal type: " + column.type->getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
@ -623,24 +623,32 @@ public:
|
||||
static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view)
|
||||
{
|
||||
UInt8 type;
|
||||
if (element.isInt64())
|
||||
type = 'i';
|
||||
else if (element.isUInt64())
|
||||
type = 'u';
|
||||
else if (element.isDouble())
|
||||
type = 'd';
|
||||
else if (element.isBool())
|
||||
type = 'b';
|
||||
else if (element.isString())
|
||||
type = '"';
|
||||
else if (element.isArray())
|
||||
type = '[';
|
||||
else if (element.isObject())
|
||||
type = '{';
|
||||
else if (element.isNull())
|
||||
type = 0;
|
||||
else
|
||||
return false;
|
||||
switch (element.type())
|
||||
{
|
||||
case ElementType::INT64:
|
||||
type = 'i';
|
||||
break;
|
||||
case ElementType::UINT64:
|
||||
type = 'u';
|
||||
break;
|
||||
case ElementType::DOUBLE:
|
||||
type = 'd';
|
||||
break;
|
||||
case ElementType::STRING:
|
||||
type = '"';
|
||||
break;
|
||||
case ElementType::ARRAY:
|
||||
type = '[';
|
||||
break;
|
||||
case ElementType::OBJECT:
|
||||
type = '{';
|
||||
break;
|
||||
case ElementType::NULL_VALUE:
|
||||
type = 0;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
ColumnVector<Int8> & col_vec = assert_cast<ColumnVector<Int8> &>(dest);
|
||||
col_vec.insertValue(type);
|
||||
@ -666,34 +674,51 @@ public:
|
||||
{
|
||||
NumberType value;
|
||||
|
||||
if (element.isInt64())
|
||||
switch (element.type())
|
||||
{
|
||||
if (!accurate::convertNumeric(element.getInt64(), value))
|
||||
case ElementType::DOUBLE:
|
||||
if constexpr (std::is_floating_point_v<NumberType>)
|
||||
{
|
||||
/// We permit inaccurate conversion of double to float.
|
||||
/// Example: double 0.1 from JSON is not representable in float.
|
||||
/// But it will be more convenient for user to perform conversion.
|
||||
value = static_cast<NumberType>(element.getDouble());
|
||||
}
|
||||
else if (!accurate::convertNumeric<Float64, NumberType, false>(element.getDouble(), value))
|
||||
return false;
|
||||
break;
|
||||
case ElementType::UINT64:
|
||||
if (!accurate::convertNumeric<UInt64, NumberType, false>(element.getUInt64(), value))
|
||||
return false;
|
||||
break;
|
||||
case ElementType::INT64:
|
||||
if (!accurate::convertNumeric<Int64, NumberType, false>(element.getInt64(), value))
|
||||
return false;
|
||||
break;
|
||||
case ElementType::BOOL:
|
||||
if constexpr (is_integer<NumberType> && convert_bool_to_integer)
|
||||
{
|
||||
value = static_cast<NumberType>(element.getBool());
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
else if (element.isUInt64())
|
||||
{
|
||||
if (!accurate::convertNumeric(element.getUInt64(), value))
|
||||
return false;
|
||||
}
|
||||
else if (element.isDouble())
|
||||
{
|
||||
if constexpr (std::is_floating_point_v<NumberType>)
|
||||
{
|
||||
/// We permit inaccurate conversion of double to float.
|
||||
/// Example: double 0.1 from JSON is not representable in float.
|
||||
/// But it will be more convenient for user to perform conversion.
|
||||
value = static_cast<NumberType>(element.getDouble());
|
||||
case ElementType::STRING: {
|
||||
auto rb = ReadBufferFromMemory{element.getString()};
|
||||
if constexpr (std::is_floating_point_v<NumberType>)
|
||||
{
|
||||
if (!tryReadFloatText(value, rb) || !rb.eof())
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!tryReadIntText(value, rb) || !rb.eof())
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
else if (!accurate::convertNumeric(element.getDouble(), value))
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
else if (element.isBool() && is_integer<NumberType> && convert_bool_to_integer)
|
||||
{
|
||||
value = static_cast<NumberType>(element.getBool());
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
auto & col_vec = assert_cast<ColumnVector<NumberType> &>(dest);
|
||||
col_vec.insertValue(value);
|
||||
@ -719,9 +744,25 @@ using JSONExtractInt64Impl = JSONExtractNumericImpl<JSONParser, Int64>;
|
||||
template <typename JSONParser>
|
||||
using JSONExtractUInt64Impl = JSONExtractNumericImpl<JSONParser, UInt64>;
|
||||
template <typename JSONParser>
|
||||
using JSONExtractInt128Impl = JSONExtractNumericImpl<JSONParser, Int128>;
|
||||
template <typename JSONParser>
|
||||
using JSONExtractUInt128Impl = JSONExtractNumericImpl<JSONParser, UInt128>;
|
||||
template <typename JSONParser>
|
||||
using JSONExtractInt256Impl = JSONExtractNumericImpl<JSONParser, Int256>;
|
||||
template <typename JSONParser>
|
||||
using JSONExtractUInt256Impl = JSONExtractNumericImpl<JSONParser, UInt256>;
|
||||
template <typename JSONParser>
|
||||
using JSONExtractFloat32Impl = JSONExtractNumericImpl<JSONParser, Float32>;
|
||||
template <typename JSONParser>
|
||||
using JSONExtractFloat64Impl = JSONExtractNumericImpl<JSONParser, Float64>;
|
||||
template <typename JSONParser>
|
||||
using JSONExtractDecimal32Impl = JSONExtractNumericImpl<JSONParser, Decimal32>;
|
||||
template <typename JSONParser>
|
||||
using JSONExtractDecimal64Impl = JSONExtractNumericImpl<JSONParser, Decimal64>;
|
||||
template <typename JSONParser>
|
||||
using JSONExtractDecimal128Impl = JSONExtractNumericImpl<JSONParser, Decimal128>;
|
||||
template <typename JSONParser>
|
||||
using JSONExtractDecimal256Impl = JSONExtractNumericImpl<JSONParser, Decimal256>;
|
||||
|
||||
|
||||
template <typename JSONParser>
|
||||
@ -739,11 +780,22 @@ public:
|
||||
|
||||
static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view)
|
||||
{
|
||||
if (!element.isBool())
|
||||
return false;
|
||||
bool value;
|
||||
switch (element.type())
|
||||
{
|
||||
case ElementType::BOOL:
|
||||
value = element.getBool();
|
||||
break;
|
||||
case ElementType::INT64:
|
||||
case ElementType::UINT64:
|
||||
value = element.getUInt64() != 0;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
auto & col_vec = assert_cast<ColumnVector<UInt8> &>(dest);
|
||||
col_vec.insertValue(static_cast<UInt8>(element.getBool()));
|
||||
col_vec.insertValue(static_cast<UInt8>(value));
|
||||
return true;
|
||||
}
|
||||
};
|
||||
@ -845,12 +897,35 @@ struct JSONExtractTree
|
||||
explicit DecimalNode(DataTypePtr data_type_) : data_type(data_type_) {}
|
||||
bool insertResultToColumn(IColumn & dest, const Element & element) override
|
||||
{
|
||||
if (!element.isDouble())
|
||||
return false;
|
||||
|
||||
const auto * type = assert_cast<const DataTypeDecimal<DecimalType> *>(data_type.get());
|
||||
auto result = convertToDecimal<DataTypeNumber<Float64>, DataTypeDecimal<DecimalType>>(element.getDouble(), type->getScale());
|
||||
assert_cast<ColumnDecimal<DecimalType> &>(dest).insert(result);
|
||||
|
||||
DecimalType value{};
|
||||
|
||||
switch (element.type())
|
||||
{
|
||||
case ElementType::DOUBLE:
|
||||
value = convertToDecimal<DataTypeNumber<Float64>, DataTypeDecimal<DecimalType>>(
|
||||
element.getDouble(), type->getScale());
|
||||
break;
|
||||
case ElementType::UINT64:
|
||||
value = convertToDecimal<DataTypeNumber<UInt64>, DataTypeDecimal<DecimalType>>(
|
||||
element.getUInt64(), type->getScale());
|
||||
break;
|
||||
case ElementType::INT64:
|
||||
value = convertToDecimal<DataTypeNumber<Int64>, DataTypeDecimal<DecimalType>>(
|
||||
element.getInt64(), type->getScale());
|
||||
break;
|
||||
case ElementType::STRING: {
|
||||
auto rb = ReadBufferFromMemory{element.getString()};
|
||||
if (!SerializationDecimal<DecimalType>::tryReadText(value, rb, DecimalUtils::max_precision<DecimalType>, type->getScale()))
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
assert_cast<ColumnDecimal<DecimalType> &>(dest).insert(value);
|
||||
return true;
|
||||
}
|
||||
private:
|
||||
@ -1088,10 +1163,14 @@ struct JSONExtractTree
|
||||
case TypeIndex::UInt16: return std::make_unique<NumericNode<UInt16>>();
|
||||
case TypeIndex::UInt32: return std::make_unique<NumericNode<UInt32>>();
|
||||
case TypeIndex::UInt64: return std::make_unique<NumericNode<UInt64>>();
|
||||
case TypeIndex::UInt128: return std::make_unique<NumericNode<UInt128>>();
|
||||
case TypeIndex::UInt256: return std::make_unique<NumericNode<UInt256>>();
|
||||
case TypeIndex::Int8: return std::make_unique<NumericNode<Int8>>();
|
||||
case TypeIndex::Int16: return std::make_unique<NumericNode<Int16>>();
|
||||
case TypeIndex::Int32: return std::make_unique<NumericNode<Int32>>();
|
||||
case TypeIndex::Int64: return std::make_unique<NumericNode<Int64>>();
|
||||
case TypeIndex::Int128: return std::make_unique<NumericNode<Int128>>();
|
||||
case TypeIndex::Int256: return std::make_unique<NumericNode<Int256>>();
|
||||
case TypeIndex::Float32: return std::make_unique<NumericNode<Float32>>();
|
||||
case TypeIndex::Float64: return std::make_unique<NumericNode<Float64>>();
|
||||
case TypeIndex::String: return std::make_unique<StringNode>();
|
||||
|
@ -104,7 +104,7 @@ struct LowerUpperUTF8Impl
|
||||
|
||||
/** Converts a single code point starting at `src` to desired case, storing result starting at `dst`.
|
||||
* `src` and `dst` are incremented by corresponding sequence lengths. */
|
||||
static void toCase(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst)
|
||||
static bool toCase(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool partial)
|
||||
{
|
||||
if (src[0] <= ascii_upper_bound)
|
||||
{
|
||||
@ -136,6 +136,11 @@ struct LowerUpperUTF8Impl
|
||||
static const Poco::UTF8Encoding utf8;
|
||||
|
||||
size_t src_sequence_length = UTF8::seqLength(*src);
|
||||
/// In case a partial buffer was passed (due to SSE optimization)
|
||||
/// we cannot convert it with the current src_end, but we may have more
|
||||
/// bytes to convert and eventually get the correct symbol.
|
||||
if (partial && src_sequence_length > static_cast<size_t>(src_end-src))
|
||||
return false;
|
||||
|
||||
auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);
|
||||
if (src_code_point)
|
||||
@ -152,7 +157,7 @@ struct LowerUpperUTF8Impl
|
||||
{
|
||||
src += dst_sequence_length;
|
||||
dst += dst_sequence_length;
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -161,6 +166,8 @@ struct LowerUpperUTF8Impl
|
||||
++dst;
|
||||
++src;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
@ -229,16 +236,13 @@ private:
|
||||
const UInt8 * expected_end = std::min(src + bytes_sse, row_end);
|
||||
|
||||
while (src < expected_end)
|
||||
toCase(src, expected_end, dst);
|
||||
|
||||
/// adjust src_end_sse by pushing it forward or backward
|
||||
const auto diff = src - expected_end;
|
||||
if (diff != 0)
|
||||
{
|
||||
if (src_end_sse + diff < src_end)
|
||||
src_end_sse += diff;
|
||||
else
|
||||
src_end_sse -= bytes_sse - diff;
|
||||
if (!toCase(src, expected_end, dst, /* partial= */ true))
|
||||
{
|
||||
/// Fall back to handling byte by byte.
|
||||
src_end_sse = src;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -255,7 +259,7 @@ private:
|
||||
chassert(row_end >= src);
|
||||
|
||||
while (src < row_end)
|
||||
toCase(src, row_end, dst);
|
||||
toCase(src, row_end, dst, /* partial= */ false);
|
||||
++offset_it;
|
||||
}
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ public:
|
||||
|
||||
auto serialization = elem.type->getDefaultSerialization();
|
||||
|
||||
serialization->serializeBinaryBulkStatePrefix(settings, state);
|
||||
serialization->serializeBinaryBulkStatePrefix(*full_column, settings, state);
|
||||
serialization->serializeBinaryBulkWithMultipleStreams(*full_column,
|
||||
0 /** offset */, 0 /** limit */,
|
||||
settings, state);
|
||||
|
@ -16,6 +16,8 @@ public:
|
||||
requires (sizeof(CharT) == 1)
|
||||
ReadBufferFromMemory(const CharT * buf, size_t size)
|
||||
: SeekableReadBuffer(const_cast<char *>(reinterpret_cast<const char *>(buf)), size, 0) {}
|
||||
explicit ReadBufferFromMemory(const std::string_view&& str)
|
||||
: SeekableReadBuffer(const_cast<char *>(str.data()), str.size(), 0) {}
|
||||
|
||||
off_t seek(off_t off, int whence) override;
|
||||
|
||||
|
@ -147,23 +147,32 @@ inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exp
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale, bool digits_only = false)
|
||||
template <typename T, typename ReturnType=void>
|
||||
inline ReturnType readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale, bool digits_only = false)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
uint32_t digits = precision;
|
||||
int32_t exponent;
|
||||
readDigits<true>(buf, x, digits, exponent, digits_only);
|
||||
auto ok = readDigits<throw_exception>(buf, x, digits, exponent, digits_only);
|
||||
|
||||
if (!throw_exception && !ok)
|
||||
return ReturnType(false);
|
||||
|
||||
if (static_cast<int32_t>(digits) + exponent > static_cast<int32_t>(precision - scale))
|
||||
{
|
||||
static constexpr const char * pattern =
|
||||
"Decimal value is too big: {} digits were read: {}e{}."
|
||||
" Expected to read decimal with scale {} and precision {}";
|
||||
if constexpr (throw_exception)
|
||||
{
|
||||
static constexpr const char * pattern = "Decimal value is too big: {} digits were read: {}e{}."
|
||||
" Expected to read decimal with scale {} and precision {}";
|
||||
|
||||
if constexpr (is_big_int_v<typename T::NativeType>)
|
||||
throw Exception(fmt::format(pattern, digits, x.value, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
if constexpr (is_big_int_v<typename T::NativeType>)
|
||||
throw Exception(fmt::format(pattern, digits, x.value, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
else
|
||||
throw Exception(fmt::format(pattern, digits, x, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
}
|
||||
else
|
||||
throw Exception(fmt::format(pattern, digits, x, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
if (static_cast<int32_t>(scale) + exponent < 0)
|
||||
@ -175,7 +184,7 @@ inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_
|
||||
/// Too big negative exponent
|
||||
x.value = 0;
|
||||
scale = 0;
|
||||
return;
|
||||
return ReturnType(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -184,26 +193,18 @@ inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_
|
||||
assert(divisor > 0); /// This is for Clang Static Analyzer. It is not smart enough to infer it automatically.
|
||||
x.value /= divisor;
|
||||
scale = 0;
|
||||
return;
|
||||
return ReturnType(true);
|
||||
}
|
||||
}
|
||||
|
||||
scale += exponent;
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline bool tryReadDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale)
|
||||
{
|
||||
uint32_t digits = precision;
|
||||
int32_t exponent;
|
||||
|
||||
if (!readDigits<false>(buf, x, digits, exponent, true) ||
|
||||
static_cast<int32_t>(digits) + exponent > static_cast<int32_t>(precision - scale) ||
|
||||
static_cast<int32_t>(scale) + exponent < 0)
|
||||
return false;
|
||||
|
||||
scale += exponent;
|
||||
return true;
|
||||
return readDecimalText<T, bool>(buf, x, precision, scale, true);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -829,7 +829,7 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat
|
||||
{
|
||||
for (const auto & [name, type] : properties.columns.getAllPhysical())
|
||||
{
|
||||
if (isObject(type))
|
||||
if (type->hasDynamicSubcolumns())
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Cannot create table with column '{}' which type is '{}' "
|
||||
@ -1398,7 +1398,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
|
||||
/// we can safely destroy the object without a call to "shutdown", because there is guarantee
|
||||
/// that no background threads/similar resources remain after exception from "startup".
|
||||
|
||||
if (!res->supportsDynamicSubcolumns() && hasObjectColumns(res->getInMemoryMetadataPtr()->getColumns()))
|
||||
if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()))
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Cannot create table with column of type Object, "
|
||||
|
@ -387,6 +387,9 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
|
||||
}
|
||||
else if (isObject(type))
|
||||
{
|
||||
if (src.getType() == Field::Types::Object)
|
||||
return src; /// Already in needed type.
|
||||
|
||||
const auto * from_type_tuple = typeid_cast<const DataTypeTuple *>(from_type_hint);
|
||||
if (src.getType() == Field::Types::Tuple && from_type_tuple && from_type_tuple->haveExplicitNames())
|
||||
{
|
||||
|
@ -232,7 +232,9 @@ Chunk IRowInputFormat::generate()
|
||||
return {};
|
||||
}
|
||||
|
||||
finalizeObjectColumns(columns);
|
||||
for (const auto & column : columns)
|
||||
column->finalize();
|
||||
|
||||
Chunk chunk(std::move(columns), num_rows);
|
||||
return chunk;
|
||||
}
|
||||
|
@ -101,7 +101,9 @@ Chunk ValuesBlockInputFormat::generate()
|
||||
return {};
|
||||
}
|
||||
|
||||
finalizeObjectColumns(columns);
|
||||
for (const auto & column : columns)
|
||||
column->finalize();
|
||||
|
||||
size_t rows_in_block = columns[0]->size();
|
||||
return Chunk{std::move(columns), rows_in_block};
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ void optimizePrimaryKeyCondition(QueryPlan::Node & root)
|
||||
size_t next_child = 0;
|
||||
};
|
||||
|
||||
std::deque<Frame> stack;
|
||||
std::vector<Frame> stack;
|
||||
stack.push_back({.node = &root});
|
||||
|
||||
while (!stack.empty())
|
||||
@ -27,29 +27,29 @@ void optimizePrimaryKeyCondition(QueryPlan::Node & root)
|
||||
/// Traverse all children first.
|
||||
if (frame.next_child < frame.node->children.size())
|
||||
{
|
||||
stack.push_back({.node = frame.node->children[frame.next_child]});
|
||||
|
||||
auto next_frame = Frame{.node = frame.node->children[frame.next_child]};
|
||||
++frame.next_child;
|
||||
stack.push_back(next_frame);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto add_filter = [&](auto & storage)
|
||||
auto add_read_from_storage_filter = [&](auto & storage)
|
||||
{
|
||||
for (auto iter=stack.rbegin() + 1; iter!=stack.rend(); ++iter)
|
||||
for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter)
|
||||
{
|
||||
if (auto * filter_step = typeid_cast<FilterStep *>(iter->node->step.get()))
|
||||
storage.addFilter(filter_step->getExpression(), filter_step->getFilterColumnName());
|
||||
else if (typeid_cast<ExpressionStep *>(iter->node->step.get()))
|
||||
;
|
||||
continue;
|
||||
else
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
if (auto * read_from_merge_tree = typeid_cast<ReadFromMergeTree *>(frame.node->step.get()))
|
||||
add_filter(*read_from_merge_tree);
|
||||
add_read_from_storage_filter(*read_from_merge_tree);
|
||||
else if (auto * read_from_merge = typeid_cast<ReadFromMerge *>(frame.node->step.get()))
|
||||
add_filter(*read_from_merge);
|
||||
add_read_from_storage_filter(*read_from_merge);
|
||||
|
||||
stack.pop_back();
|
||||
}
|
||||
|
@ -925,8 +925,15 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
for (const auto & node : added_filter_nodes.nodes)
|
||||
nodes.nodes.push_back(node);
|
||||
|
||||
key_condition.emplace(
|
||||
std::move(nodes), query_info.syntax_analyzer_result, query_info.prepared_sets, context, primary_key_columns, primary_key.expression);
|
||||
NameSet array_join_name_set;
|
||||
if (query_info.syntax_analyzer_result)
|
||||
array_join_name_set = query_info.syntax_analyzer_result->getArrayJoinSourceNameSet();
|
||||
|
||||
key_condition.emplace(std::move(nodes),
|
||||
context,
|
||||
primary_key_columns,
|
||||
primary_key.expression,
|
||||
array_join_name_set);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <Interpreters/getHeaderForProcessingStage.h>
|
||||
#include <Interpreters/SelectQueryOptions.h>
|
||||
#include <Interpreters/InterpreterSelectQuery.h>
|
||||
#include <Interpreters/getTableExpressions.h>
|
||||
#include <QueryPipeline/narrowPipe.h>
|
||||
#include <QueryPipeline/Pipe.h>
|
||||
#include <QueryPipeline/RemoteQueryExecutor.h>
|
||||
@ -25,6 +24,8 @@
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/SelectQueryInfo.h>
|
||||
#include <Storages/HDFS/HDFSCommon.h>
|
||||
#include <Storages/StorageDictionary.h>
|
||||
#include <Storages/addColumnsStructureToQueryWithClusterEngine.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
@ -56,6 +57,7 @@ StorageHDFSCluster::StorageHDFSCluster(
|
||||
{
|
||||
auto columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_);
|
||||
storage_metadata.setColumns(columns);
|
||||
add_columns_structure_to_query = true;
|
||||
}
|
||||
else
|
||||
storage_metadata.setColumns(columns_);
|
||||
@ -92,6 +94,11 @@ Pipe StorageHDFSCluster::read(
|
||||
|
||||
const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
|
||||
|
||||
auto query_to_send = query_info.original_query->clone();
|
||||
if (add_columns_structure_to_query)
|
||||
addColumnsStructureToQueryWithClusterEngine(
|
||||
query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), 3, getName());
|
||||
|
||||
for (const auto & replicas : cluster->getShardsAddresses())
|
||||
{
|
||||
/// There will be only one replica, because we consider each replica as a shard
|
||||
@ -110,7 +117,7 @@ Pipe StorageHDFSCluster::read(
|
||||
/// So, task_identifier is passed as constructor argument. It is more obvious.
|
||||
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
|
||||
connection,
|
||||
queryToString(query_info.original_query),
|
||||
queryToString(query_to_send),
|
||||
header,
|
||||
context,
|
||||
/*throttler=*/nullptr,
|
||||
|
@ -44,6 +44,7 @@ private:
|
||||
String uri;
|
||||
String format_name;
|
||||
String compression_method;
|
||||
bool add_columns_structure_to_query = false;
|
||||
};
|
||||
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Storages/KeyDescription.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <stack>
|
||||
@ -194,289 +195,6 @@ static String firstStringThatIsGreaterThanAllStringsWithPrefix(const String & pr
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Writes into `out` the name of the expression computed by `node`, looking through aliases.
///
/// What is written depends on the node type:
///  - INPUT: the node's result_name;
///  - COLUMN: for a ColumnConst, its field rendered with FieldVisitorToString (result_name may be
///    an alias if the node was created from an ASTLiteral); otherwise result_name;
///  - ALIAS: the name of the aliased child;
///  - ARRAY_JOIN: `arrayJoin(<child>)`;
///  - FUNCTION: `<function name>(<arg>, <arg>, ...)`, arguments written recursively.
///
/// If `legacy` is true, the function `modulo` is written as `moduloLegacy`, the same spelling
/// the AST path obtains through KeyDescription::moduloToModuloLegacyRecursive.
static void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & out, bool legacy = false)
{
    switch (node.type)
    {
        case (ActionsDAG::ActionType::INPUT):
            writeString(node.result_name, out);
            break;
        case (ActionsDAG::ActionType::COLUMN):
        {
            /// If it was created from ASTLiteral, then result_name can be an alias.
            /// We need to convert value back to string here.
            if (const auto * column_const = typeid_cast<const ColumnConst *>(node.column.get()))
                writeString(applyVisitor(FieldVisitorToString(), column_const->getField()), out);
            /// It may be possible that column is ColumnSet
            else
                writeString(node.result_name, out);
            break;
        }
        case (ActionsDAG::ActionType::ALIAS):
            appendColumnNameWithoutAlias(*node.children.front(), out, legacy);
            break;
        case (ActionsDAG::ActionType::ARRAY_JOIN):
            writeCString("arrayJoin(", out);
            appendColumnNameWithoutAlias(*node.children.front(), out, legacy);
            writeChar(')', out);
            break;
        case (ActionsDAG::ActionType::FUNCTION):
        {
            auto name = node.function_base->getName();
            /// The legacy name must be spelled exactly like the function the AST path produces,
            /// otherwise the resulting column name never matches a key sub-expression.
            if (legacy && name == "modulo")
                writeCString("moduloLegacy", out);
            else
                writeString(name, out);

            writeChar('(', out);
            bool first = true;
            for (const auto * arg : node.children)
            {
                if (!first)
                    writeCString(", ", out);
                first = false;

                appendColumnNameWithoutAlias(*arg, out, legacy);
            }
            writeChar(')', out);
            break;
        }
    }
}
|
||||
|
||||
/// Returns, as a string, the alias-free column name of `node` produced by
/// appendColumnNameWithoutAlias; `legacy` is forwarded to it unchanged.
static std::string getColumnNameWithoutAlias(const ActionsDAG::Node & node, bool legacy = false)
{
    WriteBufferFromOwnString name_buffer;
    appendColumnNameWithoutAlias(node, name_buffer, legacy);

    /// Hand the accumulated string over to the caller instead of copying it.
    auto && column_name = name_buffer.str();
    return std::move(column_name);
}
|
||||
|
||||
class KeyCondition::Tree
|
||||
{
|
||||
public:
|
||||
explicit Tree(const IAST * ast_) : ast(ast_) { assert(ast); }
|
||||
explicit Tree(const ActionsDAG::Node * dag_) : dag(dag_) { assert(dag); }
|
||||
|
||||
std::string getColumnName() const
|
||||
{
|
||||
if (ast)
|
||||
return ast->getColumnNameWithoutAlias();
|
||||
else
|
||||
return getColumnNameWithoutAlias(*dag);
|
||||
}
|
||||
|
||||
std::string getColumnNameLegacy() const
|
||||
{
|
||||
if (ast)
|
||||
{
|
||||
auto adjusted_ast = ast->clone();
|
||||
KeyDescription::moduloToModuloLegacyRecursive(adjusted_ast);
|
||||
return adjusted_ast->getColumnNameWithoutAlias();
|
||||
}
|
||||
else
|
||||
return getColumnNameWithoutAlias(*dag, true);
|
||||
}
|
||||
|
||||
bool isFunction() const
|
||||
{
|
||||
if (ast)
|
||||
return typeid_cast<const ASTFunction *>(ast);
|
||||
else
|
||||
return dag->type == ActionsDAG::ActionType::FUNCTION;
|
||||
}
|
||||
|
||||
bool isConstant() const
|
||||
{
|
||||
if (ast)
|
||||
return typeid_cast<const ASTLiteral *>(ast);
|
||||
else
|
||||
return dag->column && isColumnConst(*dag->column);
|
||||
}
|
||||
|
||||
ColumnWithTypeAndName getConstant() const
|
||||
{
|
||||
if (!isConstant())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "KeyCondition::Tree node is not a constant");
|
||||
|
||||
ColumnWithTypeAndName res;
|
||||
|
||||
if (ast)
|
||||
{
|
||||
const auto * literal = assert_cast<const ASTLiteral *>(ast);
|
||||
res.type = applyVisitor(FieldToDataType(), literal->value);
|
||||
res.column = res.type->createColumnConst(0, literal->value);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
res.type = dag->result_type;
|
||||
res.column = dag->column;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
bool tryGetConstant(const Block & block_with_constants, Field & out_value, DataTypePtr & out_type) const
|
||||
{
|
||||
if (ast)
|
||||
{
|
||||
// Constant expr should use alias names if any
|
||||
String column_name = ast->getColumnName();
|
||||
|
||||
if (const auto * lit = ast->as<ASTLiteral>())
|
||||
{
|
||||
/// By default block_with_constants has only one column named "_dummy".
|
||||
/// If block contains only constants it's may not be preprocessed by
|
||||
// ExpressionAnalyzer, so try to look up in the default column.
|
||||
if (!block_with_constants.has(column_name))
|
||||
column_name = "_dummy";
|
||||
|
||||
/// Simple literal
|
||||
out_value = lit->value;
|
||||
out_type = block_with_constants.getByName(column_name).type;
|
||||
|
||||
/// If constant is not Null, we can assume it's type is not Nullable as well.
|
||||
if (!out_value.isNull())
|
||||
out_type = removeNullable(out_type);
|
||||
|
||||
return true;
|
||||
}
|
||||
else if (block_with_constants.has(column_name) && isColumnConst(*block_with_constants.getByName(column_name).column))
|
||||
{
|
||||
/// An expression which is dependent on constants only
|
||||
const auto & expr_info = block_with_constants.getByName(column_name);
|
||||
out_value = (*expr_info.column)[0];
|
||||
out_type = expr_info.type;
|
||||
|
||||
if (!out_value.isNull())
|
||||
out_type = removeNullable(out_type);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (dag->column && isColumnConst(*dag->column))
|
||||
{
|
||||
out_value = (*dag->column)[0];
|
||||
out_type = dag->result_type;
|
||||
|
||||
if (!out_value.isNull())
|
||||
out_type = removeNullable(out_type);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
ConstSetPtr tryGetPreparedSet(
|
||||
const PreparedSetsPtr & sets,
|
||||
const std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
|
||||
const DataTypes & data_types) const
|
||||
{
|
||||
if (sets && ast)
|
||||
{
|
||||
if (ast->as<ASTSubquery>() || ast->as<ASTTableIdentifier>())
|
||||
return sets->get(PreparedSetKey::forSubquery(*ast));
|
||||
|
||||
/// We have `PreparedSetKey::forLiteral` but it is useless here as we don't have enough information
|
||||
/// about types in left argument of the IN operator. Instead, we manually iterate through all the sets
|
||||
/// and find the one for the right arg based on the AST structure (getTreeHash), after that we check
|
||||
/// that the types it was prepared with are compatible with the types of the primary key.
|
||||
auto types_match = [&indexes_mapping, &data_types](const SetPtr & candidate_set)
|
||||
{
|
||||
assert(indexes_mapping.size() == data_types.size());
|
||||
|
||||
for (size_t i = 0; i < indexes_mapping.size(); ++i)
|
||||
{
|
||||
if (!candidate_set->areTypesEqual(indexes_mapping[i].tuple_index, data_types[i]))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
for (const auto & set : sets->getByTreeHash(ast->getTreeHash()))
|
||||
{
|
||||
if (types_match(set))
|
||||
return set;
|
||||
}
|
||||
}
|
||||
else if (dag->column)
|
||||
{
|
||||
const IColumn * col = dag->column.get();
|
||||
if (const auto * col_const = typeid_cast<const ColumnConst *>(col))
|
||||
col = &col_const->getDataColumn();
|
||||
|
||||
if (const auto * col_set = typeid_cast<const ColumnSet *>(col))
|
||||
{
|
||||
auto set = col_set->getData();
|
||||
if (set->isCreated())
|
||||
return set;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
FunctionTree asFunction() const;
|
||||
|
||||
protected:
|
||||
const IAST * ast = nullptr;
|
||||
const ActionsDAG::Node * dag = nullptr;
|
||||
};
|
||||
|
||||
/// A Tree node viewed as a function call: exposes the function name and its arguments.
/// Inherits Tree's constructors; Tree is declared a friend.
class KeyCondition::FunctionTree : public KeyCondition::Tree
{
public:
    /// Name of the function: ASTFunction::name for an AST node,
    /// function_base->getName() for a DAG node.
    std::string getFunctionName() const
    {
        if (!ast)
            return dag->function_base->getName();

        return assert_cast<const ASTFunction *>(ast)->name;
    }

    /// Number of arguments; an AST function without an argument list has zero.
    size_t numArguments() const
    {
        if (!ast)
            return dag->children.size();

        const auto * function_ast = assert_cast<const ASTFunction *>(ast);
        if (!function_ast->arguments)
            return 0;

        return function_ast->arguments->children.size();
    }

    /// The idx-th argument as a Tree. No bounds check is performed here.
    Tree getArgumentAt(size_t idx) const
    {
        if (!ast)
            return Tree(dag->children[idx]);

        const auto * function_ast = assert_cast<const ASTFunction *>(ast);
        return Tree(function_ast->arguments->children[idx].get());
    }

private:
    using Tree::Tree;

    friend class Tree;
};
|
||||
|
||||
|
||||
/// Re-wraps this node as a FunctionTree over the same underlying AST or DAG node.
/// Throws LOGICAL_ERROR when the node is not a function.
KeyCondition::FunctionTree KeyCondition::Tree::asFunction() const
{
    if (isFunction())
        return ast ? KeyCondition::FunctionTree(ast) : KeyCondition::FunctionTree(dag);

    throw Exception(ErrorCodes::LOGICAL_ERROR, "KeyCondition::Tree node is not a function");
}
|
||||
|
||||
|
||||
/// A dictionary containing actions to the corresponding functions to turn them into `RPNElement`
|
||||
const KeyCondition::AtomMap KeyCondition::atom_map
|
||||
{
|
||||
{
|
||||
@ -972,16 +690,17 @@ static NameSet getAllSubexpressionNames(const ExpressionActions & key_expr)
|
||||
KeyCondition::KeyCondition(
|
||||
const ASTPtr & query,
|
||||
const ASTs & additional_filter_asts,
|
||||
TreeRewriterResultPtr syntax_analyzer_result,
|
||||
PreparedSetsPtr prepared_sets_,
|
||||
Block block_with_constants,
|
||||
PreparedSetsPtr prepared_sets,
|
||||
ContextPtr context,
|
||||
const Names & key_column_names,
|
||||
const ExpressionActionsPtr & key_expr_,
|
||||
NameSet array_joined_column_names_,
|
||||
bool single_point_,
|
||||
bool strict_)
|
||||
: key_expr(key_expr_)
|
||||
, key_subexpr_names(getAllSubexpressionNames(*key_expr))
|
||||
, prepared_sets(prepared_sets_)
|
||||
, array_joined_column_names(std::move(array_joined_column_names_))
|
||||
, single_point(single_point_)
|
||||
, strict(strict_)
|
||||
{
|
||||
@ -992,82 +711,64 @@ KeyCondition::KeyCondition(
|
||||
key_columns[name] = i;
|
||||
}
|
||||
|
||||
if (!syntax_analyzer_result)
|
||||
auto filter_node = buildFilterNode(query, additional_filter_asts);
|
||||
|
||||
if (!filter_node)
|
||||
{
|
||||
rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
|
||||
return;
|
||||
}
|
||||
|
||||
/** Evaluation of expressions that depend only on constants.
|
||||
* For the index to be used, if it is written, for example `WHERE Date = toDate(now())`.
|
||||
/** When non-strictly monotonic functions are employed in functional index (e.g. ORDER BY toStartOfHour(dateTime)),
|
||||
* the use of NOT operator in predicate will result in the indexing algorithm leave out some data.
|
||||
* This is caused by rewriting in KeyCondition::tryParseAtomFromAST of relational operators to less strict
|
||||
* when parsing the AST into internal RPN representation.
|
||||
* To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's
|
||||
* are pushed down and applied (when possible) to leaf nodes.
|
||||
*/
|
||||
Block block_with_constants = getBlockWithConstants(query, syntax_analyzer_result, context);
|
||||
auto inverted_filter_node = cloneASTWithInversionPushDown(filter_node);
|
||||
|
||||
if (syntax_analyzer_result)
|
||||
{
|
||||
for (const auto & [name, _] : syntax_analyzer_result->array_join_result_to_source)
|
||||
array_joined_columns.insert(name);
|
||||
}
|
||||
RPNBuilder<RPNElement> builder(
|
||||
inverted_filter_node,
|
||||
std::move(context),
|
||||
std::move(block_with_constants),
|
||||
std::move(prepared_sets),
|
||||
[&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); });
|
||||
rpn = std::move(builder).extractRPN();
|
||||
}
|
||||
|
||||
const ASTSelectQuery & select = query->as<ASTSelectQuery &>();
|
||||
|
||||
ASTs filters;
|
||||
if (select.where())
|
||||
filters.push_back(select.where());
|
||||
|
||||
if (select.prewhere())
|
||||
filters.push_back(select.prewhere());
|
||||
|
||||
for (const auto & filter_ast : additional_filter_asts)
|
||||
filters.push_back(filter_ast);
|
||||
|
||||
if (!filters.empty())
|
||||
{
|
||||
ASTPtr filter_query;
|
||||
if (filters.size() == 1)
|
||||
{
|
||||
filter_query = filters.front();
|
||||
}
|
||||
else
|
||||
{
|
||||
auto function = std::make_shared<ASTFunction>();
|
||||
|
||||
function->name = "and";
|
||||
function->arguments = std::make_shared<ASTExpressionList>();
|
||||
function->children.push_back(function->arguments);
|
||||
function->arguments->children = std::move(filters);
|
||||
|
||||
filter_query = function;
|
||||
}
|
||||
|
||||
/** When non-strictly monotonic functions are employed in functional index (e.g. ORDER BY toStartOfHour(dateTime)),
|
||||
* the use of NOT operator in predicate will result in the indexing algorithm leave out some data.
|
||||
* This is caused by rewriting in KeyCondition::tryParseAtomFromAST of relational operators to less strict
|
||||
* when parsing the AST into internal RPN representation.
|
||||
* To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's
|
||||
* are pushed down and applied (when possible) to leaf nodes.
|
||||
*/
|
||||
auto ast = cloneASTWithInversionPushDown(filter_query);
|
||||
traverseAST(Tree(ast.get()), context, block_with_constants);
|
||||
}
|
||||
else
|
||||
{
|
||||
rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
|
||||
}
|
||||
KeyCondition::KeyCondition(
|
||||
const SelectQueryInfo & query_info,
|
||||
ContextPtr context,
|
||||
const Names & key_column_names,
|
||||
const ExpressionActionsPtr & key_expr_,
|
||||
bool single_point_,
|
||||
bool strict_)
|
||||
: KeyCondition(
|
||||
query_info.query,
|
||||
query_info.filter_asts,
|
||||
KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context),
|
||||
query_info.prepared_sets,
|
||||
context,
|
||||
key_column_names,
|
||||
key_expr_,
|
||||
query_info.syntax_analyzer_result->getArrayJoinSourceNameSet(),
|
||||
single_point_,
|
||||
strict_)
|
||||
{
|
||||
}
|
||||
|
||||
KeyCondition::KeyCondition(
|
||||
ActionDAGNodes dag_nodes,
|
||||
TreeRewriterResultPtr syntax_analyzer_result,
|
||||
PreparedSetsPtr prepared_sets_,
|
||||
ContextPtr context,
|
||||
const Names & key_column_names,
|
||||
const ExpressionActionsPtr & key_expr_,
|
||||
NameSet array_joined_column_names_,
|
||||
bool single_point_,
|
||||
bool strict_)
|
||||
: key_expr(key_expr_)
|
||||
, key_subexpr_names(getAllSubexpressionNames(*key_expr))
|
||||
, prepared_sets(prepared_sets_)
|
||||
, array_joined_column_names(std::move(array_joined_column_names_))
|
||||
, single_point(single_point_)
|
||||
, strict(strict_)
|
||||
{
|
||||
@ -1078,29 +779,23 @@ KeyCondition::KeyCondition(
|
||||
key_columns[name] = i;
|
||||
}
|
||||
|
||||
if (!syntax_analyzer_result)
|
||||
if (dag_nodes.nodes.empty())
|
||||
{
|
||||
rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
|
||||
return;
|
||||
}
|
||||
|
||||
for (const auto & [name, _] : syntax_analyzer_result->array_join_result_to_source)
|
||||
array_joined_columns.insert(name);
|
||||
auto inverted_dag = cloneASTWithInversionPushDown(std::move(dag_nodes.nodes), context);
|
||||
assert(inverted_dag->getOutputs().size() == 1);
|
||||
|
||||
if (!dag_nodes.nodes.empty())
|
||||
const auto * inverted_dag_filter_node = inverted_dag->getOutputs()[0];
|
||||
|
||||
RPNBuilder<RPNElement> builder(inverted_dag_filter_node, context, [&](const RPNBuilderTreeNode & node, RPNElement & out)
|
||||
{
|
||||
auto inverted_dag = cloneASTWithInversionPushDown(std::move(dag_nodes.nodes), context);
|
||||
return extractAtomFromTree(node, out);
|
||||
});
|
||||
|
||||
// std::cerr << "========== inverted dag: " << inverted_dag->dumpDAG() << std::endl;
|
||||
|
||||
Block empty;
|
||||
for (const auto * node : inverted_dag->getOutputs())
|
||||
traverseAST(Tree(node), context, empty);
|
||||
}
|
||||
else
|
||||
{
|
||||
rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
|
||||
}
|
||||
rpn = std::move(builder).extractRPN();
|
||||
}
|
||||
|
||||
bool KeyCondition::addCondition(const String & column, const Range & range)
|
||||
@ -1112,12 +807,12 @@ bool KeyCondition::addCondition(const String & column, const Range & range)
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Computes value of constant expression and its data type.
|
||||
* Returns false, if expression isn't constant.
|
||||
*/
|
||||
bool KeyCondition::getConstant(const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type)
|
||||
{
|
||||
return Tree(expr.get()).tryGetConstant(block_with_constants, out_value, out_type);
|
||||
RPNBuilderTreeContext tree_context(nullptr, block_with_constants, nullptr);
|
||||
RPNBuilderTreeNode node(expr.get(), tree_context);
|
||||
|
||||
return node.tryGetConstant(out_value, out_type);
|
||||
}
|
||||
|
||||
|
||||
@ -1201,39 +896,6 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr &
|
||||
return {field.columns, field.row_idx, result_idx};
|
||||
}
|
||||
|
||||
void KeyCondition::traverseAST(const Tree & node, ContextPtr context, Block & block_with_constants)
|
||||
{
|
||||
RPNElement element;
|
||||
|
||||
if (node.isFunction())
|
||||
{
|
||||
auto func = node.asFunction();
|
||||
if (tryParseLogicalOperatorFromAST(func, element))
|
||||
{
|
||||
size_t num_args = func.numArguments();
|
||||
for (size_t i = 0; i < num_args; ++i)
|
||||
{
|
||||
traverseAST(func.getArgumentAt(i), context, block_with_constants);
|
||||
|
||||
/** The first part of the condition is for the correct support of `and` and `or` functions of arbitrary arity
|
||||
* - in this case `n - 1` elements are added (where `n` is the number of arguments).
|
||||
*/
|
||||
if (i != 0 || element.function == RPNElement::FUNCTION_NOT)
|
||||
rpn.emplace_back(element);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!tryParseAtomFromAST(node, context, block_with_constants, element))
|
||||
{
|
||||
element.function = RPNElement::FUNCTION_UNKNOWN;
|
||||
}
|
||||
|
||||
rpn.emplace_back(std::move(element));
|
||||
}
|
||||
|
||||
/** The key functional expression constraint may be inferred from a plain column in the expression.
|
||||
* For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`,
|
||||
* it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())`
|
||||
@ -1355,7 +1017,7 @@ bool KeyCondition::transformConstantWithValidFunctions(
|
||||
}
|
||||
|
||||
bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
|
||||
const Tree & node,
|
||||
const RPNBuilderTreeNode & node,
|
||||
size_t & out_key_column_num,
|
||||
DataTypePtr & out_key_column_type,
|
||||
Field & out_value,
|
||||
@ -1363,7 +1025,7 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
|
||||
{
|
||||
String expr_name = node.getColumnName();
|
||||
|
||||
if (array_joined_columns.contains(expr_name))
|
||||
if (array_joined_column_names.contains(expr_name))
|
||||
return false;
|
||||
|
||||
if (!key_subexpr_names.contains(expr_name))
|
||||
@ -1390,11 +1052,15 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
|
||||
|
||||
/// Looking for possible transformation of `column = constant` into `partition_expr = function(constant)`
|
||||
bool KeyCondition::canConstantBeWrappedByFunctions(
|
||||
const Tree & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type)
|
||||
const RPNBuilderTreeNode & node,
|
||||
size_t & out_key_column_num,
|
||||
DataTypePtr & out_key_column_type,
|
||||
Field & out_value,
|
||||
DataTypePtr & out_type)
|
||||
{
|
||||
String expr_name = node.getColumnName();
|
||||
|
||||
if (array_joined_columns.contains(expr_name))
|
||||
if (array_joined_column_names.contains(expr_name))
|
||||
return false;
|
||||
|
||||
if (!key_subexpr_names.contains(expr_name))
|
||||
@ -1408,7 +1074,7 @@ bool KeyCondition::canConstantBeWrappedByFunctions(
|
||||
/// The case `f(modulo(...))` for totally monotonic `f ` is considered to be rare.
|
||||
///
|
||||
/// Note: for negative values, we can filter more partitions than needed.
|
||||
expr_name = node.getColumnNameLegacy();
|
||||
expr_name = node.getColumnNameWithModuloLegacy();
|
||||
|
||||
if (!key_subexpr_names.contains(expr_name))
|
||||
return false;
|
||||
@ -1425,8 +1091,7 @@ bool KeyCondition::canConstantBeWrappedByFunctions(
|
||||
}
|
||||
|
||||
bool KeyCondition::tryPrepareSetIndex(
|
||||
const FunctionTree & func,
|
||||
ContextPtr context,
|
||||
const RPNBuilderFunctionTreeNode & func,
|
||||
RPNElement & out,
|
||||
size_t & out_key_column_num)
|
||||
{
|
||||
@ -1436,13 +1101,12 @@ bool KeyCondition::tryPrepareSetIndex(
|
||||
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> indexes_mapping;
|
||||
DataTypes data_types;
|
||||
|
||||
auto get_key_tuple_position_mapping = [&](const Tree & node, size_t tuple_index)
|
||||
auto get_key_tuple_position_mapping = [&](const RPNBuilderTreeNode & node, size_t tuple_index)
|
||||
{
|
||||
MergeTreeSetIndex::KeyTuplePositionMapping index_mapping;
|
||||
index_mapping.tuple_index = tuple_index;
|
||||
DataTypePtr data_type;
|
||||
if (isKeyPossiblyWrappedByMonotonicFunctions(
|
||||
node, context, index_mapping.key_index, data_type, index_mapping.functions))
|
||||
if (isKeyPossiblyWrappedByMonotonicFunctions(node, index_mapping.key_index, data_type, index_mapping.functions))
|
||||
{
|
||||
indexes_mapping.push_back(index_mapping);
|
||||
data_types.push_back(data_type);
|
||||
@ -1456,25 +1120,29 @@ bool KeyCondition::tryPrepareSetIndex(
|
||||
{
|
||||
/// Note: in case of ActionsDAG, tuple may be a constant.
|
||||
/// In this case, there are no keys in the tuple. So, we don't have to check it.
|
||||
auto left_arg_tuple = left_arg.asFunction();
|
||||
auto left_arg_tuple = left_arg.toFunctionNode();
|
||||
if (left_arg_tuple.getFunctionName() == "tuple")
|
||||
{
|
||||
left_args_count = left_arg_tuple.numArguments();
|
||||
left_args_count = left_arg_tuple.getArgumentsSize();
|
||||
for (size_t i = 0; i < left_args_count; ++i)
|
||||
get_key_tuple_position_mapping(left_arg_tuple.getArgumentAt(i), i);
|
||||
}
|
||||
else
|
||||
{
|
||||
get_key_tuple_position_mapping(left_arg, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
get_key_tuple_position_mapping(left_arg, 0);
|
||||
}
|
||||
|
||||
if (indexes_mapping.empty())
|
||||
return false;
|
||||
|
||||
const auto right_arg = func.getArgumentAt(1);
|
||||
|
||||
auto prepared_set = right_arg.tryGetPreparedSet(prepared_sets, indexes_mapping, data_types);
|
||||
auto prepared_set = right_arg.tryGetPreparedSet(indexes_mapping, data_types);
|
||||
if (!prepared_set)
|
||||
return false;
|
||||
|
||||
@ -1568,13 +1236,12 @@ private:
|
||||
|
||||
|
||||
bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions(
|
||||
const Tree & node,
|
||||
ContextPtr context,
|
||||
const RPNBuilderTreeNode & node,
|
||||
size_t & out_key_column_num,
|
||||
DataTypePtr & out_key_res_column_type,
|
||||
MonotonicFunctionsChain & out_functions_chain)
|
||||
{
|
||||
std::vector<FunctionTree> chain_not_tested_for_monotonicity;
|
||||
std::vector<RPNBuilderFunctionTreeNode> chain_not_tested_for_monotonicity;
|
||||
DataTypePtr key_column_type;
|
||||
|
||||
if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_key_column_num, key_column_type, chain_not_tested_for_monotonicity))
|
||||
@ -1583,17 +1250,17 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions(
|
||||
for (auto it = chain_not_tested_for_monotonicity.rbegin(); it != chain_not_tested_for_monotonicity.rend(); ++it)
|
||||
{
|
||||
auto function = *it;
|
||||
auto func_builder = FunctionFactory::instance().tryGet(function.getFunctionName(), context);
|
||||
auto func_builder = FunctionFactory::instance().tryGet(function.getFunctionName(), node.getTreeContext().getQueryContext());
|
||||
if (!func_builder)
|
||||
return false;
|
||||
ColumnsWithTypeAndName arguments;
|
||||
ColumnWithTypeAndName const_arg;
|
||||
FunctionWithOptionalConstArg::Kind kind = FunctionWithOptionalConstArg::Kind::NO_CONST;
|
||||
if (function.numArguments() == 2)
|
||||
if (function.getArgumentsSize() == 2)
|
||||
{
|
||||
if (function.getArgumentAt(0).isConstant())
|
||||
{
|
||||
const_arg = function.getArgumentAt(0).getConstant();
|
||||
const_arg = function.getArgumentAt(0).getConstantColumn();
|
||||
arguments.push_back(const_arg);
|
||||
arguments.push_back({ nullptr, key_column_type, "" });
|
||||
kind = FunctionWithOptionalConstArg::Kind::LEFT_CONST;
|
||||
@ -1601,7 +1268,7 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions(
|
||||
else if (function.getArgumentAt(1).isConstant())
|
||||
{
|
||||
arguments.push_back({ nullptr, key_column_type, "" });
|
||||
const_arg = function.getArgumentAt(1).getConstant();
|
||||
const_arg = function.getArgumentAt(1).getConstantColumn();
|
||||
arguments.push_back(const_arg);
|
||||
kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST;
|
||||
}
|
||||
@ -1627,10 +1294,10 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions(
|
||||
}
|
||||
|
||||
bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl(
|
||||
const Tree & node,
|
||||
const RPNBuilderTreeNode & node,
|
||||
size_t & out_key_column_num,
|
||||
DataTypePtr & out_key_column_type,
|
||||
std::vector<FunctionTree> & out_functions_chain)
|
||||
std::vector<RPNBuilderFunctionTreeNode> & out_functions_chain)
|
||||
{
|
||||
/** By itself, the key column can be a functional expression. for example, `intHash32(UserID)`.
|
||||
* Therefore, use the full name of the expression for search.
|
||||
@ -1640,7 +1307,7 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl(
|
||||
// Key columns should use canonical names for index analysis
|
||||
String name = node.getColumnName();
|
||||
|
||||
if (array_joined_columns.contains(name))
|
||||
if (array_joined_column_names.contains(name))
|
||||
return false;
|
||||
|
||||
auto it = key_columns.find(name);
|
||||
@ -1653,37 +1320,39 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl(
|
||||
|
||||
if (node.isFunction())
|
||||
{
|
||||
auto func = node.asFunction();
|
||||
auto function_node = node.toFunctionNode();
|
||||
|
||||
size_t num_args = func.numArguments();
|
||||
if (num_args > 2 || num_args == 0)
|
||||
size_t arguments_size = function_node.getArgumentsSize();
|
||||
if (arguments_size > 2 || arguments_size == 0)
|
||||
return false;
|
||||
|
||||
out_functions_chain.push_back(func);
|
||||
bool ret = false;
|
||||
if (num_args == 2)
|
||||
out_functions_chain.push_back(function_node);
|
||||
|
||||
bool result = false;
|
||||
if (arguments_size == 2)
|
||||
{
|
||||
if (func.getArgumentAt(0).isConstant())
|
||||
if (function_node.getArgumentAt(0).isConstant())
|
||||
{
|
||||
ret = isKeyPossiblyWrappedByMonotonicFunctionsImpl(func.getArgumentAt(1), out_key_column_num, out_key_column_type, out_functions_chain);
|
||||
result = isKeyPossiblyWrappedByMonotonicFunctionsImpl(function_node.getArgumentAt(1), out_key_column_num, out_key_column_type, out_functions_chain);
|
||||
}
|
||||
else if (func.getArgumentAt(1).isConstant())
|
||||
else if (function_node.getArgumentAt(1).isConstant())
|
||||
{
|
||||
ret = isKeyPossiblyWrappedByMonotonicFunctionsImpl(func.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain);
|
||||
result = isKeyPossiblyWrappedByMonotonicFunctionsImpl(function_node.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = isKeyPossiblyWrappedByMonotonicFunctionsImpl(func.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain);
|
||||
result = isKeyPossiblyWrappedByMonotonicFunctionsImpl(function_node.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain);
|
||||
}
|
||||
return ret;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const KeyCondition::Tree & node)
|
||||
static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const String & node_column_name)
|
||||
{
|
||||
try
|
||||
{
|
||||
@ -1693,13 +1362,13 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value,
|
||||
{
|
||||
throw Exception("Key expression contains comparison between inconvertible types: " +
|
||||
desired_type->getName() + " and " + src_type->getName() +
|
||||
" inside " + node.getColumnName(),
|
||||
" inside " + node_column_name,
|
||||
ErrorCodes::BAD_TYPE_OF_FIELD);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Block & block_with_constants, RPNElement & out)
|
||||
bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out)
|
||||
{
|
||||
/** Functions < > = != <= >= in `notIn` isNull isNotNull, where one argument is a constant, and the other is one of columns of key,
|
||||
* or itself, wrapped in a chain of possibly-monotonic functions,
|
||||
@ -1709,8 +1378,8 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
|
||||
DataTypePtr const_type;
|
||||
if (node.isFunction())
|
||||
{
|
||||
auto func = node.asFunction();
|
||||
size_t num_args = func.numArguments();
|
||||
auto func = node.toFunctionNode();
|
||||
size_t num_args = func.getArgumentsSize();
|
||||
|
||||
DataTypePtr key_expr_type; /// Type of expression containing key column
|
||||
size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
|
||||
@ -1722,7 +1391,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
|
||||
|
||||
if (num_args == 1)
|
||||
{
|
||||
if (!(isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), context, key_column_num, key_expr_type, chain)))
|
||||
if (!(isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), key_column_num, key_expr_type, chain)))
|
||||
return false;
|
||||
|
||||
if (key_column_num == static_cast<size_t>(-1))
|
||||
@ -1753,7 +1422,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
|
||||
|
||||
if (functionIsInOrGlobalInOperator(func_name))
|
||||
{
|
||||
if (tryPrepareSetIndex(func, context, out, key_column_num))
|
||||
if (tryPrepareSetIndex(func, out, key_column_num))
|
||||
{
|
||||
key_arg_pos = 0;
|
||||
is_set_const = true;
|
||||
@ -1761,7 +1430,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
|
||||
else
|
||||
return false;
|
||||
}
|
||||
else if (func.getArgumentAt(1).tryGetConstant(block_with_constants, const_value, const_type))
|
||||
else if (func.getArgumentAt(1).tryGetConstant(const_value, const_type))
|
||||
{
|
||||
/// If the const operand is null, the atom will be always false
|
||||
if (const_value.isNull())
|
||||
@ -1770,7 +1439,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
|
||||
return true;
|
||||
}
|
||||
|
||||
if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), context, key_column_num, key_expr_type, chain))
|
||||
if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), key_column_num, key_expr_type, chain))
|
||||
{
|
||||
key_arg_pos = 0;
|
||||
}
|
||||
@ -1791,7 +1460,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
|
||||
else
|
||||
return false;
|
||||
}
|
||||
else if (func.getArgumentAt(0).tryGetConstant(block_with_constants, const_value, const_type))
|
||||
else if (func.getArgumentAt(0).tryGetConstant(const_value, const_type))
|
||||
{
|
||||
/// If the const operand is null, the atom will be always false
|
||||
if (const_value.isNull())
|
||||
@ -1800,7 +1469,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
|
||||
return true;
|
||||
}
|
||||
|
||||
if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(1), context, key_column_num, key_expr_type, chain))
|
||||
if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(1), key_column_num, key_expr_type, chain))
|
||||
{
|
||||
key_arg_pos = 1;
|
||||
}
|
||||
@ -1880,7 +1549,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
|
||||
|
||||
if (!const_type->equals(*common_type))
|
||||
{
|
||||
castValueToType(common_type, const_value, const_type, node);
|
||||
castValueToType(common_type, const_value, const_type, node.getColumnName());
|
||||
|
||||
// Need to set is_constant_transformed unless we're doing exact conversion
|
||||
if (!key_expr_type_not_null->equals(*common_type))
|
||||
@ -1925,7 +1594,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
|
||||
|
||||
return atom_it->second(out, const_value);
|
||||
}
|
||||
else if (node.tryGetConstant(block_with_constants, const_value, const_type))
|
||||
else if (node.tryGetConstant(const_value, const_type))
|
||||
{
|
||||
/// For cases where it says, for example, `WHERE 0 AND something`
|
||||
|
||||
@ -1948,32 +1617,6 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
|
||||
return false;
|
||||
}
|
||||
|
||||
bool KeyCondition::tryParseLogicalOperatorFromAST(const FunctionTree & func, RPNElement & out)
|
||||
{
|
||||
/// Functions AND, OR, NOT.
|
||||
/// Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
|
||||
/// (or, the same thing - calling the function `and` from one argument).
|
||||
|
||||
if (func.getFunctionName() == "not")
|
||||
{
|
||||
if (func.numArguments() != 1)
|
||||
return false;
|
||||
|
||||
out.function = RPNElement::FUNCTION_NOT;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (func.getFunctionName() == "and" || func.getFunctionName() == "indexHint")
|
||||
out.function = RPNElement::FUNCTION_AND;
|
||||
else if (func.getFunctionName() == "or")
|
||||
out.function = RPNElement::FUNCTION_OR;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
String KeyCondition::toString() const
|
||||
{
|
||||
String res;
|
||||
|
@ -2,11 +2,16 @@
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include <Interpreters/Set.h>
|
||||
#include <Core/SortDescription.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Storages/SelectQueryInfo.h>
|
||||
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
|
||||
#include <Interpreters/Set.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Interpreters/TreeRewriter.h>
|
||||
|
||||
#include <Storages/SelectQueryInfo.h>
|
||||
#include <Storages/MergeTree/RPNBuilder.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -205,45 +210,37 @@ public:
|
||||
class KeyCondition
|
||||
{
|
||||
public:
|
||||
/// Does not take into account the SAMPLE section. all_columns - the set of all columns of the table.
|
||||
/// Construct key condition from AST SELECT query WHERE, PREWHERE and additional filters
|
||||
KeyCondition(
|
||||
const ASTPtr & query,
|
||||
const ASTs & additional_filter_asts,
|
||||
TreeRewriterResultPtr syntax_analyzer_result,
|
||||
Block block_with_constants,
|
||||
PreparedSetsPtr prepared_sets_,
|
||||
ContextPtr context,
|
||||
const Names & key_column_names,
|
||||
const ExpressionActionsPtr & key_expr,
|
||||
NameSet array_joined_column_names,
|
||||
bool single_point_ = false,
|
||||
bool strict_ = false);
|
||||
|
||||
/** Construct key condition from AST SELECT query WHERE, PREWHERE and additional filters.
|
||||
* Select query, additional filters, prepared sets are initialized using query info.
|
||||
*/
|
||||
KeyCondition(
|
||||
const SelectQueryInfo & query_info,
|
||||
ContextPtr context,
|
||||
const Names & key_column_names,
|
||||
const ExpressionActionsPtr & key_expr_,
|
||||
bool single_point_ = false,
|
||||
bool strict_ = false)
|
||||
: KeyCondition(
|
||||
query_info.query,
|
||||
query_info.filter_asts,
|
||||
query_info.syntax_analyzer_result,
|
||||
query_info.prepared_sets,
|
||||
context,
|
||||
key_column_names,
|
||||
key_expr_,
|
||||
single_point_,
|
||||
strict_)
|
||||
{
|
||||
}
|
||||
bool strict_ = false);
|
||||
|
||||
/// Construct key condition from ActionsDAG nodes
|
||||
KeyCondition(
|
||||
ActionDAGNodes dag_nodes,
|
||||
TreeRewriterResultPtr syntax_analyzer_result,
|
||||
PreparedSetsPtr prepared_sets_,
|
||||
ContextPtr context,
|
||||
const Names & key_column_names,
|
||||
const ExpressionActionsPtr & key_expr,
|
||||
NameSet array_joined_column_names,
|
||||
bool single_point_ = false,
|
||||
bool strict_ = false);
|
||||
|
||||
@ -275,6 +272,7 @@ public:
|
||||
/// Checks that the index can not be used
|
||||
/// FUNCTION_UNKNOWN will be AND'ed (if any).
|
||||
bool alwaysUnknownOrTrue() const;
|
||||
|
||||
/// Checks that the index can not be used
|
||||
/// Does not allow any FUNCTION_UNKNOWN (will instantly return true).
|
||||
bool anyUnknownOrAlwaysTrue() const;
|
||||
@ -313,10 +311,18 @@ public:
|
||||
* Returns false, if expression isn't constant.
|
||||
*/
|
||||
static bool getConstant(
|
||||
const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type);
|
||||
const ASTPtr & expr,
|
||||
Block & block_with_constants,
|
||||
Field & out_value,
|
||||
DataTypePtr & out_type);
|
||||
|
||||
/** Calculate expressions, that depend only on constants.
|
||||
* For index to work when something like "WHERE Date = toDate(now())" is written.
|
||||
*/
|
||||
static Block getBlockWithConstants(
|
||||
const ASTPtr & query, const TreeRewriterResultPtr & syntax_analyzer_result, ContextPtr context);
|
||||
const ASTPtr & query,
|
||||
const TreeRewriterResultPtr & syntax_analyzer_result,
|
||||
ContextPtr context);
|
||||
|
||||
static std::optional<Range> applyMonotonicFunctionsChainToRange(
|
||||
Range key_range,
|
||||
@ -373,14 +379,11 @@ private:
|
||||
using RPN = std::vector<RPNElement>;
|
||||
using ColumnIndices = std::map<String, size_t>;
|
||||
|
||||
using AtomMap = std::unordered_map<std::string, bool(*)(RPNElement & out, const Field & value)>;
|
||||
|
||||
public:
|
||||
using AtomMap = std::unordered_map<std::string, bool(*)(RPNElement & out, const Field & value)>;
|
||||
static const AtomMap atom_map;
|
||||
|
||||
class Tree;
|
||||
class FunctionTree;
|
||||
|
||||
private:
|
||||
BoolMask checkInRange(
|
||||
size_t used_key_size,
|
||||
@ -390,9 +393,7 @@ private:
|
||||
bool right_bounded,
|
||||
BoolMask initial_mask) const;
|
||||
|
||||
void traverseAST(const Tree & node, ContextPtr context, Block & block_with_constants);
|
||||
bool tryParseAtomFromAST(const Tree & node, ContextPtr context, Block & block_with_constants, RPNElement & out);
|
||||
static bool tryParseLogicalOperatorFromAST(const FunctionTree & func, RPNElement & out);
|
||||
bool extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out);
|
||||
|
||||
/** Is node the key column
|
||||
* or expression in which column of key is wrapped by chain of functions,
|
||||
@ -401,17 +402,16 @@ private:
|
||||
* and fills chain of possibly-monotonic functions.
|
||||
*/
|
||||
bool isKeyPossiblyWrappedByMonotonicFunctions(
|
||||
const Tree & node,
|
||||
ContextPtr context,
|
||||
const RPNBuilderTreeNode & node,
|
||||
size_t & out_key_column_num,
|
||||
DataTypePtr & out_key_res_column_type,
|
||||
MonotonicFunctionsChain & out_functions_chain);
|
||||
|
||||
bool isKeyPossiblyWrappedByMonotonicFunctionsImpl(
|
||||
const Tree & node,
|
||||
const RPNBuilderTreeNode & node,
|
||||
size_t & out_key_column_num,
|
||||
DataTypePtr & out_key_column_type,
|
||||
std::vector<FunctionTree> & out_functions_chain);
|
||||
std::vector<RPNBuilderFunctionTreeNode> & out_functions_chain);
|
||||
|
||||
bool transformConstantWithValidFunctions(
|
||||
const String & expr_name,
|
||||
@ -422,21 +422,24 @@ private:
|
||||
std::function<bool(IFunctionBase &, const IDataType &)> always_monotonic) const;
|
||||
|
||||
bool canConstantBeWrappedByMonotonicFunctions(
|
||||
const Tree & node,
|
||||
const RPNBuilderTreeNode & node,
|
||||
size_t & out_key_column_num,
|
||||
DataTypePtr & out_key_column_type,
|
||||
Field & out_value,
|
||||
DataTypePtr & out_type);
|
||||
|
||||
bool canConstantBeWrappedByFunctions(
|
||||
const Tree & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type);
|
||||
const RPNBuilderTreeNode & node,
|
||||
size_t & out_key_column_num,
|
||||
DataTypePtr & out_key_column_type,
|
||||
Field & out_value,
|
||||
DataTypePtr & out_type);
|
||||
|
||||
/// If it's possible to make an RPNElement
|
||||
/// that will filter values (possibly tuples) by the content of 'prepared_set',
|
||||
/// do it and return true.
|
||||
bool tryPrepareSetIndex(
|
||||
const FunctionTree & func,
|
||||
ContextPtr context,
|
||||
const RPNBuilderFunctionTreeNode & func,
|
||||
RPNElement & out,
|
||||
size_t & out_key_column_num);
|
||||
|
||||
@ -472,11 +475,12 @@ private:
|
||||
/// All intermediate columns are used to calculate key_expr.
|
||||
const NameSet key_subexpr_names;
|
||||
|
||||
NameSet array_joined_columns;
|
||||
PreparedSetsPtr prepared_sets;
|
||||
/// Array joined column names
|
||||
NameSet array_joined_column_names;
|
||||
|
||||
// If true, always allow key_expr to be wrapped by function
|
||||
bool single_point;
|
||||
|
||||
// If true, do not use always_monotonic information to transform constants
|
||||
bool strict;
|
||||
};
|
||||
|
@ -153,7 +153,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
global_ctx->all_column_names = global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical();
|
||||
global_ctx->storage_columns = global_ctx->metadata_snapshot->getColumns().getAllPhysical();
|
||||
|
||||
auto object_columns = MergeTreeData::getObjectColumns(global_ctx->future_part->parts, global_ctx->metadata_snapshot->getColumns());
|
||||
auto object_columns = MergeTreeData::getConcreteObjectColumns(global_ctx->future_part->parts, global_ctx->metadata_snapshot->getColumns());
|
||||
global_ctx->storage_snapshot = std::make_shared<StorageSnapshot>(*global_ctx->data, global_ctx->metadata_snapshot, object_columns);
|
||||
extendObjectColumns(global_ctx->storage_columns, object_columns, false);
|
||||
|
||||
|
@ -7124,18 +7124,18 @@ ReservationPtr MergeTreeData::balancedReservation(
|
||||
return reserved_space;
|
||||
}
|
||||
|
||||
ColumnsDescription MergeTreeData::getObjectColumns(
|
||||
ColumnsDescription MergeTreeData::getConcreteObjectColumns(
|
||||
const DataPartsVector & parts, const ColumnsDescription & storage_columns)
|
||||
{
|
||||
return DB::getObjectColumns(
|
||||
return DB::getConcreteObjectColumns(
|
||||
parts.begin(), parts.end(),
|
||||
storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); });
|
||||
}
|
||||
|
||||
ColumnsDescription MergeTreeData::getObjectColumns(
|
||||
ColumnsDescription MergeTreeData::getConcreteObjectColumns(
|
||||
boost::iterator_range<DataPartIteratorByStateAndInfo> range, const ColumnsDescription & storage_columns)
|
||||
{
|
||||
return DB::getObjectColumns(
|
||||
return DB::getConcreteObjectColumns(
|
||||
range.begin(), range.end(),
|
||||
storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); });
|
||||
}
|
||||
@ -7144,21 +7144,21 @@ void MergeTreeData::resetObjectColumnsFromActiveParts(const DataPartsLock & /*lo
|
||||
{
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
const auto & columns = metadata_snapshot->getColumns();
|
||||
if (!hasObjectColumns(columns))
|
||||
if (!hasDynamicSubcolumns(columns))
|
||||
return;
|
||||
|
||||
auto range = getDataPartsStateRange(DataPartState::Active);
|
||||
object_columns = getObjectColumns(range, columns);
|
||||
object_columns = getConcreteObjectColumns(range, columns);
|
||||
}
|
||||
|
||||
void MergeTreeData::updateObjectColumns(const DataPartPtr & part, const DataPartsLock & /*lock*/)
|
||||
{
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
const auto & columns = metadata_snapshot->getColumns();
|
||||
if (!hasObjectColumns(columns))
|
||||
if (!hasDynamicSubcolumns(columns))
|
||||
return;
|
||||
|
||||
DB::updateObjectColumns(object_columns, part->getColumns());
|
||||
DB::updateObjectColumns(object_columns, columns, part->getColumns());
|
||||
}
|
||||
|
||||
StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const
|
||||
|
@ -779,10 +779,10 @@ public:
|
||||
return column_sizes;
|
||||
}
|
||||
|
||||
const ColumnsDescription & getObjectColumns() const { return object_columns; }
|
||||
const ColumnsDescription & getConcreteObjectColumns() const { return object_columns; }
|
||||
|
||||
/// Creates description of columns of data type Object from the range of data parts.
|
||||
static ColumnsDescription getObjectColumns(
|
||||
static ColumnsDescription getConcreteObjectColumns(
|
||||
const DataPartsVector & parts, const ColumnsDescription & storage_columns);
|
||||
|
||||
IndexSizeByName getSecondaryIndexSizes() const override
|
||||
@ -1151,7 +1151,7 @@ protected:
|
||||
}
|
||||
|
||||
/// Creates description of columns of data type Object from the range of data parts.
|
||||
static ColumnsDescription getObjectColumns(
|
||||
static ColumnsDescription getConcreteObjectColumns(
|
||||
boost::iterator_range<DataPartIteratorByStateAndInfo> range, const ColumnsDescription & storage_columns);
|
||||
|
||||
std::optional<UInt64> totalRowsByPartitionPredicateImpl(
|
||||
|
@ -131,7 +131,7 @@ void writeColumnSingleGranule(
|
||||
serialize_settings.position_independent_encoding = true; //-V1048
|
||||
serialize_settings.low_cardinality_max_dictionary_size = 0; //-V1048
|
||||
|
||||
serialization->serializeBinaryBulkStatePrefix(serialize_settings, state);
|
||||
serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state);
|
||||
serialization->serializeBinaryBulkWithMultipleStreams(*column.column, from_row, number_of_rows, serialize_settings, state);
|
||||
serialization->serializeBinaryBulkStateSuffix(serialize_settings, state);
|
||||
}
|
||||
|
@ -355,7 +355,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
|
||||
{
|
||||
ISerialization::SerializeBinaryBulkSettings serialize_settings;
|
||||
serialize_settings.getter = createStreamGetter(name_and_type, offset_columns);
|
||||
serialization->serializeBinaryBulkStatePrefix(serialize_settings, it->second);
|
||||
serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second);
|
||||
}
|
||||
|
||||
const auto & global_settings = storage.getContext()->getSettingsRef();
|
||||
|
@ -288,7 +288,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart(
|
||||
auto columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames());
|
||||
|
||||
for (auto & column : columns)
|
||||
if (isObject(column.type))
|
||||
if (column.type->hasDynamicSubcolumns())
|
||||
column.type = block.getByName(column.name).type;
|
||||
|
||||
static const String TMP_PREFIX = "tmp_insert_";
|
||||
|
@ -6,11 +6,13 @@
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Storages/MergeTree/RPNBuilder.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h>
|
||||
#include <Parsers/ASTSubquery.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Interpreters/misc.h>
|
||||
#include <Interpreters/BloomFilterHash.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
@ -28,19 +30,7 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
PreparedSetKey getPreparedSetKey(const ASTPtr & node, const DataTypePtr & data_type)
|
||||
{
|
||||
/// If the data type is tuple, let's try unbox once
|
||||
if (node->as<ASTSubquery>() || node->as<ASTIdentifier>())
|
||||
return PreparedSetKey::forSubquery(*node);
|
||||
|
||||
if (const auto * date_type_tuple = typeid_cast<const DataTypeTuple *>(&*data_type))
|
||||
return PreparedSetKey::forLiteral(*node, date_type_tuple->getElements());
|
||||
|
||||
return PreparedSetKey::forLiteral(*node, DataTypes(1, data_type));
|
||||
}
|
||||
|
||||
ColumnWithTypeAndName getPreparedSetInfo(const SetPtr & prepared_set)
|
||||
ColumnWithTypeAndName getPreparedSetInfo(const ConstSetPtr & prepared_set)
|
||||
{
|
||||
if (prepared_set->getDataTypes().size() == 1)
|
||||
return {prepared_set->getSetElements()[0], prepared_set->getElementsTypes()[0], "dummy"};
|
||||
@ -110,8 +100,22 @@ MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter(
|
||||
const SelectQueryInfo & info_, ContextPtr context_, const Block & header_, size_t hash_functions_)
|
||||
: WithContext(context_), header(header_), query_info(info_), hash_functions(hash_functions_)
|
||||
{
|
||||
auto atom_from_ast = [this](auto & node, auto, auto & constants, auto & out) { return traverseAtomAST(node, constants, out); };
|
||||
rpn = std::move(RPNBuilder<RPNElement>(info_, getContext(), atom_from_ast).extractRPN());
|
||||
ASTPtr filter_node = buildFilterNode(query_info.query);
|
||||
|
||||
if (!filter_node)
|
||||
{
|
||||
rpn.push_back(RPNElement::FUNCTION_UNKNOWN);
|
||||
return;
|
||||
}
|
||||
|
||||
auto block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context_);
|
||||
RPNBuilder<RPNElement> builder(
|
||||
filter_node,
|
||||
context_,
|
||||
std::move(block_with_constants),
|
||||
query_info.prepared_sets,
|
||||
[&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); });
|
||||
rpn = std::move(builder).extractRPN();
|
||||
}
|
||||
|
||||
bool MergeTreeIndexConditionBloomFilter::alwaysUnknownOrTrue() const
|
||||
@ -235,12 +239,13 @@ bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndex
|
||||
return rpn_stack[0].can_be_true;
|
||||
}
|
||||
|
||||
bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out)
|
||||
bool MergeTreeIndexConditionBloomFilter::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out)
|
||||
{
|
||||
{
|
||||
Field const_value;
|
||||
DataTypePtr const_type;
|
||||
if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type))
|
||||
|
||||
if (node.tryGetConstant(const_value, const_type))
|
||||
{
|
||||
if (const_value.getType() == Field::Types::UInt64)
|
||||
{
|
||||
@ -262,56 +267,62 @@ bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Bl
|
||||
}
|
||||
}
|
||||
|
||||
return traverseFunction(node, block_with_constants, out, nullptr);
|
||||
return traverseFunction(node, out, nullptr /*parent*/);
|
||||
}
|
||||
|
||||
bool MergeTreeIndexConditionBloomFilter::traverseFunction(const ASTPtr & node, Block & block_with_constants, RPNElement & out, const ASTPtr & parent)
|
||||
bool MergeTreeIndexConditionBloomFilter::traverseFunction(const RPNBuilderTreeNode & node, RPNElement & out, const RPNBuilderTreeNode * parent)
|
||||
{
|
||||
bool maybe_useful = false;
|
||||
|
||||
if (const auto * function = node->as<ASTFunction>())
|
||||
if (node.isFunction())
|
||||
{
|
||||
if (!function->arguments)
|
||||
return false;
|
||||
const auto function = node.toFunctionNode();
|
||||
auto arguments_size = function.getArgumentsSize();
|
||||
auto function_name = function.getFunctionName();
|
||||
|
||||
const ASTs & arguments = function->arguments->children;
|
||||
for (const auto & arg : arguments)
|
||||
for (size_t i = 0; i < arguments_size; ++i)
|
||||
{
|
||||
if (traverseFunction(arg, block_with_constants, out, node))
|
||||
auto argument = function.getArgumentAt(i);
|
||||
if (traverseFunction(argument, out, &node))
|
||||
maybe_useful = true;
|
||||
}
|
||||
|
||||
if (arguments.size() != 2)
|
||||
if (arguments_size != 2)
|
||||
return false;
|
||||
|
||||
if (functionIsInOrGlobalInOperator(function->name))
|
||||
{
|
||||
auto prepared_set = getPreparedSet(arguments[1]);
|
||||
auto lhs_argument = function.getArgumentAt(0);
|
||||
auto rhs_argument = function.getArgumentAt(1);
|
||||
|
||||
if (prepared_set)
|
||||
if (functionIsInOrGlobalInOperator(function_name))
|
||||
{
|
||||
ConstSetPtr prepared_set = rhs_argument.tryGetPreparedSet();
|
||||
|
||||
if (prepared_set && prepared_set->hasExplicitSetElements())
|
||||
{
|
||||
if (traverseASTIn(function->name, arguments[0], prepared_set, out))
|
||||
const auto prepared_info = getPreparedSetInfo(prepared_set);
|
||||
if (traverseTreeIn(function_name, lhs_argument, prepared_set, prepared_info.type, prepared_info.column, out))
|
||||
maybe_useful = true;
|
||||
}
|
||||
}
|
||||
else if (function->name == "equals" ||
|
||||
function->name == "notEquals" ||
|
||||
function->name == "has" ||
|
||||
function->name == "mapContains" ||
|
||||
function->name == "indexOf" ||
|
||||
function->name == "hasAny" ||
|
||||
function->name == "hasAll")
|
||||
else if (function_name == "equals" ||
|
||||
function_name == "notEquals" ||
|
||||
function_name == "has" ||
|
||||
function_name == "mapContains" ||
|
||||
function_name == "indexOf" ||
|
||||
function_name == "hasAny" ||
|
||||
function_name == "hasAll")
|
||||
{
|
||||
Field const_value;
|
||||
DataTypePtr const_type;
|
||||
if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type))
|
||||
|
||||
if (rhs_argument.tryGetConstant(const_value, const_type))
|
||||
{
|
||||
if (traverseASTEquals(function->name, arguments[0], const_type, const_value, out, parent))
|
||||
if (traverseTreeEquals(function_name, lhs_argument, const_type, const_value, out, parent))
|
||||
maybe_useful = true;
|
||||
}
|
||||
else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type))
|
||||
else if (lhs_argument.tryGetConstant(const_value, const_type))
|
||||
{
|
||||
if (traverseASTEquals(function->name, arguments[1], const_type, const_value, out, parent))
|
||||
if (traverseTreeEquals(function_name, rhs_argument, const_type, const_value, out, parent))
|
||||
maybe_useful = true;
|
||||
}
|
||||
}
|
||||
@ -320,28 +331,20 @@ bool MergeTreeIndexConditionBloomFilter::traverseFunction(const ASTPtr & node, B
|
||||
return maybe_useful;
|
||||
}
|
||||
|
||||
bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
|
||||
bool MergeTreeIndexConditionBloomFilter::traverseTreeIn(
|
||||
const String & function_name,
|
||||
const ASTPtr & key_ast,
|
||||
const SetPtr & prepared_set,
|
||||
RPNElement & out)
|
||||
{
|
||||
const auto prepared_info = getPreparedSetInfo(prepared_set);
|
||||
return traverseASTIn(function_name, key_ast, prepared_set, prepared_info.type, prepared_info.column, out);
|
||||
}
|
||||
|
||||
bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
|
||||
const String & function_name,
|
||||
const ASTPtr & key_ast,
|
||||
const SetPtr & prepared_set,
|
||||
const RPNBuilderTreeNode & key_node,
|
||||
const ConstSetPtr & prepared_set,
|
||||
const DataTypePtr & type,
|
||||
const ColumnPtr & column,
|
||||
RPNElement & out)
|
||||
{
|
||||
if (header.has(key_ast->getColumnName()))
|
||||
auto key_node_column_name = key_node.getColumnName();
|
||||
|
||||
if (header.has(key_node_column_name))
|
||||
{
|
||||
size_t row_size = column->size();
|
||||
size_t position = header.getPositionByName(key_ast->getColumnName());
|
||||
size_t position = header.getPositionByName(key_node_column_name);
|
||||
const DataTypePtr & index_type = header.getByPosition(position).type;
|
||||
const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type);
|
||||
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size)));
|
||||
@ -355,30 +358,33 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
|
||||
return true;
|
||||
}
|
||||
|
||||
if (const auto * function = key_ast->as<ASTFunction>())
|
||||
if (key_node.isFunction())
|
||||
{
|
||||
auto key_node_function = key_node.toFunctionNode();
|
||||
auto key_node_function_name = key_node_function.getFunctionName();
|
||||
size_t key_node_function_arguments_size = key_node_function.getArgumentsSize();
|
||||
|
||||
WhichDataType which(type);
|
||||
|
||||
if (which.isTuple() && function->name == "tuple")
|
||||
if (which.isTuple() && key_node_function_name == "tuple")
|
||||
{
|
||||
const auto & tuple_column = typeid_cast<const ColumnTuple *>(column.get());
|
||||
const auto & tuple_data_type = typeid_cast<const DataTypeTuple *>(type.get());
|
||||
const ASTs & arguments = typeid_cast<const ASTExpressionList &>(*function->arguments).children;
|
||||
|
||||
if (tuple_data_type->getElements().size() != arguments.size() || tuple_column->getColumns().size() != arguments.size())
|
||||
if (tuple_data_type->getElements().size() != key_node_function_arguments_size || tuple_column->getColumns().size() != key_node_function_arguments_size)
|
||||
throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
bool match_with_subtype = false;
|
||||
const auto & sub_columns = tuple_column->getColumns();
|
||||
const auto & sub_data_types = tuple_data_type->getElements();
|
||||
|
||||
for (size_t index = 0; index < arguments.size(); ++index)
|
||||
match_with_subtype |= traverseASTIn(function_name, arguments[index], nullptr, sub_data_types[index], sub_columns[index], out);
|
||||
for (size_t index = 0; index < key_node_function_arguments_size; ++index)
|
||||
match_with_subtype |= traverseTreeIn(function_name, key_node_function.getArgumentAt(index), nullptr, sub_data_types[index], sub_columns[index], out);
|
||||
|
||||
return match_with_subtype;
|
||||
}
|
||||
|
||||
if (function->name == "arrayElement")
|
||||
if (key_node_function_name == "arrayElement")
|
||||
{
|
||||
/** Try to parse arrayElement for mapKeys index.
|
||||
* It is important to ignore keys like column_map['Key'] IN ('') because if key does not exists in map
|
||||
@ -387,7 +393,6 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
|
||||
* We cannot skip keys that does not exist in map if comparison is with default type value because
|
||||
* that way we skip necessary granules where map key does not exists.
|
||||
*/
|
||||
|
||||
if (!prepared_set)
|
||||
return false;
|
||||
|
||||
@ -400,28 +405,26 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
|
||||
if (set_contain_default_value)
|
||||
return false;
|
||||
|
||||
const auto * column_ast_identifier = function->arguments.get()->children[0].get()->as<ASTIdentifier>();
|
||||
if (!column_ast_identifier)
|
||||
return false;
|
||||
|
||||
const auto & col_name = column_ast_identifier->name();
|
||||
auto map_keys_index_column_name = fmt::format("mapKeys({})", col_name);
|
||||
auto map_values_index_column_name = fmt::format("mapValues({})", col_name);
|
||||
auto first_argument = key_node_function.getArgumentAt(0);
|
||||
const auto column_name = first_argument.getColumnName();
|
||||
auto map_keys_index_column_name = fmt::format("mapKeys({})", column_name);
|
||||
auto map_values_index_column_name = fmt::format("mapValues({})", column_name);
|
||||
|
||||
if (header.has(map_keys_index_column_name))
|
||||
{
|
||||
/// For mapKeys we serialize key argument with bloom filter
|
||||
|
||||
auto & argument = function->arguments.get()->children[1];
|
||||
auto second_argument = key_node_function.getArgumentAt(1);
|
||||
|
||||
if (const auto * literal = argument->as<ASTLiteral>())
|
||||
Field constant_value;
|
||||
DataTypePtr constant_type;
|
||||
|
||||
if (second_argument.tryGetConstant(constant_value, constant_type))
|
||||
{
|
||||
size_t position = header.getPositionByName(map_keys_index_column_name);
|
||||
const DataTypePtr & index_type = header.getByPosition(position).type;
|
||||
|
||||
auto element_key = literal->value;
|
||||
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type);
|
||||
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), element_key)));
|
||||
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), constant_value)));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -459,74 +462,97 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
|
||||
}
|
||||
|
||||
|
||||
static bool indexOfCanUseBloomFilter(const ASTPtr & parent)
|
||||
static bool indexOfCanUseBloomFilter(const RPNBuilderTreeNode * parent)
|
||||
{
|
||||
if (!parent)
|
||||
return true;
|
||||
|
||||
if (!parent->isFunction())
|
||||
return false;
|
||||
|
||||
auto function = parent->toFunctionNode();
|
||||
auto function_name = function.getFunctionName();
|
||||
|
||||
/// `parent` is a function where `indexOf` is located.
|
||||
/// Example: `indexOf(arr, x) = 1`, parent is a function named `equals`.
|
||||
if (const auto * function = parent->as<ASTFunction>())
|
||||
if (function_name == "and")
|
||||
{
|
||||
if (function->name == "and")
|
||||
return true;
|
||||
}
|
||||
else if (function_name == "equals" /// notEquals is not applicable
|
||||
|| function_name == "greater" || function_name == "greaterOrEquals"
|
||||
|| function_name == "less" || function_name == "lessOrEquals")
|
||||
{
|
||||
size_t function_arguments_size = function.getArgumentsSize();
|
||||
if (function_arguments_size != 2)
|
||||
return false;
|
||||
|
||||
/// We don't allow constant expressions like `indexOf(arr, x) = 1 + 0` but it's negligible.
|
||||
|
||||
/// We should return true when the corresponding expression implies that the array contains the element.
|
||||
/// Example: when `indexOf(arr, x)` > 10 is written, it means that arr definitely should contain the element
|
||||
/// (at least at 11th position but it does not matter).
|
||||
|
||||
bool reversed = false;
|
||||
Field constant_value;
|
||||
DataTypePtr constant_type;
|
||||
|
||||
if (function.getArgumentAt(0).tryGetConstant(constant_value, constant_type))
|
||||
{
|
||||
reversed = true;
|
||||
}
|
||||
else if (function.getArgumentAt(1).tryGetConstant(constant_value, constant_type))
|
||||
{
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
Field zero(0);
|
||||
bool constant_equal_zero = applyVisitor(FieldVisitorAccurateEquals(), constant_value, zero);
|
||||
|
||||
if (function_name == "equals" && !constant_equal_zero)
|
||||
{
|
||||
/// indexOf(...) = c, c != 0
|
||||
return true;
|
||||
}
|
||||
else if (function->name == "equals" /// notEquals is not applicable
|
||||
|| function->name == "greater" || function->name == "greaterOrEquals"
|
||||
|| function->name == "less" || function->name == "lessOrEquals")
|
||||
else if (function_name == "notEquals" && constant_equal_zero)
|
||||
{
|
||||
if (function->arguments->children.size() != 2)
|
||||
return false;
|
||||
|
||||
/// We don't allow constant expressions like `indexOf(arr, x) = 1 + 0` but it's negligible.
|
||||
|
||||
/// We should return true when the corresponding expression implies that the array contains the element.
|
||||
/// Example: when `indexOf(arr, x)` > 10 is written, it means that arr definitely should contain the element
|
||||
/// (at least at 11th position but it does not matter).
|
||||
|
||||
bool reversed = false;
|
||||
const ASTLiteral * constant = nullptr;
|
||||
|
||||
if (const ASTLiteral * left = function->arguments->children[0]->as<ASTLiteral>())
|
||||
{
|
||||
constant = left;
|
||||
reversed = true;
|
||||
}
|
||||
else if (const ASTLiteral * right = function->arguments->children[1]->as<ASTLiteral>())
|
||||
{
|
||||
constant = right;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
Field zero(0);
|
||||
return (function->name == "equals" /// indexOf(...) = c, c != 0
|
||||
&& !applyVisitor(FieldVisitorAccurateEquals(), constant->value, zero))
|
||||
|| (function->name == "notEquals" /// indexOf(...) != c, c = 0
|
||||
&& applyVisitor(FieldVisitorAccurateEquals(), constant->value, zero))
|
||||
|| (function->name == (reversed ? "less" : "greater") /// indexOf(...) > c, c >= 0
|
||||
&& !applyVisitor(FieldVisitorAccurateLess(), constant->value, zero))
|
||||
|| (function->name == (reversed ? "lessOrEquals" : "greaterOrEquals") /// indexOf(...) >= c, c > 0
|
||||
&& applyVisitor(FieldVisitorAccurateLess(), zero, constant->value));
|
||||
/// indexOf(...) != c, c = 0
|
||||
return true;
|
||||
}
|
||||
else if (function_name == (reversed ? "less" : "greater") && !applyVisitor(FieldVisitorAccurateLess(), constant_value, zero))
|
||||
{
|
||||
/// indexOf(...) > c, c >= 0
|
||||
return true;
|
||||
}
|
||||
else if (function_name == (reversed ? "lessOrEquals" : "greaterOrEquals") && applyVisitor(FieldVisitorAccurateLess(), zero, constant_value))
|
||||
{
|
||||
/// indexOf(...) >= c, c > 0
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
|
||||
bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals(
|
||||
const String & function_name,
|
||||
const ASTPtr & key_ast,
|
||||
const RPNBuilderTreeNode & key_node,
|
||||
const DataTypePtr & value_type,
|
||||
const Field & value_field,
|
||||
RPNElement & out,
|
||||
const ASTPtr & parent)
|
||||
const RPNBuilderTreeNode * parent)
|
||||
{
|
||||
if (header.has(key_ast->getColumnName()))
|
||||
auto key_column_name = key_node.getColumnName();
|
||||
|
||||
if (header.has(key_column_name))
|
||||
{
|
||||
size_t position = header.getPositionByName(key_ast->getColumnName());
|
||||
size_t position = header.getPositionByName(key_column_name);
|
||||
const DataTypePtr & index_type = header.getByPosition(position).type;
|
||||
const auto * array_type = typeid_cast<const DataTypeArray *>(index_type.get());
|
||||
|
||||
@ -602,13 +628,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
|
||||
|
||||
if (function_name == "mapContains" || function_name == "has")
|
||||
{
|
||||
const auto * key_ast_identifier = key_ast.get()->as<const ASTIdentifier>();
|
||||
if (!key_ast_identifier)
|
||||
return false;
|
||||
|
||||
const auto & col_name = key_ast_identifier->name();
|
||||
auto map_keys_index_column_name = fmt::format("mapKeys({})", col_name);
|
||||
|
||||
auto map_keys_index_column_name = fmt::format("mapKeys({})", key_column_name);
|
||||
if (!header.has(map_keys_index_column_name))
|
||||
return false;
|
||||
|
||||
@ -629,29 +649,32 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
|
||||
return true;
|
||||
}
|
||||
|
||||
if (const auto * function = key_ast->as<ASTFunction>())
|
||||
if (key_node.isFunction())
|
||||
{
|
||||
WhichDataType which(value_type);
|
||||
|
||||
if (which.isTuple() && function->name == "tuple")
|
||||
auto key_node_function = key_node.toFunctionNode();
|
||||
auto key_node_function_name = key_node_function.getFunctionName();
|
||||
size_t key_node_function_arguments_size = key_node_function.getArgumentsSize();
|
||||
|
||||
if (which.isTuple() && key_node_function_name == "tuple")
|
||||
{
|
||||
const Tuple & tuple = value_field.get<const Tuple &>();
|
||||
const auto * value_tuple_data_type = typeid_cast<const DataTypeTuple *>(value_type.get());
|
||||
const ASTs & arguments = typeid_cast<const ASTExpressionList &>(*function->arguments).children;
|
||||
|
||||
if (tuple.size() != arguments.size())
|
||||
if (tuple.size() != key_node_function_arguments_size)
|
||||
throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
bool match_with_subtype = false;
|
||||
const DataTypes & subtypes = value_tuple_data_type->getElements();
|
||||
|
||||
for (size_t index = 0; index < tuple.size(); ++index)
|
||||
match_with_subtype |= traverseASTEquals(function_name, arguments[index], subtypes[index], tuple[index], out, key_ast);
|
||||
match_with_subtype |= traverseTreeEquals(function_name, key_node_function.getArgumentAt(index), subtypes[index], tuple[index], out, &key_node);
|
||||
|
||||
return match_with_subtype;
|
||||
}
|
||||
|
||||
if (function->name == "arrayElement" && (function_name == "equals" || function_name == "notEquals"))
|
||||
if (key_node_function_name == "arrayElement" && (function_name == "equals" || function_name == "notEquals"))
|
||||
{
|
||||
/** Try to parse arrayElement for mapKeys index.
|
||||
* It is important to ignore keys like column_map['Key'] = '' because if key does not exist in map
|
||||
@ -663,27 +686,22 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
|
||||
if (value_field == value_type->getDefault())
|
||||
return false;
|
||||
|
||||
const auto * column_ast_identifier = function->arguments.get()->children[0].get()->as<ASTIdentifier>();
|
||||
if (!column_ast_identifier)
|
||||
return false;
|
||||
auto first_argument = key_node_function.getArgumentAt(0);
|
||||
const auto column_name = first_argument.getColumnName();
|
||||
|
||||
const auto & col_name = column_ast_identifier->name();
|
||||
|
||||
auto map_keys_index_column_name = fmt::format("mapKeys({})", col_name);
|
||||
auto map_values_index_column_name = fmt::format("mapValues({})", col_name);
|
||||
auto map_keys_index_column_name = fmt::format("mapKeys({})", column_name);
|
||||
auto map_values_index_column_name = fmt::format("mapValues({})", column_name);
|
||||
|
||||
size_t position = 0;
|
||||
Field const_value = value_field;
|
||||
DataTypePtr const_type;
|
||||
|
||||
if (header.has(map_keys_index_column_name))
|
||||
{
|
||||
position = header.getPositionByName(map_keys_index_column_name);
|
||||
auto second_argument = key_node_function.getArgumentAt(1);
|
||||
|
||||
auto & argument = function->arguments.get()->children[1];
|
||||
|
||||
if (const auto * literal = argument->as<ASTLiteral>())
|
||||
const_value = literal->value;
|
||||
else
|
||||
if (!second_argument.tryGetConstant(const_value, const_type))
|
||||
return false;
|
||||
}
|
||||
else if (header.has(map_values_index_column_name))
|
||||
@ -708,23 +726,4 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
|
||||
return false;
|
||||
}
|
||||
|
||||
SetPtr MergeTreeIndexConditionBloomFilter::getPreparedSet(const ASTPtr & node)
|
||||
{
|
||||
if (header.has(node->getColumnName()))
|
||||
{
|
||||
const auto & column_and_type = header.getByName(node->getColumnName());
|
||||
auto set_key = getPreparedSetKey(node, column_and_type.type);
|
||||
if (auto prepared_set = query_info.prepared_sets->get(set_key))
|
||||
return prepared_set;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto & set : query_info.prepared_sets->getByTreeHash(node->getTreeHash()))
|
||||
if (set->hasExplicitSetElements())
|
||||
return set;
|
||||
}
|
||||
|
||||
return DB::SetPtr();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -62,35 +62,27 @@ private:
|
||||
const size_t hash_functions;
|
||||
std::vector<RPNElement> rpn;
|
||||
|
||||
SetPtr getPreparedSet(const ASTPtr & node);
|
||||
|
||||
bool mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const;
|
||||
|
||||
bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out);
|
||||
bool extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out);
|
||||
|
||||
bool traverseFunction(const ASTPtr & node, Block & block_with_constants, RPNElement & out, const ASTPtr & parent);
|
||||
bool traverseFunction(const RPNBuilderTreeNode & node, RPNElement & out, const RPNBuilderTreeNode * parent);
|
||||
|
||||
bool traverseASTIn(
|
||||
bool traverseTreeIn(
|
||||
const String & function_name,
|
||||
const ASTPtr & key_ast,
|
||||
const SetPtr & prepared_set,
|
||||
RPNElement & out);
|
||||
|
||||
bool traverseASTIn(
|
||||
const String & function_name,
|
||||
const ASTPtr & key_ast,
|
||||
const SetPtr & prepared_set,
|
||||
const RPNBuilderTreeNode & key_node,
|
||||
const ConstSetPtr & prepared_set,
|
||||
const DataTypePtr & type,
|
||||
const ColumnPtr & column,
|
||||
RPNElement & out);
|
||||
|
||||
bool traverseASTEquals(
|
||||
bool traverseTreeEquals(
|
||||
const String & function_name,
|
||||
const ASTPtr & key_ast,
|
||||
const RPNBuilderTreeNode & key_node,
|
||||
const DataTypePtr & value_type,
|
||||
const Field & value_field,
|
||||
RPNElement & out,
|
||||
const ASTPtr & parent);
|
||||
const RPNBuilderTreeNode * parent);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -11,9 +11,11 @@
|
||||
#include <Interpreters/misc.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/MergeTree/RPNBuilder.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTSubquery.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Core/Defines.h>
|
||||
|
||||
#include <Poco/Logger.h>
|
||||
@ -148,13 +150,22 @@ MergeTreeConditionFullText::MergeTreeConditionFullText(
|
||||
, token_extractor(token_extactor_)
|
||||
, prepared_sets(query_info.prepared_sets)
|
||||
{
|
||||
rpn = std::move(
|
||||
RPNBuilder<RPNElement>(
|
||||
query_info, context,
|
||||
[this] (const ASTPtr & node, ContextPtr /* context */, Block & block_with_constants, RPNElement & out) -> bool
|
||||
{
|
||||
return this->traverseAtomAST(node, block_with_constants, out);
|
||||
}).extractRPN());
|
||||
ASTPtr filter_node = buildFilterNode(query_info.query);
|
||||
|
||||
if (!filter_node)
|
||||
{
|
||||
rpn.push_back(RPNElement::FUNCTION_UNKNOWN);
|
||||
return;
|
||||
}
|
||||
|
||||
auto block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context);
|
||||
RPNBuilder<RPNElement> builder(
|
||||
filter_node,
|
||||
context,
|
||||
std::move(block_with_constants),
|
||||
query_info.prepared_sets,
|
||||
[&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); });
|
||||
rpn = std::move(builder).extractRPN();
|
||||
}
|
||||
|
||||
bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
|
||||
@ -306,13 +317,13 @@ bool MergeTreeConditionFullText::getKey(const std::string & key_column_name, siz
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MergeTreeConditionFullText::traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out)
|
||||
bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out)
|
||||
{
|
||||
{
|
||||
Field const_value;
|
||||
DataTypePtr const_type;
|
||||
|
||||
if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type))
|
||||
if (node.tryGetConstant(const_value, const_type))
|
||||
{
|
||||
/// Check constant like in KeyCondition
|
||||
if (const_value.getType() == Field::Types::UInt64
|
||||
@ -329,53 +340,56 @@ bool MergeTreeConditionFullText::traverseAtomAST(const ASTPtr & node, Block & bl
|
||||
}
|
||||
}
|
||||
|
||||
if (const auto * function = node->as<ASTFunction>())
|
||||
if (node.isFunction())
|
||||
{
|
||||
if (!function->arguments)
|
||||
auto function_node = node.toFunctionNode();
|
||||
auto function_name = function_node.getFunctionName();
|
||||
|
||||
size_t arguments_size = function_node.getArgumentsSize();
|
||||
if (arguments_size != 2)
|
||||
return false;
|
||||
|
||||
const ASTs & arguments = function->arguments->children;
|
||||
auto left_argument = function_node.getArgumentAt(0);
|
||||
auto right_argument = function_node.getArgumentAt(1);
|
||||
|
||||
if (arguments.size() != 2)
|
||||
return false;
|
||||
|
||||
if (functionIsInOrGlobalInOperator(function->name))
|
||||
if (functionIsInOrGlobalInOperator(function_name))
|
||||
{
|
||||
if (tryPrepareSetBloomFilter(arguments, out))
|
||||
if (tryPrepareSetBloomFilter(left_argument, right_argument, out))
|
||||
{
|
||||
if (function->name == "notIn")
|
||||
if (function_name == "notIn")
|
||||
{
|
||||
out.function = RPNElement::FUNCTION_NOT_IN;
|
||||
return true;
|
||||
}
|
||||
else if (function->name == "in")
|
||||
else if (function_name == "in")
|
||||
{
|
||||
out.function = RPNElement::FUNCTION_IN;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (function->name == "equals" ||
|
||||
function->name == "notEquals" ||
|
||||
function->name == "has" ||
|
||||
function->name == "mapContains" ||
|
||||
function->name == "like" ||
|
||||
function->name == "notLike" ||
|
||||
function->name == "hasToken" ||
|
||||
function->name == "startsWith" ||
|
||||
function->name == "endsWith" ||
|
||||
function->name == "multiSearchAny")
|
||||
else if (function_name == "equals" ||
|
||||
function_name == "notEquals" ||
|
||||
function_name == "has" ||
|
||||
function_name == "mapContains" ||
|
||||
function_name == "like" ||
|
||||
function_name == "notLike" ||
|
||||
function_name == "hasToken" ||
|
||||
function_name == "startsWith" ||
|
||||
function_name == "endsWith" ||
|
||||
function_name == "multiSearchAny")
|
||||
{
|
||||
Field const_value;
|
||||
DataTypePtr const_type;
|
||||
if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type))
|
||||
|
||||
if (right_argument.tryGetConstant(const_value, const_type))
|
||||
{
|
||||
if (traverseASTEquals(function->name, arguments[0], const_type, const_value, out))
|
||||
if (traverseTreeEquals(function_name, left_argument, const_type, const_value, out))
|
||||
return true;
|
||||
}
|
||||
else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type) && (function->name == "equals" || function->name == "notEquals"))
|
||||
else if (left_argument.tryGetConstant(const_value, const_type) && (function_name == "equals" || function_name == "notEquals"))
|
||||
{
|
||||
if (traverseASTEquals(function->name, arguments[1], const_type, const_value, out))
|
||||
if (traverseTreeEquals(function_name, right_argument, const_type, const_value, out))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -384,9 +398,9 @@ bool MergeTreeConditionFullText::traverseAtomAST(const ASTPtr & node, Block & bl
|
||||
return false;
|
||||
}
|
||||
|
||||
bool MergeTreeConditionFullText::traverseASTEquals(
|
||||
bool MergeTreeConditionFullText::traverseTreeEquals(
|
||||
const String & function_name,
|
||||
const ASTPtr & key_ast,
|
||||
const RPNBuilderTreeNode & key_node,
|
||||
const DataTypePtr & value_type,
|
||||
const Field & value_field,
|
||||
RPNElement & out)
|
||||
@ -397,13 +411,17 @@ bool MergeTreeConditionFullText::traverseASTEquals(
|
||||
|
||||
Field const_value = value_field;
|
||||
|
||||
auto column_name = key_node.getColumnName();
|
||||
size_t key_column_num = 0;
|
||||
bool key_exists = getKey(key_ast->getColumnName(), key_column_num);
|
||||
bool map_key_exists = getKey(fmt::format("mapKeys({})", key_ast->getColumnName()), key_column_num);
|
||||
bool key_exists = getKey(column_name, key_column_num);
|
||||
bool map_key_exists = getKey(fmt::format("mapKeys({})", column_name), key_column_num);
|
||||
|
||||
if (const auto * function = key_ast->as<ASTFunction>())
|
||||
if (key_node.isFunction())
|
||||
{
|
||||
if (function->name == "arrayElement")
|
||||
auto key_function_node = key_node.toFunctionNode();
|
||||
auto key_function_node_function_name = key_function_node.getFunctionName();
|
||||
|
||||
if (key_function_node_function_name == "arrayElement")
|
||||
{
|
||||
/** Try to parse arrayElement for mapKeys index.
|
||||
* It is important to ignore keys like column_map['Key'] = '' because if key does not exist in map
|
||||
@ -415,11 +433,8 @@ bool MergeTreeConditionFullText::traverseASTEquals(
|
||||
if (value_field == value_type->getDefault())
|
||||
return false;
|
||||
|
||||
const auto * column_ast_identifier = function->arguments.get()->children[0].get()->as<ASTIdentifier>();
|
||||
if (!column_ast_identifier)
|
||||
return false;
|
||||
|
||||
const auto & map_column_name = column_ast_identifier->name();
|
||||
auto first_argument = key_function_node.getArgumentAt(0);
|
||||
const auto map_column_name = first_argument.getColumnName();
|
||||
|
||||
size_t map_keys_key_column_num = 0;
|
||||
auto map_keys_index_column_name = fmt::format("mapKeys({})", map_column_name);
|
||||
@ -431,12 +446,11 @@ bool MergeTreeConditionFullText::traverseASTEquals(
|
||||
|
||||
if (map_keys_exists)
|
||||
{
|
||||
auto & argument = function->arguments.get()->children[1];
|
||||
auto second_argument = key_function_node.getArgumentAt(1);
|
||||
DataTypePtr const_type;
|
||||
|
||||
if (const auto * literal = argument->as<ASTLiteral>())
|
||||
if (second_argument.tryGetConstant(const_value, const_type))
|
||||
{
|
||||
auto element_key = literal->value;
|
||||
const_value = element_key;
|
||||
key_column_num = map_keys_key_column_num;
|
||||
key_exists = true;
|
||||
}
|
||||
@ -567,23 +581,24 @@ bool MergeTreeConditionFullText::traverseASTEquals(
|
||||
}
|
||||
|
||||
bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
|
||||
const ASTs & args,
|
||||
const RPNBuilderTreeNode & left_argument,
|
||||
const RPNBuilderTreeNode & right_argument,
|
||||
RPNElement & out)
|
||||
{
|
||||
const ASTPtr & left_arg = args[0];
|
||||
const ASTPtr & right_arg = args[1];
|
||||
|
||||
std::vector<KeyTuplePositionMapping> key_tuple_mapping;
|
||||
DataTypes data_types;
|
||||
|
||||
const auto * left_arg_tuple = typeid_cast<const ASTFunction *>(left_arg.get());
|
||||
if (left_arg_tuple && left_arg_tuple->name == "tuple")
|
||||
auto left_argument_function_node_optional = left_argument.toFunctionNodeOrNull();
|
||||
|
||||
if (left_argument_function_node_optional && left_argument_function_node_optional->getFunctionName() == "tuple")
|
||||
{
|
||||
const auto & tuple_elements = left_arg_tuple->arguments->children;
|
||||
for (size_t i = 0; i < tuple_elements.size(); ++i)
|
||||
const auto & left_argument_function_node = *left_argument_function_node_optional;
|
||||
size_t left_argument_function_node_arguments_size = left_argument_function_node.getArgumentsSize();
|
||||
|
||||
for (size_t i = 0; i < left_argument_function_node_arguments_size; ++i)
|
||||
{
|
||||
size_t key = 0;
|
||||
if (getKey(tuple_elements[i]->getColumnName(), key))
|
||||
if (getKey(left_argument_function_node.getArgumentAt(i).getColumnName(), key))
|
||||
{
|
||||
key_tuple_mapping.emplace_back(i, key);
|
||||
data_types.push_back(index_data_types[key]);
|
||||
@ -593,7 +608,7 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
|
||||
else
|
||||
{
|
||||
size_t key = 0;
|
||||
if (getKey(left_arg->getColumnName(), key))
|
||||
if (getKey(left_argument.getColumnName(), key))
|
||||
{
|
||||
key_tuple_mapping.emplace_back(0, key);
|
||||
data_types.push_back(index_data_types[key]);
|
||||
@ -603,19 +618,10 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
|
||||
if (key_tuple_mapping.empty())
|
||||
return false;
|
||||
|
||||
PreparedSetKey set_key;
|
||||
if (typeid_cast<const ASTSubquery *>(right_arg.get()) || typeid_cast<const ASTIdentifier *>(right_arg.get()))
|
||||
set_key = PreparedSetKey::forSubquery(*right_arg);
|
||||
else
|
||||
set_key = PreparedSetKey::forLiteral(*right_arg, data_types);
|
||||
|
||||
auto prepared_set = prepared_sets->get(set_key);
|
||||
auto prepared_set = right_argument.tryGetPreparedSet(data_types);
|
||||
if (!prepared_set)
|
||||
return false;
|
||||
|
||||
if (!prepared_set->hasExplicitSetElements())
|
||||
return false;
|
||||
|
||||
for (const auto & data_type : prepared_set->getDataTypes())
|
||||
if (data_type->getTypeId() != TypeIndex::String && data_type->getTypeId() != TypeIndex::FixedString)
|
||||
return false;
|
||||
|
@ -122,17 +122,17 @@ private:
|
||||
|
||||
using RPN = std::vector<RPNElement>;
|
||||
|
||||
bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out);
|
||||
bool extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out);
|
||||
|
||||
bool traverseASTEquals(
|
||||
bool traverseTreeEquals(
|
||||
const String & function_name,
|
||||
const ASTPtr & key_ast,
|
||||
const RPNBuilderTreeNode & key_node,
|
||||
const DataTypePtr & value_type,
|
||||
const Field & value_field,
|
||||
RPNElement & out);
|
||||
|
||||
bool getKey(const std::string & key_column_name, size_t & key_column_num);
|
||||
bool tryPrepareSetBloomFilter(const ASTs & args, RPNElement & out);
|
||||
bool tryPrepareSetBloomFilter(const RPNBuilderTreeNode & left_argument, const RPNBuilderTreeNode & right_argument, RPNElement & out);
|
||||
|
||||
static bool createFunctionEqualsCondition(
|
||||
RPNElement & out, const Field & value, const BloomFilterParameters & params, TokenExtractorPtr token_extractor);
|
||||
|
@ -74,8 +74,9 @@ void MergeTreeIndexGranuleSet::serializeBinary(WriteBuffer & ostr) const
|
||||
auto serialization = type->getDefaultSerialization();
|
||||
ISerialization::SerializeBinaryBulkStatePtr state;
|
||||
|
||||
serialization->serializeBinaryBulkStatePrefix(settings, state);
|
||||
serialization->serializeBinaryBulkWithMultipleStreams(*block.getByPosition(i).column, 0, size(), settings, state);
|
||||
const auto & column = *block.getByPosition(i).column;
|
||||
serialization->serializeBinaryBulkStatePrefix(column, settings, state);
|
||||
serialization->serializeBinaryBulkWithMultipleStreams(column, 0, size(), settings, state);
|
||||
serialization->serializeBinaryBulkStateSuffix(settings, state);
|
||||
}
|
||||
}
|
||||
|
47
src/Storages/MergeTree/MergeTreeIndexUtils.cpp
Normal file
47
src/Storages/MergeTree/MergeTreeIndexUtils.cpp
Normal file
@ -0,0 +1,47 @@
|
||||
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
|
||||
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Collect the WHERE expression, the PREWHERE expression and any additional filters
/// (in that order) into a single AST node usable for index analysis.
///
/// Returns nullptr when no filter is present, the filter itself when exactly one was
/// collected, and otherwise an `and` function whose arguments are all collected filters.
ASTPtr buildFilterNode(const ASTPtr & select_query, ASTs additional_filters)
{
    auto & select = select_query->as<ASTSelectQuery &>();

    ASTs conjuncts;

    if (auto where_expression = select.where())
        conjuncts.push_back(where_expression);

    if (auto prewhere_expression = select.prewhere())
        conjuncts.push_back(prewhere_expression);

    conjuncts.insert(conjuncts.end(), additional_filters.begin(), additional_filters.end());

    if (conjuncts.empty())
        return nullptr;

    /// A single filter needs no wrapping.
    if (conjuncts.size() == 1)
        return conjuncts.front();

    /// Several filters are combined as and(filter_1, ..., filter_n).
    auto conjunction = std::make_shared<ASTFunction>();
    conjunction->name = "and";
    conjunction->arguments = std::make_shared<ASTExpressionList>();
    conjunction->children.push_back(conjunction->arguments);
    conjunction->arguments->children = std::move(conjuncts);

    return conjunction;
}
|
||||
|
||||
}
|
13
src/Storages/MergeTree/MergeTreeIndexUtils.h
Normal file
13
src/Storages/MergeTree/MergeTreeIndexUtils.h
Normal file
@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Build AST filter node for index analysis from WHERE and PREWHERE sections of select query and additional filters.
|
||||
* If the select query has neither WHERE nor PREWHERE and the additional filters are empty, nullptr is returned.
|
||||
*/
|
||||
ASTPtr buildFilterNode(const ASTPtr & select_query, ASTs additional_filters = {});
|
||||
|
||||
}
|
@ -1,8 +1,8 @@
|
||||
#include <Storages/MergeTree/MergeTreeSink.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
|
||||
#include <Storages/StorageMergeTree.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
#include <Interpreters/PartLog.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -56,8 +56,9 @@ struct MergeTreeSink::DelayedChunk
|
||||
void MergeTreeSink::consume(Chunk chunk)
|
||||
{
|
||||
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
|
||||
if (!storage_snapshot->object_columns.empty())
|
||||
convertDynamicColumnsToTuples(block, storage_snapshot);
|
||||
|
||||
deduceTypesOfObjectColumns(storage_snapshot, block);
|
||||
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context);
|
||||
|
||||
using DelayedPartitions = std::vector<MergeTreeSink::DelayedChunk::Partition>;
|
||||
|
417
src/Storages/MergeTree/RPNBuilder.cpp
Normal file
417
src/Storages/MergeTree/RPNBuilder.cpp
Normal file
@ -0,0 +1,417 @@
|
||||
#include <Storages/MergeTree/RPNBuilder.h>
|
||||
|
||||
#include <Common/FieldVisitorToString.h>
|
||||
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTSubquery.h>
|
||||
|
||||
#include <DataTypes/FieldToDataType.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnSet.h>
|
||||
|
||||
#include <Functions/IFunction.h>
|
||||
|
||||
#include <Storages/KeyDescription.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Write into `out` the column name of the expression rooted at `node`, looking through aliases.
///
/// - INPUT: the node's result name.
/// - COLUMN: the textual form of the constant value if the column is a ColumnConst,
///   otherwise the node's result name.
/// - ALIAS: the name of the aliased child.
/// - ARRAY_JOIN: `arrayJoin(<child>)`.
/// - FUNCTION: `<name>(<arg>, <arg>, ...)`.
///
/// When `legacy` is true, the function `modulo` is written as `moduloLegacy`. The spelling must
/// agree with the AST path, which rewrites the tree with KeyDescription::moduloToModuloLegacyRecursive;
/// otherwise names produced from a DAG and from an AST would not match.
void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & out, bool legacy = false)
{
    switch (node.type)
    {
        case ActionsDAG::ActionType::INPUT:
            writeString(node.result_name, out);
            break;
        case ActionsDAG::ActionType::COLUMN:
        {
            /// If it was created from ASTLiteral, then result_name can be an alias.
            /// We need to convert value back to string here.
            if (const auto * column_const = typeid_cast<const ColumnConst *>(node.column.get()))
                writeString(applyVisitor(FieldVisitorToString(), column_const->getField()), out);
            /// It may be possible that column is ColumnSet
            else
                writeString(node.result_name, out);
            break;
        }
        case ActionsDAG::ActionType::ALIAS:
            appendColumnNameWithoutAlias(*node.children.front(), out, legacy);
            break;
        case ActionsDAG::ActionType::ARRAY_JOIN:
            writeCString("arrayJoin(", out);
            appendColumnNameWithoutAlias(*node.children.front(), out, legacy);
            writeChar(')', out);
            break;
        case ActionsDAG::ActionType::FUNCTION:
        {
            auto name = node.function_base->getName();
            if (legacy && name == "modulo")
                writeCString("moduloLegacy", out);
            else
                writeString(name, out);

            writeChar('(', out);
            bool first = true;
            for (const auto * arg : node.children)
            {
                if (!first)
                    writeCString(", ", out);
                first = false;

                appendColumnNameWithoutAlias(*arg, out, legacy);
            }
            writeChar(')', out);
        }
    }
}
|
||||
|
||||
/// Build the alias-free column name of `node` as a string.
/// `legacy` is forwarded unchanged to appendColumnNameWithoutAlias.
String getColumnNameWithoutAlias(const ActionsDAG::Node & node, bool legacy = false)
{
    WriteBufferFromOwnString name_buffer;
    appendColumnNameWithoutAlias(node, name_buffer, legacy);

    /// Hand the accumulated string over to the caller.
    return std::move(name_buffer.str());
}
|
||||
|
||||
}
|
||||
|
||||
/// Create a tree context that carries only the query context.
RPNBuilderTreeContext::RPNBuilderTreeContext(ContextPtr query_context_)
    : query_context(std::move(query_context_))
{
}
|
||||
|
||||
/// Create a tree context that carries the query context together with
/// the block of constants and the prepared sets; all three are taken over by move.
RPNBuilderTreeContext::RPNBuilderTreeContext(
    ContextPtr query_context_,
    Block block_with_constants_,
    PreparedSetsPtr prepared_sets_)
    : query_context(std::move(query_context_))
    , block_with_constants(std::move(block_with_constants_))
    , prepared_sets(std::move(prepared_sets_))
{
}
|
||||
|
||||
/// Wrap an ActionsDAG node. The pointer must not be null (checked by assert).
RPNBuilderTreeNode::RPNBuilderTreeNode(const ActionsDAG::Node * dag_node_, RPNBuilderTreeContext & tree_context_)
    : dag_node(dag_node_)
    , tree_context(tree_context_)
{
    assert(dag_node != nullptr);
}
|
||||
|
||||
/// Wrap an AST node. The pointer must not be null (checked by assert).
RPNBuilderTreeNode::RPNBuilderTreeNode(const IAST * ast_node_, RPNBuilderTreeContext & tree_context_)
    : ast_node(ast_node_)
    , tree_context(tree_context_)
{
    assert(ast_node != nullptr);
}
|
||||
|
||||
std::string RPNBuilderTreeNode::getColumnName() const
|
||||
{
|
||||
if (ast_node)
|
||||
return ast_node->getColumnNameWithoutAlias();
|
||||
else
|
||||
return getColumnNameWithoutAlias(*dag_node);
|
||||
}
|
||||
|
||||
std::string RPNBuilderTreeNode::getColumnNameWithModuloLegacy() const
|
||||
{
|
||||
if (ast_node)
|
||||
{
|
||||
auto adjusted_ast = ast_node->clone();
|
||||
KeyDescription::moduloToModuloLegacyRecursive(adjusted_ast);
|
||||
return adjusted_ast->getColumnNameWithoutAlias();
|
||||
}
|
||||
else
|
||||
{
|
||||
return getColumnNameWithoutAlias(*dag_node, true /*legacy*/);
|
||||
}
|
||||
}
|
||||
|
||||
bool RPNBuilderTreeNode::isFunction() const
|
||||
{
|
||||
if (ast_node)
|
||||
return typeid_cast<const ASTFunction *>(ast_node);
|
||||
else
|
||||
return dag_node->type == ActionsDAG::ActionType::FUNCTION;
|
||||
}
|
||||
|
||||
bool RPNBuilderTreeNode::isConstant() const
|
||||
{
|
||||
if (ast_node)
|
||||
{
|
||||
bool is_literal = typeid_cast<const ASTLiteral *>(ast_node);
|
||||
if (is_literal)
|
||||
return true;
|
||||
|
||||
String column_name = ast_node->getColumnName();
|
||||
const auto & block_with_constants = tree_context.getBlockWithConstants();
|
||||
|
||||
if (block_with_constants.has(column_name) && isColumnConst(*block_with_constants.getByName(column_name).column))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
return dag_node->column && isColumnConst(*dag_node->column);
|
||||
}
|
||||
}
|
||||
|
||||
/** Get the constant represented by this node as a column with type.
  *
  * Throws LOGICAL_ERROR if the node is not a constant (see isConstant()).
  * AST-backed node: a literal is converted to a const column of the type deduced from its value;
  * any other constant expression is looked up by column name in the block with constants.
  * DAG-backed node: result type and column are taken from the DAG node itself.
  */
ColumnWithTypeAndName RPNBuilderTreeNode::getConstantColumn() const
{
    if (!isConstant())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "RPNBuilderTree node is not a constant");

    ColumnWithTypeAndName result;

    if (ast_node)
    {
        /// isConstant() also accepts non-literal nodes that are present in the block with constants,
        /// so the node is not guaranteed to be an ASTLiteral here. Use a checked cast that yields
        /// nullptr on mismatch; otherwise the fallback below could never be reached safely.
        const auto * literal = typeid_cast<const ASTLiteral *>(ast_node);
        if (literal)
        {
            result.type = applyVisitor(FieldToDataType(), literal->value);
            result.column = result.type->createColumnConst(0, literal->value);

            return result;
        }

        /// Constant expression: isConstant() has verified the column exists in the block.
        String column_name = ast_node->getColumnName();
        const auto & block_with_constants = tree_context.getBlockWithConstants();

        return block_with_constants.getByName(column_name);
    }
    else
    {
        result.type = dag_node->result_type;
        result.column = dag_node->column;
    }

    return result;
}
|
||||
|
||||
/** Try to extract a constant value and its type from the node.
  *
  * Returns true and fills both output parameters when the node is a constant, false otherwise.
  * For a non-Null value the returned type has its Nullable wrapper removed.
  */
bool RPNBuilderTreeNode::tryGetConstant(Field & output_value, DataTypePtr & output_type) const
{
    if (!ast_node)
    {
        /// DAG node: constant iff it carries a const column.
        if (!dag_node->column || !isColumnConst(*dag_node->column))
            return false;

        output_value = (*dag_node->column)[0];
        output_type = dag_node->result_type;
    }
    else
    {
        /// Constant expr should use alias names if any
        String name = ast_node->getColumnName();
        const auto & constants = tree_context.getBlockWithConstants();

        if (const auto * literal = ast_node->as<ASTLiteral>())
        {
            /// By default the block with constants has only one column named "_dummy".
            /// If the block contains only constants it may not be preprocessed by
            /// ExpressionAnalyzer, so try to look up in the default column.
            if (!constants.has(name))
                name = "_dummy";

            /// Simple literal
            output_value = literal->value;
            output_type = constants.getByName(name).type;
        }
        else if (constants.has(name) && isColumnConst(*constants.getByName(name).column))
        {
            /// An expression which is dependent on constants only
            const auto & constant_column = constants.getByName(name);
            output_value = (*constant_column.column)[0];
            output_type = constant_column.type;
        }
        else
        {
            return false;
        }
    }

    /// If constant is not Null, we can assume its type is not Nullable as well.
    if (!output_value.isNull())
        output_type = removeNullable(output_type);

    return true;
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Extract an already created set from a DAG node whose column is a ColumnSet
/// (possibly wrapped into ColumnConst). Returns an empty pointer in every other case.
ConstSetPtr tryGetSetFromDAGNode(const ActionsDAG::Node * dag_node)
{
    if (!dag_node->column)
        return {};

    /// Look through a ColumnConst wrapper at the underlying data column.
    const IColumn * raw_column = dag_node->column.get();
    if (const auto * const_wrapper = typeid_cast<const ColumnConst *>(raw_column))
        raw_column = &const_wrapper->getDataColumn();

    const auto * set_column = typeid_cast<const ColumnSet *>(raw_column);
    if (!set_column)
        return {};

    auto set = set_column->getData();
    if (set->isCreated())
        return set;

    return {};
}
|
||||
|
||||
}
|
||||
|
||||
/// Try to get a prepared set for the node.
/// AST-backed node with prepared sets available: the first created set with the same tree hash.
/// Otherwise, for a DAG-backed node: the set stored in the node's column.
ConstSetPtr RPNBuilderTreeNode::tryGetPreparedSet() const
{
    const auto & prepared_sets = getTreeContext().getPreparedSets();

    const bool lookup_by_ast = ast_node && prepared_sets;
    if (!lookup_by_ast)
    {
        if (dag_node)
            return tryGetSetFromDAGNode(dag_node);

        return {};
    }

    auto candidates = prepared_sets->getByTreeHash(ast_node->getTreeHash());
    for (auto & candidate : candidates)
    {
        if (candidate->isCreated())
            return candidate;
    }

    return {};
}
|
||||
|
||||
/// Try to get a prepared set for the node that matches the given data types.
/// AST-backed node: subqueries and table identifiers are looked up by the subquery key,
/// everything else by the literal key built from `data_types`.
/// DAG-backed node: the set stored in the node's column.
ConstSetPtr RPNBuilderTreeNode::tryGetPreparedSet(const DataTypes & data_types) const
{
    const auto & prepared_sets = getTreeContext().getPreparedSets();

    if (!(prepared_sets && ast_node))
    {
        if (dag_node)
            return tryGetSetFromDAGNode(dag_node);

        return nullptr;
    }

    const bool is_subquery_or_table = ast_node->as<ASTSubquery>() || ast_node->as<ASTTableIdentifier>();
    if (is_subquery_or_table)
        return prepared_sets->get(PreparedSetKey::forSubquery(*ast_node));

    return prepared_sets->get(PreparedSetKey::forLiteral(*ast_node, data_types));
}
|
||||
|
||||
/** Try to get a prepared set for the node that is compatible with the key columns.
  *
  * `indexes_mapping` and `data_types` are parallel arrays: element i describes which tuple
  * position of the set must have type data_types[i].
  *
  * AST-backed node with prepared sets available: subqueries and table identifiers are looked up
  * by the subquery key; otherwise the first set with the same tree hash and matching types is returned.
  * DAG-backed node: the set stored in the node's column.
  * Returns nullptr when nothing suitable is found.
  */
ConstSetPtr RPNBuilderTreeNode::tryGetPreparedSet(
    const std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
    const DataTypes & data_types) const
{
    const auto & prepared_sets = getTreeContext().getPreparedSets();

    if (prepared_sets && ast_node)
    {
        if (ast_node->as<ASTSubquery>() || ast_node->as<ASTTableIdentifier>())
            return prepared_sets->get(PreparedSetKey::forSubquery(*ast_node));

        /// We have `PreparedSetKey::forLiteral` but it is useless here as we don't have enough information
        /// about types in left argument of the IN operator. Instead, we manually iterate through all the sets
        /// and find the one for the right arg based on the AST structure (getTreeHash), after that we check
        /// that the types it was prepared with are compatible with the types of the primary key.
        auto types_match = [&indexes_mapping, &data_types](const SetPtr & candidate_set)
        {
            assert(indexes_mapping.size() == data_types.size());

            for (size_t i = 0; i < indexes_mapping.size(); ++i)
            {
                if (!candidate_set->areTypesEqual(indexes_mapping[i].tuple_index, data_types[i]))
                    return false;
            }

            return true;
        };

        auto tree_hash = ast_node->getTreeHash();
        for (const auto & set : prepared_sets->getByTreeHash(tree_hash))
        {
            if (types_match(set))
                return set;
        }
    }
    else if (dag_node)
    {
        /// Check the pointer itself, not dag_node->column: an AST-backed node without prepared sets
        /// has a null dag_node. A missing column is handled inside tryGetSetFromDAGNode.
        return tryGetSetFromDAGNode(dag_node);
    }

    return nullptr;
}
|
||||
|
||||
/// Convert the node to a function node sharing the same tree context.
/// Throws LOGICAL_ERROR if the node is not a function.
RPNBuilderFunctionTreeNode RPNBuilderTreeNode::toFunctionNode() const
{
    const bool is_function = isFunction();
    if (!is_function)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "RPNBuilderTree node is not a function");

    if (!ast_node)
        return RPNBuilderFunctionTreeNode(dag_node, tree_context);

    return RPNBuilderFunctionTreeNode(ast_node, tree_context);
}
|
||||
|
||||
std::optional<RPNBuilderFunctionTreeNode> RPNBuilderTreeNode::toFunctionNodeOrNull() const
|
||||
{
|
||||
if (!isFunction())
|
||||
return {};
|
||||
|
||||
if (this->ast_node)
|
||||
return RPNBuilderFunctionTreeNode(this->ast_node, tree_context);
|
||||
else
|
||||
return RPNBuilderFunctionTreeNode(this->dag_node, tree_context);
|
||||
}
|
||||
|
||||
std::string RPNBuilderFunctionTreeNode::getFunctionName() const
|
||||
{
|
||||
if (ast_node)
|
||||
return assert_cast<const ASTFunction *>(ast_node)->name;
|
||||
else
|
||||
return dag_node->function_base->getName();
|
||||
}
|
||||
|
||||
size_t RPNBuilderFunctionTreeNode::getArgumentsSize() const
|
||||
{
|
||||
if (ast_node)
|
||||
{
|
||||
const auto * ast_function = assert_cast<const ASTFunction *>(ast_node);
|
||||
return ast_function->arguments ? ast_function->arguments->children.size() : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
return dag_node->children.size();
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrap the function argument at `index` into a tree node sharing the same tree context.
/// No bounds check is performed here; `index` is expected to be below getArgumentsSize().
RPNBuilderTreeNode RPNBuilderFunctionTreeNode::getArgumentAt(size_t index) const
{
    if (!ast_node)
        return RPNBuilderTreeNode(dag_node->children[index], tree_context);

    const auto * function_ast = assert_cast<const ASTFunction *>(ast_node);
    const IAST * argument_ast = function_ast->arguments->children[index].get();

    return RPNBuilderTreeNode(argument_ast, tree_context);
}
|
||||
|
||||
}
|
@ -1,111 +1,266 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Block.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Storages/MergeTree/KeyCondition.h>
|
||||
#include <Storages/SelectQueryInfo.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/Set.h>
|
||||
#include <Interpreters/PreparedSets.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Builds reverse polish notation
|
||||
template <typename RPNElement>
|
||||
class RPNBuilder : WithContext
|
||||
/** Context of RPNBuilderTree.
|
||||
*
|
||||
* For AST tree context, precalculated block with constants and prepared sets are required for index analysis.
|
||||
* For DAG tree precalculated block with constants and prepared sets are not required, because constants and sets already
|
||||
* calculated inside COLUMN actions dag node.
|
||||
*/
|
||||
class RPNBuilderTreeContext
|
||||
{
|
||||
public:
|
||||
using RPN = std::vector<RPNElement>;
|
||||
using AtomFromASTFunc = std::function<
|
||||
bool(const ASTPtr & node, ContextPtr context, Block & block_with_constants, RPNElement & out)>;
|
||||
/// Construct RPNBuilderTreeContext for ActionsDAG tree
|
||||
explicit RPNBuilderTreeContext(ContextPtr query_context_);
|
||||
|
||||
RPNBuilder(const SelectQueryInfo & query_info, ContextPtr context_, const AtomFromASTFunc & atom_from_ast_)
|
||||
: WithContext(context_), atom_from_ast(atom_from_ast_)
|
||||
/// Construct RPNBuilderTreeContext for AST tree
|
||||
explicit RPNBuilderTreeContext(ContextPtr query_context_, Block block_with_constants_, PreparedSetsPtr prepared_sets_);
|
||||
|
||||
/// Get query context
|
||||
const ContextPtr & getQueryContext() const
|
||||
{
|
||||
/** Evaluation of expressions that depend only on constants.
|
||||
* For the index to be used, if it is written, for example `WHERE Date = toDate(now())`.
|
||||
*/
|
||||
block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, getContext());
|
||||
|
||||
/// Transform WHERE section to Reverse Polish notation
|
||||
const ASTSelectQuery & select = typeid_cast<const ASTSelectQuery &>(*query_info.query);
|
||||
if (select.where())
|
||||
{
|
||||
traverseAST(select.where());
|
||||
|
||||
if (select.prewhere())
|
||||
{
|
||||
traverseAST(select.prewhere());
|
||||
rpn.emplace_back(RPNElement::FUNCTION_AND);
|
||||
}
|
||||
}
|
||||
else if (select.prewhere())
|
||||
{
|
||||
traverseAST(select.prewhere());
|
||||
}
|
||||
else
|
||||
{
|
||||
rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
|
||||
}
|
||||
return query_context;
|
||||
}
|
||||
|
||||
RPN && extractRPN() { return std::move(rpn); }
|
||||
/** Get block with constants.
|
||||
* Valid only for AST tree.
|
||||
*/
|
||||
const Block & getBlockWithConstants() const
|
||||
{
|
||||
return block_with_constants;
|
||||
}
|
||||
|
||||
/** Get prepared sets.
|
||||
* Valid only for AST tree.
|
||||
*/
|
||||
const PreparedSetsPtr & getPreparedSets() const
|
||||
{
|
||||
return prepared_sets;
|
||||
}
|
||||
|
||||
private:
|
||||
void traverseAST(const ASTPtr & node)
|
||||
/// Valid for both AST and ActionDAG tree
|
||||
ContextPtr query_context;
|
||||
|
||||
/// Valid only for AST tree
|
||||
Block block_with_constants;
|
||||
|
||||
/// Valid only for AST tree
|
||||
PreparedSetsPtr prepared_sets;
|
||||
};
|
||||
|
||||
class RPNBuilderFunctionTreeNode;
|
||||
|
||||
/** RPNBuilderTreeNode is wrapper around DAG or AST node.
|
||||
* It defines unified interface for index analysis.
|
||||
*/
|
||||
class RPNBuilderTreeNode
|
||||
{
|
||||
public:
|
||||
/// Construct RPNBuilderTreeNode with non null dag node and tree context
|
||||
explicit RPNBuilderTreeNode(const ActionsDAG::Node * dag_node_, RPNBuilderTreeContext & tree_context_);
|
||||
|
||||
/// Construct RPNBuilderTreeNode with non null ast node and tree context
|
||||
explicit RPNBuilderTreeNode(const IAST * ast_node_, RPNBuilderTreeContext & tree_context_);
|
||||
|
||||
/// Get column name
|
||||
std::string getColumnName() const;
|
||||
|
||||
/** Get column name.
|
||||
* Function `modulo` is replaced with `moduloLegacy`.
|
||||
*/
|
||||
std::string getColumnNameWithModuloLegacy() const;
|
||||
|
||||
/// Is node function
|
||||
bool isFunction() const;
|
||||
|
||||
/// Is node constant
|
||||
bool isConstant() const;
|
||||
|
||||
/** Get constant as constant column.
|
||||
* Node must be constant before calling these method, otherwise logical exception is thrown.
|
||||
*/
|
||||
ColumnWithTypeAndName getConstantColumn() const;
|
||||
|
||||
/** Try get constant from node. If node is constant returns true, and constant value and constant type output parameters are set.
|
||||
* Otherwise false is returned.
|
||||
*/
|
||||
bool tryGetConstant(Field & output_value, DataTypePtr & output_type) const;
|
||||
|
||||
/// Try get prepared set from node
|
||||
ConstSetPtr tryGetPreparedSet() const;
|
||||
|
||||
/// Try get prepared set from node that match data types
|
||||
ConstSetPtr tryGetPreparedSet(const DataTypes & data_types) const;
|
||||
|
||||
/// Try get prepared set from node that match indexes mapping and data types
|
||||
ConstSetPtr tryGetPreparedSet(
|
||||
const std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
|
||||
const DataTypes & data_types) const;
|
||||
|
||||
/** Convert node to function node.
|
||||
* Node must be function before calling these method, otherwise exception is thrown.
|
||||
*/
|
||||
RPNBuilderFunctionTreeNode toFunctionNode() const;
|
||||
|
||||
/// Convert node to function node or null optional
|
||||
std::optional<RPNBuilderFunctionTreeNode> toFunctionNodeOrNull() const;
|
||||
|
||||
/// Get tree context
|
||||
const RPNBuilderTreeContext & getTreeContext() const
|
||||
{
|
||||
return tree_context;
|
||||
}
|
||||
|
||||
/// Get tree context
|
||||
RPNBuilderTreeContext & getTreeContext()
|
||||
{
|
||||
return tree_context;
|
||||
}
|
||||
|
||||
protected:
|
||||
const IAST * ast_node = nullptr;
|
||||
const ActionsDAG::Node * dag_node = nullptr;
|
||||
RPNBuilderTreeContext & tree_context;
|
||||
};
|
||||
|
||||
/** RPNBuilderFunctionTreeNode is wrapper around RPNBuilderTreeNode with function type.
|
||||
* It provide additional functionality that is specific for function.
|
||||
*/
|
||||
class RPNBuilderFunctionTreeNode : public RPNBuilderTreeNode
|
||||
{
|
||||
public:
|
||||
using RPNBuilderTreeNode::RPNBuilderTreeNode;
|
||||
|
||||
/// Get function name
|
||||
std::string getFunctionName() const;
|
||||
|
||||
/// Get function arguments size
|
||||
size_t getArgumentsSize() const;
|
||||
|
||||
/// Get function argument at index
|
||||
RPNBuilderTreeNode getArgumentAt(size_t index) const;
|
||||
};
|
||||
|
||||
/** RPN Builder build stack of reverse polish notation elements (RPNElements) required for index analysis.
|
||||
*
|
||||
* RPNBuilder client must provide RPNElement type that has following interface:
|
||||
*
|
||||
* struct RPNElementInterface
|
||||
* {
|
||||
* enum Function
|
||||
* {
|
||||
* FUNCTION_UNKNOWN, /// Can take any value.
|
||||
* /// Operators of the logical expression.
|
||||
* FUNCTION_NOT,
|
||||
* FUNCTION_AND,
|
||||
* FUNCTION_OR,
|
||||
* ...
|
||||
* };
|
||||
*
|
||||
* RPNElementInterface();
|
||||
*
|
||||
* Function function = FUNCTION_UNKNOWN;
|
||||
*
|
||||
* }
|
||||
*
|
||||
* RPNBuilder take care of building stack of RPNElements with `NOT`, `AND`, `OR` types.
|
||||
* In addition client must provide ExtractAtomFromTreeFunction that returns true and RPNElement as output parameter,
|
||||
* if it can convert RPNBuilderTree node to RPNElement, false otherwise.
|
||||
*/
|
||||
template <typename RPNElement>
|
||||
class RPNBuilder
|
||||
{
|
||||
public:
|
||||
using RPNElements = std::vector<RPNElement>;
|
||||
using ExtractAtomFromTreeFunction = std::function<bool (const RPNBuilderTreeNode & node, RPNElement & out)>;
|
||||
|
||||
explicit RPNBuilder(const ActionsDAG::Node * filter_actions_dag_node,
|
||||
ContextPtr query_context_,
|
||||
const ExtractAtomFromTreeFunction & extract_atom_from_tree_function_)
|
||||
: tree_context(std::move(query_context_))
|
||||
, extract_atom_from_tree_function(extract_atom_from_tree_function_)
|
||||
{
|
||||
traverseTree(RPNBuilderTreeNode(filter_actions_dag_node, tree_context));
|
||||
}
|
||||
|
||||
RPNBuilder(const ASTPtr & filter_node,
|
||||
ContextPtr query_context_,
|
||||
Block block_with_constants_,
|
||||
PreparedSetsPtr prepared_sets_,
|
||||
const ExtractAtomFromTreeFunction & extract_atom_from_tree_function_)
|
||||
: tree_context(std::move(query_context_), std::move(block_with_constants_), std::move(prepared_sets_))
|
||||
, extract_atom_from_tree_function(extract_atom_from_tree_function_)
|
||||
{
|
||||
traverseTree(RPNBuilderTreeNode(filter_node.get(), tree_context));
|
||||
}
|
||||
|
||||
RPNElements && extractRPN() && { return std::move(rpn_elements); }
|
||||
|
||||
private:
|
||||
void traverseTree(const RPNBuilderTreeNode & node)
|
||||
{
|
||||
RPNElement element;
|
||||
|
||||
if (ASTFunction * func = typeid_cast<ASTFunction *>(&*node))
|
||||
if (node.isFunction())
|
||||
{
|
||||
if (operatorFromAST(func, element))
|
||||
auto function_node = node.toFunctionNode();
|
||||
|
||||
if (extractLogicalOperatorFromTree(function_node, element))
|
||||
{
|
||||
auto & args = typeid_cast<ASTExpressionList &>(*func->arguments).children;
|
||||
for (size_t i = 0, size = args.size(); i < size; ++i)
|
||||
size_t arguments_size = function_node.getArgumentsSize();
|
||||
|
||||
for (size_t argument_index = 0; argument_index < arguments_size; ++argument_index)
|
||||
{
|
||||
traverseAST(args[i]);
|
||||
auto function_node_argument = function_node.getArgumentAt(argument_index);
|
||||
traverseTree(function_node_argument);
|
||||
|
||||
/** The first part of the condition is for the correct support of `and` and `or` functions of arbitrary arity
|
||||
* - in this case `n - 1` elements are added (where `n` is the number of arguments).
|
||||
*/
|
||||
if (i != 0 || element.function == RPNElement::FUNCTION_NOT)
|
||||
rpn.emplace_back(std::move(element));
|
||||
if (argument_index != 0 || element.function == RPNElement::FUNCTION_NOT)
|
||||
rpn_elements.emplace_back(std::move(element));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!atom_from_ast(node, getContext(), block_with_constants, element))
|
||||
{
|
||||
if (!extract_atom_from_tree_function(node, element))
|
||||
element.function = RPNElement::FUNCTION_UNKNOWN;
|
||||
}
|
||||
|
||||
rpn.emplace_back(std::move(element));
|
||||
rpn_elements.emplace_back(std::move(element));
|
||||
}
|
||||
|
||||
bool operatorFromAST(const ASTFunction * func, RPNElement & out)
|
||||
bool extractLogicalOperatorFromTree(const RPNBuilderFunctionTreeNode & function_node, RPNElement & out)
|
||||
{
|
||||
/// Functions AND, OR, NOT.
|
||||
/// Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
|
||||
/// (or, the same thing - calling the function `and` from one argument).
|
||||
const ASTs & args = typeid_cast<const ASTExpressionList &>(*func->arguments).children;
|
||||
/** Functions AND, OR, NOT.
|
||||
* Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
|
||||
* (or, the same thing - calling the function `and` from one argument).
|
||||
*/
|
||||
|
||||
if (func->name == "not")
|
||||
auto function_name = function_node.getFunctionName();
|
||||
if (function_name == "not")
|
||||
{
|
||||
if (args.size() != 1)
|
||||
if (function_node.getArgumentsSize() != 1)
|
||||
return false;
|
||||
|
||||
out.function = RPNElement::FUNCTION_NOT;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (func->name == "and" || func->name == "indexHint")
|
||||
if (function_name == "and" || function_name == "indexHint")
|
||||
out.function = RPNElement::FUNCTION_AND;
|
||||
else if (func->name == "or")
|
||||
else if (function_name == "or")
|
||||
out.function = RPNElement::FUNCTION_OR;
|
||||
else
|
||||
return false;
|
||||
@ -114,10 +269,9 @@ private:
|
||||
return true;
|
||||
}
|
||||
|
||||
const AtomFromASTFunc & atom_from_ast;
|
||||
Block block_with_constants;
|
||||
RPN rpn;
|
||||
RPNBuilderTreeContext tree_context;
|
||||
const ExtractAtomFromTreeFunction & extract_atom_from_tree_function;
|
||||
RPNElements rpn_elements;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,10 +1,10 @@
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeSink.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
#include <Interpreters/PartLog.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
#include <Core/Block.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
@ -165,7 +165,9 @@ void ReplicatedMergeTreeSink::consume(Chunk chunk)
|
||||
*/
|
||||
size_t replicas_num = checkQuorumPrecondition(zookeeper);
|
||||
|
||||
deduceTypesOfObjectColumns(storage_snapshot, block);
|
||||
if (!storage_snapshot->object_columns.empty())
|
||||
convertDynamicColumnsToTuples(block, storage_snapshot);
|
||||
|
||||
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context);
|
||||
|
||||
using DelayedPartitions = std::vector<ReplicatedMergeTreeSink::DelayedChunk::Partition>;
|
||||
|
@ -47,10 +47,10 @@ public:
|
||||
const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const override
|
||||
{
|
||||
const auto & storage_columns = metadata_snapshot->getColumns();
|
||||
if (!hasObjectColumns(storage_columns))
|
||||
if (!hasDynamicSubcolumns(storage_columns))
|
||||
return std::make_shared<StorageSnapshot>(*this, metadata_snapshot);
|
||||
|
||||
auto object_columns = getObjectColumns(
|
||||
auto object_columns = getConcreteObjectColumns(
|
||||
parts.begin(), parts.end(),
|
||||
storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); });
|
||||
|
||||
|
@ -598,7 +598,7 @@ std::optional<QueryProcessingStage::Enum> StorageDistributed::getOptimizedQueryP
|
||||
|
||||
static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr query)
|
||||
{
|
||||
if (!hasObjectColumns(all_columns))
|
||||
if (!hasDynamicSubcolumns(all_columns))
|
||||
return false;
|
||||
|
||||
if (!query)
|
||||
@ -613,7 +613,7 @@ static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr
|
||||
auto name_in_storage = Nested::splitName(required_column).first;
|
||||
auto column_in_storage = all_columns.tryGetPhysical(name_in_storage);
|
||||
|
||||
if (column_in_storage && isObject(column_in_storage->type))
|
||||
if (column_in_storage && column_in_storage->type->hasDynamicSubcolumns())
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -640,7 +640,7 @@ StorageSnapshotPtr StorageDistributed::getStorageSnapshotForQuery(
|
||||
metadata_snapshot->getColumns(),
|
||||
getContext());
|
||||
|
||||
auto object_columns = DB::getObjectColumns(
|
||||
auto object_columns = DB::getConcreteObjectColumns(
|
||||
snapshot_data->objects_by_shard.begin(),
|
||||
snapshot_data->objects_by_shard.end(),
|
||||
metadata_snapshot->getColumns(),
|
||||
|
@ -526,7 +526,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns)
|
||||
|
||||
const auto * available_type = it->getMapped();
|
||||
|
||||
if (!isObject(*available_type)
|
||||
if (!available_type->hasDynamicSubcolumns()
|
||||
&& !column.type->equals(*available_type)
|
||||
&& !isCompatibleEnumTypes(available_type, column.type.get()))
|
||||
throw Exception(
|
||||
@ -575,7 +575,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns,
|
||||
const auto * provided_column_type = it->getMapped();
|
||||
const auto * available_column_type = jt->getMapped();
|
||||
|
||||
if (!isObject(*provided_column_type)
|
||||
if (!provided_column_type->hasDynamicSubcolumns()
|
||||
&& !provided_column_type->equals(*available_column_type)
|
||||
&& !isCompatibleEnumTypes(available_column_type, provided_column_type))
|
||||
throw Exception(
|
||||
@ -619,7 +619,7 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const
|
||||
listOfColumns(available_columns));
|
||||
|
||||
const auto * available_type = it->getMapped();
|
||||
if (!isObject(*available_type)
|
||||
if (!available_type->hasDynamicSubcolumns()
|
||||
&& !column.type->equals(*available_type)
|
||||
&& !isCompatibleEnumTypes(available_type, column.type.get()))
|
||||
throw Exception(
|
||||
|
@ -462,7 +462,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c
|
||||
settings.getter = createStreamGetter(name_and_type);
|
||||
|
||||
if (!serialize_states.contains(name))
|
||||
serialization->serializeBinaryBulkStatePrefix(settings, serialize_states[name]);
|
||||
serialization->serializeBinaryBulkStatePrefix(column, settings, serialize_states[name]);
|
||||
|
||||
if (storage.use_marks_file)
|
||||
{
|
||||
|
@ -146,7 +146,7 @@ public:
|
||||
auto extended_storage_columns = storage_snapshot->getColumns(
|
||||
GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects());
|
||||
|
||||
convertObjectsToTuples(block, extended_storage_columns);
|
||||
convertDynamicColumnsToTuples(block, storage_snapshot);
|
||||
}
|
||||
|
||||
if (storage.compress)
|
||||
@ -212,10 +212,10 @@ StorageSnapshotPtr StorageMemory::getStorageSnapshot(const StorageMetadataPtr &
|
||||
auto snapshot_data = std::make_unique<SnapshotData>();
|
||||
snapshot_data->blocks = data.get();
|
||||
|
||||
if (!hasObjectColumns(metadata_snapshot->getColumns()))
|
||||
if (!hasDynamicSubcolumns(metadata_snapshot->getColumns()))
|
||||
return std::make_shared<StorageSnapshot>(*this, metadata_snapshot, ColumnsDescription{}, std::move(snapshot_data));
|
||||
|
||||
auto object_columns = getObjectColumns(
|
||||
auto object_columns = getConcreteObjectColumns(
|
||||
snapshot_data->blocks->begin(),
|
||||
snapshot_data->blocks->end(),
|
||||
metadata_snapshot->getColumns(),
|
||||
|
@ -364,39 +364,6 @@ String StorageS3Source::KeysIterator::next()
|
||||
return pimpl->next();
|
||||
}
|
||||
|
||||
class StorageS3Source::ReadTasksIterator::Impl
|
||||
{
|
||||
public:
|
||||
explicit Impl(const std::vector<String> & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_)
|
||||
: read_tasks(read_tasks_), new_read_tasks_callback(new_read_tasks_callback_)
|
||||
{
|
||||
}
|
||||
|
||||
String next()
|
||||
{
|
||||
size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
|
||||
if (current_index >= read_tasks.size())
|
||||
return new_read_tasks_callback();
|
||||
return read_tasks[current_index];
|
||||
}
|
||||
|
||||
private:
|
||||
std::atomic_size_t index = 0;
|
||||
std::vector<String> read_tasks;
|
||||
ReadTaskCallback new_read_tasks_callback;
|
||||
};
|
||||
|
||||
StorageS3Source::ReadTasksIterator::ReadTasksIterator(
|
||||
const std::vector<String> & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_)
|
||||
: pimpl(std::make_shared<StorageS3Source::ReadTasksIterator::Impl>(read_tasks_, new_read_tasks_callback_))
|
||||
{
|
||||
}
|
||||
|
||||
String StorageS3Source::ReadTasksIterator::next()
|
||||
{
|
||||
return pimpl->next();
|
||||
}
|
||||
|
||||
Block StorageS3Source::getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns)
|
||||
{
|
||||
for (const auto & virtual_column : requested_virtual_columns)
|
||||
@ -806,8 +773,7 @@ StorageS3::StorageS3(
|
||||
distributed_processing_,
|
||||
is_key_with_globs,
|
||||
format_settings,
|
||||
context_,
|
||||
&read_tasks_used_in_schema_inference);
|
||||
context_);
|
||||
storage_metadata.setColumns(columns);
|
||||
}
|
||||
else
|
||||
@ -835,19 +801,14 @@ std::shared_ptr<StorageS3Source::IteratorWrapper> StorageS3::createFileIterator(
|
||||
ContextPtr local_context,
|
||||
ASTPtr query,
|
||||
const Block & virtual_block,
|
||||
const std::vector<String> & read_tasks,
|
||||
std::unordered_map<String, S3::ObjectInfo> * object_infos,
|
||||
Strings * read_keys)
|
||||
{
|
||||
if (distributed_processing)
|
||||
{
|
||||
return std::make_shared<StorageS3Source::IteratorWrapper>(
|
||||
[read_tasks_iterator = std::make_shared<StorageS3Source::ReadTasksIterator>(read_tasks, local_context->getReadTaskCallback()), read_keys]() -> String
|
||||
{
|
||||
auto key = read_tasks_iterator->next();
|
||||
if (read_keys)
|
||||
read_keys->push_back(key);
|
||||
return key;
|
||||
[callback = local_context->getReadTaskCallback()]() -> String {
|
||||
return callback();
|
||||
});
|
||||
}
|
||||
else if (is_key_with_globs)
|
||||
@ -907,7 +868,6 @@ Pipe StorageS3::read(
|
||||
local_context,
|
||||
query_info.query,
|
||||
virtual_block,
|
||||
read_tasks_used_in_schema_inference,
|
||||
&object_infos);
|
||||
|
||||
ColumnsDescription columns_description;
|
||||
@ -1205,7 +1165,7 @@ ColumnsDescription StorageS3::getTableStructureFromData(
|
||||
|
||||
return getTableStructureFromDataImpl(
|
||||
configuration.format, s3_configuration, configuration.compression_method, distributed_processing,
|
||||
s3_configuration.uri.key.find_first_of("*?{") != std::string::npos, format_settings, ctx, nullptr, object_infos);
|
||||
s3_configuration.uri.key.find_first_of("*?{") != std::string::npos, format_settings, ctx, object_infos);
|
||||
}
|
||||
|
||||
ColumnsDescription StorageS3::getTableStructureFromDataImpl(
|
||||
@ -1216,13 +1176,12 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
|
||||
bool is_key_with_globs,
|
||||
const std::optional<FormatSettings> & format_settings,
|
||||
ContextPtr ctx,
|
||||
std::vector<String> * read_keys_in_distributed_processing,
|
||||
std::unordered_map<String, S3::ObjectInfo> * object_infos)
|
||||
{
|
||||
std::vector<String> read_keys;
|
||||
|
||||
auto file_iterator
|
||||
= createFileIterator(s3_configuration, {s3_configuration.uri.key}, is_key_with_globs, distributed_processing, ctx, nullptr, {}, {}, object_infos, &read_keys);
|
||||
= createFileIterator(s3_configuration, {s3_configuration.uri.key}, is_key_with_globs, distributed_processing, ctx, nullptr, {}, object_infos, &read_keys);
|
||||
|
||||
std::optional<ColumnsDescription> columns_from_cache;
|
||||
size_t prev_read_keys_size = read_keys.size();
|
||||
@ -1275,9 +1234,6 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
|
||||
if (ctx->getSettingsRef().schema_inference_use_cache_for_s3)
|
||||
addColumnsToCache(read_keys, s3_configuration, columns, format, format_settings, ctx);
|
||||
|
||||
if (distributed_processing && read_keys_in_distributed_processing)
|
||||
*read_keys_in_distributed_processing = std::move(read_keys);
|
||||
|
||||
return columns;
|
||||
}
|
||||
|
||||
|
@ -66,18 +66,6 @@ public:
|
||||
std::shared_ptr<Impl> pimpl;
|
||||
};
|
||||
|
||||
class ReadTasksIterator
|
||||
{
|
||||
public:
|
||||
ReadTasksIterator(const std::vector<String> & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_);
|
||||
String next();
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
/// shared_ptr to have copy constructor
|
||||
std::shared_ptr<Impl> pimpl;
|
||||
};
|
||||
|
||||
using IteratorWrapper = std::function<String()>;
|
||||
|
||||
static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);
|
||||
@ -238,8 +226,6 @@ private:
|
||||
ASTPtr partition_by;
|
||||
bool is_key_with_globs = false;
|
||||
|
||||
std::vector<String> read_tasks_used_in_schema_inference;
|
||||
|
||||
std::unordered_map<String, S3::ObjectInfo> object_infos;
|
||||
|
||||
static void updateS3Configuration(ContextPtr, S3Configuration &);
|
||||
@ -252,7 +238,6 @@ private:
|
||||
ContextPtr local_context,
|
||||
ASTPtr query,
|
||||
const Block & virtual_block,
|
||||
const std::vector<String> & read_tasks = {},
|
||||
std::unordered_map<String, S3::ObjectInfo> * object_infos = nullptr,
|
||||
Strings * read_keys = nullptr);
|
||||
|
||||
@ -264,7 +249,6 @@ private:
|
||||
bool is_key_with_globs,
|
||||
const std::optional<FormatSettings> & format_settings,
|
||||
ContextPtr ctx,
|
||||
std::vector<String> * read_keys_in_distributed_processing = nullptr,
|
||||
std::unordered_map<String, S3::ObjectInfo> * object_infos = nullptr);
|
||||
|
||||
bool supportsSubsetOfColumns() const override;
|
||||
|
@ -5,46 +5,40 @@
|
||||
#if USE_AWS_S3
|
||||
|
||||
#include "Common/Exception.h"
|
||||
#include <Common/Throttler.h>
|
||||
#include "Client/Connection.h"
|
||||
#include "Core/QueryProcessingStage.h"
|
||||
#include <Core/UUID.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromS3.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/getHeaderForProcessingStage.h>
|
||||
#include <Interpreters/SelectQueryOptions.h>
|
||||
#include <Interpreters/InterpreterSelectQuery.h>
|
||||
#include <Interpreters/getTableExpressions.h>
|
||||
#include <Processors/Transforms/AddingDefaultsTransform.h>
|
||||
#include <QueryPipeline/narrowPipe.h>
|
||||
#include <QueryPipeline/Pipe.h>
|
||||
#include "Processors/ISource.h"
|
||||
#include <Processors/Sources/RemoteSource.h>
|
||||
#include <QueryPipeline/RemoteQueryExecutor.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/SelectQueryInfo.h>
|
||||
#include <Storages/getVirtualsForStorage.h>
|
||||
#include <Storages/StorageDictionary.h>
|
||||
#include <Storages/addColumnsStructureToQueryWithClusterEngine.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
#include <aws/core/auth/AWSCredentials.h>
|
||||
#include <aws/s3/S3Client.h>
|
||||
#include <aws/s3/model/ListObjectsV2Request.h>
|
||||
|
||||
#include <ios>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <cassert>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
StorageS3Cluster::StorageS3Cluster(
|
||||
const StorageS3ClusterConfiguration & configuration_,
|
||||
const StorageID & table_id_,
|
||||
@ -72,6 +66,7 @@ StorageS3Cluster::StorageS3Cluster(
|
||||
auto columns = StorageS3::getTableStructureFromDataImpl(format_name, s3_configuration, compression_method,
|
||||
/*distributed_processing_*/false, is_key_with_globs, /*format_settings=*/std::nullopt, context_);
|
||||
storage_metadata.setColumns(columns);
|
||||
add_columns_structure_to_query = true;
|
||||
}
|
||||
else
|
||||
storage_metadata.setColumns(columns_);
|
||||
@ -117,6 +112,11 @@ Pipe StorageS3Cluster::read(
|
||||
|
||||
const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
|
||||
|
||||
ASTPtr query_to_send = query_info.original_query->clone();
|
||||
if (add_columns_structure_to_query)
|
||||
addColumnsStructureToQueryWithClusterEngine(
|
||||
query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), 5, getName());
|
||||
|
||||
for (const auto & replicas : cluster->getShardsAddresses())
|
||||
{
|
||||
/// There will be only one replica, because we consider each replica as a shard
|
||||
@ -135,7 +135,7 @@ Pipe StorageS3Cluster::read(
|
||||
/// So, task_identifier is passed as constructor argument. It is more obvious.
|
||||
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
|
||||
connection,
|
||||
queryToString(query_info.original_query),
|
||||
queryToString(query_to_send),
|
||||
header,
|
||||
context,
|
||||
/*throttler=*/nullptr,
|
||||
|
@ -46,6 +46,7 @@ private:
|
||||
String compression_method;
|
||||
NamesAndTypesList virtual_columns;
|
||||
Block virtual_block;
|
||||
bool add_columns_structure_to_query = false;
|
||||
};
|
||||
|
||||
|
||||
|
@ -76,7 +76,7 @@ std::optional<NameAndTypePair> StorageSnapshot::tryGetColumn(const GetColumnsOpt
|
||||
{
|
||||
const auto & columns = getMetadataForQuery()->getColumns();
|
||||
auto column = columns.tryGetColumn(options, column_name);
|
||||
if (column && (!isObject(column->type) || !options.with_extended_objects))
|
||||
if (column && (!column->type->hasDynamicSubcolumns() || !options.with_extended_objects))
|
||||
return column;
|
||||
|
||||
if (options.with_extended_objects)
|
||||
|
51
src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp
Normal file
51
src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp
Normal file
@ -0,0 +1,51 @@
|
||||
#include <Storages/addColumnsStructureToQueryWithClusterEngine.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/// Returns the argument list of the table function used in the first table expression of a SELECT query.
/// Returns nullptr when the query is not a SELECT, has no tables, or its first table expression
/// is not a table function call with an argument list.
static ASTExpressionList * extractTableFunctionArgumentsFromSelectQuery(ASTPtr & query)
{
    auto * select_query = query->as<ASTSelectQuery>();
    if (!select_query || !select_query->tables())
        return nullptr;

    /// Each cast below may yield nullptr for an unexpected AST shape; treat that as "not found"
    /// so the caller can raise a proper exception instead of dereferencing a null pointer here.
    auto * tables = select_query->tables()->as<ASTTablesInSelectQuery>();
    if (!tables || tables->children.empty())
        return nullptr;

    auto * tables_element = tables->children[0]->as<ASTTablesInSelectQueryElement>();
    if (!tables_element || !tables_element->table_expression)
        return nullptr;

    auto * table_expression = tables_element->table_expression->as<ASTTableExpression>();
    if (!table_expression || !table_expression->table_function)
        return nullptr;

    auto * table_function = table_expression->table_function->as<ASTFunction>();
    if (!table_function || !table_function->arguments)
        return nullptr;

    return table_function->arguments->as<ASTExpressionList>();
}
|
||||
|
||||
/// Appends `structure` as a literal to the arguments of the table function in the SELECT query `query`.
///
/// query         - SELECT query whose first table expression is a table function; modified in place.
/// structure     - columns description appended as the last argument.
/// max_arguments - maximum number of arguments the table function may already have.
/// function_name - table function name, used only in exception messages.
///
/// Throws LOGICAL_ERROR if the table function arguments cannot be extracted from the query
/// or if the number of arguments is not in the range [2, max_arguments].
void addColumnsStructureToQueryWithClusterEngine(ASTPtr & query, const String & structure, size_t max_arguments, const String & function_name)
{
    ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query);
    if (!expression_list)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function {}, got '{}'", function_name, queryToString(query));

    const size_t arguments_count = expression_list->children.size();

    /// Placeholders are, in order: upper bound, function name, actual count.
    if (arguments_count < 2 || arguments_count > max_arguments)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR,
            "Expected 2 to {} arguments in {} table functions, got {}",
            max_arguments, function_name, arguments_count);

    /// For these argument counts an "auto" format literal is pushed before the structure.
    /// NOTE(review): presumably these are the call forms where the format argument was omitted - confirm
    /// against the table function signatures.
    if (arguments_count == 2 || arguments_count == max_arguments - 1)
        expression_list->children.push_back(std::make_shared<ASTLiteral>("auto"));

    expression_list->children.push_back(std::make_shared<ASTLiteral>(structure));
}
|
||||
|
||||
}
|
11
src/Storages/addColumnsStructureToQueryWithClusterEngine.h
Normal file
11
src/Storages/addColumnsStructureToQueryWithClusterEngine.h
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
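/// Appends `structure` as the last argument of the s3Cluster/hdfsCluster table function in a SELECT query,
/// first appending an "auto" format literal when the function has exactly 2 or (max_arguments - 1) arguments.
/// Throws LOGICAL_ERROR if the query is not a SELECT from a table function
/// or if the function has fewer than 2 or more than max_arguments arguments.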
|
||||
void addColumnsStructureToQueryWithClusterEngine(ASTPtr & query, const String & structure, size_t max_arguments, const String & function_name);
|
||||
|
||||
}
|
@ -200,7 +200,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables(
|
||||
auto type_name = type_col[i].get<const String &>();
|
||||
|
||||
auto storage_column = storage_columns.tryGetPhysical(name);
|
||||
if (storage_column && isObject(storage_column->type))
|
||||
if (storage_column && storage_column->type->hasDynamicSubcolumns())
|
||||
res.add(ColumnDescription(std::move(name), DataTypeFactory::instance().get(type_name)));
|
||||
}
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user