Merge remote-tracking branch 'upstream/master' into group_by_all

# Conflicts:
#	src/Analyzer/QueryNode.h
taofengliu 2022-11-01 22:53:44 +08:00
commit c43dd96f14
129 changed files with 2520 additions and 1406 deletions

View File

@ -2023,6 +2023,7 @@ jobs:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
TestsBugfixCheck:
needs: [CheckLabels, StyleCheck]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs

View File

@ -33,7 +33,7 @@ CREATE TABLE trips (
tip_amount Float32,
tolls_amount Float32,
total_amount Float32,
payment_type Enum('CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4),
payment_type Enum('CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4, 'UNK' = 5),
pickup_ntaname LowCardinality(String),
dropoff_ntaname LowCardinality(String)
)
@ -63,7 +63,7 @@ SELECT
payment_type,
pickup_ntaname,
dropoff_ntaname
FROM url(
FROM s3(
'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_{0..2}.gz',
'TabSeparatedWithNames'
)

View File

@ -128,6 +128,24 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
</details>
<details>
<summary>Migration Method for installing the deb-packages</summary>
```bash
sudo apt-key del E0C56BD4
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list
sudo apt-get update
sudo apt-get install -y clickhouse-server clickhouse-client
sudo service clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you set up a password.
```
</details>
You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs.
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/main/c/).

View File

@ -1627,34 +1627,7 @@ void QueryAnalyzer::validateTableExpressionModifiers(const QueryTreeNodePtr & ta
table_expression_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
if (query_node || union_node)
{
auto table_expression_modifiers = query_node ? query_node->getTableExpressionModifiers() : union_node->getTableExpressionModifiers();
if (table_expression_modifiers.has_value())
{
String table_expression_modifiers_error_message;
if (table_expression_modifiers->hasFinal())
{
table_expression_modifiers_error_message += "FINAL";
if (table_expression_modifiers->hasSampleSizeRatio())
table_expression_modifiers_error_message += ", SAMPLE";
}
else if (table_expression_modifiers->hasSampleSizeRatio())
{
table_expression_modifiers_error_message += "SAMPLE";
}
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Table expression modifiers {} are not supported for subquery {}. In scope {}",
table_expression_modifiers_error_message,
table_expression_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
}
}
else if (table_node || table_function_node)
if (table_node || table_function_node)
{
auto table_expression_modifiers = table_node ? table_node->getTableExpressionModifiers() : table_function_node->getTableExpressionModifiers();
@ -4729,17 +4702,23 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod
auto table_expression_modifiers = from_table_identifier.getTableExpressionModifiers();
if (auto * resolved_identifier_query_node = resolved_identifier->as<QueryNode>())
auto * resolved_identifier_query_node = resolved_identifier->as<QueryNode>();
auto * resolved_identifier_union_node = resolved_identifier->as<UnionNode>();
if (resolved_identifier_query_node || resolved_identifier_union_node)
{
resolved_identifier_query_node->setIsCTE(false);
if (resolved_identifier_query_node)
resolved_identifier_query_node->setIsCTE(false);
else
resolved_identifier_union_node->setIsCTE(false);
if (table_expression_modifiers.has_value())
resolved_identifier_query_node->setTableExpressionModifiers(*table_expression_modifiers);
}
else if (auto * resolved_identifier_union_node = resolved_identifier->as<UnionNode>())
{
resolved_identifier_union_node->setIsCTE(false);
if (table_expression_modifiers.has_value())
resolved_identifier_union_node->setTableExpressionModifiers(*table_expression_modifiers);
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Table expression modifiers {} are not supported for subquery {}",
table_expression_modifiers->formatForErrorMessage(),
resolved_identifier->formatASTForErrorMessage());
}
}
else if (auto * resolved_identifier_table_node = resolved_identifier->as<TableNode>())
{

View File

@ -77,12 +77,6 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
buffer << ", constant_value_type: " << constant_value->getType()->getName();
}
if (table_expression_modifiers)
{
buffer << ", ";
table_expression_modifiers->dump(buffer);
}
if (hasWith())
{
buffer << '\n' << std::string(indent + 2, ' ') << "WITH\n";
@ -198,13 +192,6 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
else if (!constant_value && rhs_typed.constant_value)
return false;
if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers)
return false;
else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers)
return false;
else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers)
return false;
return is_subquery == rhs_typed.is_subquery &&
is_cte == rhs_typed.is_cte &&
cte_name == rhs_typed.cte_name &&
@ -255,9 +242,6 @@ void QueryNode::updateTreeHashImpl(HashState & state) const
state.update(constant_value_type_name.size());
state.update(constant_value_type_name);
}
if (table_expression_modifiers)
table_expression_modifiers->updateTreeHash(state);
}
QueryTreeNodePtr QueryNode::cloneImpl() const
@ -276,7 +260,6 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
result_query_node->cte_name = cte_name;
result_query_node->projection_columns = projection_columns;
result_query_node->constant_value = constant_value;
result_query_node->table_expression_modifiers = table_expression_modifiers;
return result_query_node;
}

View File

@ -188,24 +188,6 @@ public:
is_group_by_all = is_group_by_all_value;
}
/// Return true if query node has table expression modifiers, false otherwise
bool hasTableExpressionModifiers() const
{
return table_expression_modifiers.has_value();
}
/// Get table expression modifiers
const std::optional<TableExpressionModifiers> & getTableExpressionModifiers() const
{
return table_expression_modifiers;
}
/// Set table expression modifiers
void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value)
{
table_expression_modifiers = std::move(table_expression_modifiers_value);
}
/// Returns true if query node WITH section is not empty, false otherwise
bool hasWith() const
{
@ -615,7 +597,6 @@ private:
std::string cte_name;
NamesAndTypes projection_columns;
ConstantValuePtr constant_value;
std::optional<TableExpressionModifiers> table_expression_modifiers;
SettingsChanges settings_changes;
static constexpr size_t with_child_index = 0;

View File

@ -145,12 +145,10 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectWithUnionExpression(const ASTPtr &
if (select_lists.children.size() == 1)
return buildSelectOrUnionExpression(select_lists.children[0], is_subquery, cte_name);
auto union_node = std::make_shared<UnionNode>();
auto union_node = std::make_shared<UnionNode>(select_with_union_query_typed.union_mode);
union_node->setIsSubquery(is_subquery);
union_node->setIsCTE(!cte_name.empty());
union_node->setCTEName(cte_name);
union_node->setUnionMode(select_with_union_query_typed.union_mode);
union_node->setUnionModes(select_with_union_query_typed.list_of_modes);
union_node->setOriginalAST(select_with_union_query);
size_t select_lists_children_size = select_lists.children.size();
@ -173,23 +171,22 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectIntersectExceptQuery(const ASTPtr
if (select_lists.size() == 1)
return buildSelectExpression(select_lists[0], is_subquery, cte_name);
auto union_node = std::make_shared<UnionNode>();
union_node->setIsSubquery(is_subquery);
union_node->setIsCTE(!cte_name.empty());
union_node->setCTEName(cte_name);
SelectUnionMode union_mode;
if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT_ALL)
union_node->setUnionMode(SelectUnionMode::INTERSECT_ALL);
union_mode = SelectUnionMode::INTERSECT_ALL;
else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT_DISTINCT)
union_node->setUnionMode(SelectUnionMode::INTERSECT_DISTINCT);
union_mode = SelectUnionMode::INTERSECT_DISTINCT;
else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT_ALL)
union_node->setUnionMode(SelectUnionMode::EXCEPT_ALL);
union_mode = SelectUnionMode::EXCEPT_ALL;
else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT_DISTINCT)
union_node->setUnionMode(SelectUnionMode::EXCEPT_DISTINCT);
union_mode = SelectUnionMode::EXCEPT_DISTINCT;
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION type is not initialized");
union_node->setUnionModes(SelectUnionModes(select_lists.size() - 1, union_node->getUnionMode()));
auto union_node = std::make_shared<UnionNode>(union_mode);
union_node->setIsSubquery(is_subquery);
union_node->setIsCTE(!cte_name.empty());
union_node->setCTEName(cte_name);
union_node->setOriginalAST(select_intersect_except_query);
size_t select_lists_size = select_lists.size();
@ -677,14 +674,10 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
if (table_expression_modifiers)
{
if (auto * query_node = node->as<QueryNode>())
query_node->setTableExpressionModifiers(*table_expression_modifiers);
else if (auto * union_node = node->as<UnionNode>())
union_node->setTableExpressionModifiers(*table_expression_modifiers);
else
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Unexpected table expression subquery node. Expected union or query. Actual {}",
node->formatASTForErrorMessage());
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Table expression modifiers {} are not supported for subquery {}",
table_expression_modifiers->formatForErrorMessage(),
node->formatASTForErrorMessage());
}
table_expressions.push_back(std::move(node));

View File

@ -5,6 +5,7 @@
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
namespace DB
{
@ -39,4 +40,27 @@ void TableExpressionModifiers::updateTreeHash(SipHash & hash_state) const
}
}
String TableExpressionModifiers::formatForErrorMessage() const
{
WriteBufferFromOwnString buffer;
if (has_final)
buffer << "FINAL";
if (sample_size_ratio)
{
if (has_final)
buffer << ' ';
buffer << "SAMPLE " << ASTSampleRatio::toString(*sample_size_ratio);
}
if (sample_offset_ratio)
{
if (has_final || sample_size_ratio)
buffer << ' ';
buffer << "OFFSET " << ASTSampleRatio::toString(*sample_offset_ratio);
}
return buffer.str();
}
}

View File

@ -58,6 +58,9 @@ public:
/// Update tree hash
void updateTreeHash(SipHash & hash_state) const;
/// Format for error message
String formatForErrorMessage() const;
private:
bool has_final = false;
std::optional<Rational> sample_size_ratio;

View File

@ -30,11 +30,18 @@ namespace DB
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
extern const int BAD_ARGUMENTS;
}
UnionNode::UnionNode()
UnionNode::UnionNode(SelectUnionMode union_mode_)
: IQueryTreeNode(children_size)
, union_mode(union_mode_)
{
if (union_mode == SelectUnionMode::UNION_DEFAULT ||
union_mode == SelectUnionMode::EXCEPT_DEFAULT ||
union_mode == SelectUnionMode::INTERSECT_DEFAULT)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION mode {} must be normalized", toString(union_mode));
children[queries_child_index] = std::make_shared<ListNode>();
}
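The constructor hunk above changes the contract: a `UnionNode` must now be created with an already normalized union mode. A minimal illustrative sketch of that contract, not part of the diff (the `UNION_ALL` enumerator is assumed from `SelectUnionMode`, it is not shown in this hunk):

```cpp
// Illustrative sketch only. Callers such as QueryTreeBuilder are expected to
// pass a normalized mode; the *_DEFAULT placeholders are rejected at construction.
auto union_all = std::make_shared<UnionNode>(SelectUnionMode::UNION_ALL);     // ok
auto broken    = std::make_shared<UnionNode>(SelectUnionMode::UNION_DEFAULT); // throws BAD_ARGUMENTS
```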
@ -101,28 +108,8 @@ void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
buffer << ", constant_value_type: " << constant_value->getType()->getName();
}
if (table_expression_modifiers)
{
buffer << ", ";
table_expression_modifiers->dump(buffer);
}
buffer << ", union_mode: " << toString(union_mode);
size_t union_modes_size = union_modes.size();
buffer << '\n' << std::string(indent + 2, ' ') << "UNION MODES " << union_modes_size << '\n';
for (size_t i = 0; i < union_modes_size; ++i)
{
buffer << std::string(indent + 4, ' ');
auto query_union_mode = union_modes[i];
buffer << toString(query_union_mode);
if (i + 1 != union_modes_size)
buffer << '\n';
}
buffer << '\n' << std::string(indent + 2, ' ') << "QUERIES\n";
getQueriesNode()->dumpTreeImpl(buffer, format_state, indent + 4);
}
@ -137,15 +124,8 @@ bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs) const
else if (!constant_value && rhs_typed.constant_value)
return false;
if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers)
return false;
else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers)
return false;
else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers)
return false;
return is_subquery == rhs_typed.is_subquery && is_cte == rhs_typed.is_cte && cte_name == rhs_typed.cte_name &&
union_mode == rhs_typed.union_mode && union_modes == rhs_typed.union_modes;
union_mode == rhs_typed.union_mode;
}
void UnionNode::updateTreeHashImpl(HashState & state) const
@ -158,10 +138,6 @@ void UnionNode::updateTreeHashImpl(HashState & state) const
state.update(static_cast<size_t>(union_mode));
state.update(union_modes.size());
for (const auto & query_union_mode : union_modes)
state.update(static_cast<size_t>(query_union_mode));
if (constant_value)
{
auto constant_dump = applyVisitor(FieldVisitorToString(), constant_value->getValue());
@ -172,23 +148,16 @@ void UnionNode::updateTreeHashImpl(HashState & state) const
state.update(constant_value_type_name.size());
state.update(constant_value_type_name);
}
if (table_expression_modifiers)
table_expression_modifiers->updateTreeHash(state);
}
QueryTreeNodePtr UnionNode::cloneImpl() const
{
auto result_union_node = std::make_shared<UnionNode>();
auto result_union_node = std::make_shared<UnionNode>(union_mode);
result_union_node->is_subquery = is_subquery;
result_union_node->is_cte = is_cte;
result_union_node->cte_name = cte_name;
result_union_node->union_mode = union_mode;
result_union_node->union_modes = union_modes;
result_union_node->union_modes_set = union_modes_set;
result_union_node->constant_value = constant_value;
result_union_node->table_expression_modifiers = table_expression_modifiers;
return result_union_node;
}
@ -197,14 +166,7 @@ ASTPtr UnionNode::toASTImpl() const
{
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
select_with_union_query->union_mode = union_mode;
if (union_mode != SelectUnionMode::UNION_DEFAULT &&
union_mode != SelectUnionMode::EXCEPT_DEFAULT &&
union_mode != SelectUnionMode::INTERSECT_DEFAULT)
select_with_union_query->is_normalized = true;
select_with_union_query->list_of_modes = union_modes;
select_with_union_query->set_of_modes = union_modes_set;
select_with_union_query->is_normalized = true;
select_with_union_query->children.push_back(getQueriesNode()->toAST());
select_with_union_query->list_of_selects = select_with_union_query->children.back();

View File

@ -19,6 +19,7 @@ namespace ErrorCodes
}
/** Union node represents union of queries in query tree.
* Union node must be initialized with normalized union mode.
*
* Example: (SELECT id FROM test_table) UNION ALL (SELECT id FROM test_table_2);
* Example: (SELECT id FROM test_table) UNION DISTINCT (SELECT id FROM test_table_2);
@ -41,7 +42,8 @@ using UnionNodePtr = std::shared_ptr<UnionNode>;
class UnionNode final : public IQueryTreeNode
{
public:
explicit UnionNode();
/// Construct union node with normalized union mode
explicit UnionNode(SelectUnionMode union_mode_);
/// Returns true if union node is subquery, false otherwise
bool isSubquery() const
@ -85,25 +87,6 @@ public:
return union_mode;
}
/// Set union mode value
void setUnionMode(SelectUnionMode union_mode_value)
{
union_mode = union_mode_value;
}
/// Get union modes
const SelectUnionModes & getUnionModes() const
{
return union_modes;
}
/// Set union modes value
void setUnionModes(const SelectUnionModes & union_modes_value)
{
union_modes = union_modes_value;
union_modes_set = SelectUnionModesSet(union_modes.begin(), union_modes.end());
}
/// Get union node queries
const ListNode & getQueries() const
{
@ -128,24 +111,6 @@ public:
return children[queries_child_index];
}
/// Return true if union node has table expression modifiers, false otherwise
bool hasTableExpressionModifiers() const
{
return table_expression_modifiers.has_value();
}
/// Get table expression modifiers
const std::optional<TableExpressionModifiers> & getTableExpressionModifiers() const
{
return table_expression_modifiers;
}
/// Set table expression modifiers
void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value)
{
table_expression_modifiers = std::move(table_expression_modifiers_value);
}
/// Compute union node projection columns
NamesAndTypes computeProjectionColumns() const;
@ -189,10 +154,7 @@ private:
bool is_cte = false;
std::string cte_name;
SelectUnionMode union_mode;
SelectUnionModes union_modes;
SelectUnionModesSet union_modes_set;
ConstantValuePtr constant_value;
std::optional<TableExpressionModifiers> table_expression_modifiers;
static constexpr size_t queries_child_index = 0;
static constexpr size_t children_size = queries_child_index + 1;

View File

@ -98,11 +98,6 @@ static ASTPtr convertIntoTableExpressionAST(const QueryTreeNodePtr & table_expre
if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION)
{
if (auto * query_node = table_expression_node->as<QueryNode>())
table_expression_modifiers = query_node->getTableExpressionModifiers();
else if (auto * union_node = table_expression_node->as<UnionNode>())
table_expression_modifiers = union_node->getTableExpressionModifiers();
result_table_expression->subquery = result_table_expression->children.back();
}
else if (node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::IDENTIFIER)

View File

@ -176,6 +176,9 @@ public:
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override { data->finalize(); }
bool isFinalized() const override { return data->isFinalized(); }
bool isCollationSupported() const override { return getData().isCollationSupported(); }
size_t getNumberOfDimensions() const;

View File

@ -93,6 +93,8 @@ public:
bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override { nested->finalize(); }
bool isFinalized() const override { return nested->isFinalized(); }
const ColumnArray & getNestedColumn() const { return assert_cast<const ColumnArray &>(*nested); }
ColumnArray & getNestedColumn() { return assert_cast<ColumnArray &>(*nested); }

View File

@ -732,8 +732,8 @@ void ColumnObject::get(size_t n, Field & res) const
{
assert(n < size());
res = Object();
auto & object = res.get<Object &>();
for (const auto & entry : subcolumns)
{
auto it = object.try_emplace(entry->path.getPath()).first;
@ -744,7 +744,6 @@ void ColumnObject::get(size_t n, Field & res) const
void ColumnObject::insertFrom(const IColumn & src, size_t n)
{
insert(src[n]);
finalize();
}
void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -792,9 +791,8 @@ MutableColumnPtr ColumnObject::applyForSubcolumns(Func && func) const
{
if (!isFinalized())
{
auto finalized = IColumn::mutate(getPtr());
auto finalized = cloneFinalized();
auto & finalized_object = assert_cast<ColumnObject &>(*finalized);
finalized_object.finalize();
return finalized_object.applyForSubcolumns(std::forward<Func>(func));
}

View File

@ -198,10 +198,6 @@ public:
Subcolumns & getSubcolumns() { return subcolumns; }
PathsInData getKeys() const;
/// Finalizes all subcolumns.
void finalize();
bool isFinalized() const;
/// Part of interface
const char * getFamilyName() const override { return "Object"; }
@ -219,12 +215,17 @@ public:
void popBack(size_t length) override;
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumnPtr cloneResized(size_t new_size) const override;
/// Finalizes all subcolumns.
void finalize() override;
bool isFinalized() const override;
/// Order of rows in ColumnObject is undefined.
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
@ -264,9 +265,7 @@ private:
template <typename Func>
MutableColumnPtr applyForSubcolumns(Func && func) const;
/// For given subcolumn return subcolumn from the same Nested type.
/// It's used to get shared sized of Nested to insert correct default values.
const Subcolumns::Node * getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const;
};
}

View File

@ -570,4 +570,15 @@ void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, siz
return getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);
}
void ColumnTuple::finalize()
{
for (auto & column : columns)
column->finalize();
}
bool ColumnTuple::isFinalized() const
{
return std::all_of(columns.begin(), columns.end(), [](const auto & column) { return column->isFinalized(); });
}
}

View File

@ -103,6 +103,8 @@ public:
ColumnPtr compress() const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override;
bool isFinalized() const override;
size_t tupleSize() const { return columns.size(); }

View File

@ -453,6 +453,16 @@ public:
return getPtr();
}
/// Some columns may require finalization before using of other operations.
virtual void finalize() {}
virtual bool isFinalized() const { return true; }
MutablePtr cloneFinalized() const
{
auto finalized = IColumn::mutate(getPtr());
finalized->finalize();
return finalized;
}
[[nodiscard]] static MutablePtr mutate(Ptr ptr)
{

View File

@ -3,6 +3,7 @@
#include <Common/Exception.h>
#include <base/types.h>
#include <base/defines.h>
#include "ElementTypes.h"
namespace DB
@ -25,6 +26,7 @@ struct DummyJSONParser
{
public:
Element() = default;
static ElementType type() { return ElementType::NULL_VALUE; }
static bool isInt64() { return false; }
static bool isUInt64() { return false; }
static bool isDouble() { return false; }

View File

@ -0,0 +1,17 @@
#pragma once
namespace DB
{
// Enum values match simdjson's for fast conversion
enum class ElementType
{
ARRAY = '[',
OBJECT = '{',
INT64 = 'l',
UINT64 = 'u',
DOUBLE = 'd',
STRING = '"',
BOOL = 't',
NULL_VALUE = 'n'
};
}
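The "match simdjson's" comment can be verified at compile time; a small illustrative check, grounded only in the enumerator values shown above, would look like this:

```cpp
// Illustrative only: the enumerators mirror simdjson's dom::element_type characters,
// so converting between the two is essentially a value-preserving cast.
static_assert(static_cast<char>(ElementType::ARRAY) == '[');
static_assert(static_cast<char>(ElementType::OBJECT) == '{');
static_assert(static_cast<char>(ElementType::NULL_VALUE) == 'n');
```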

View File

@ -6,7 +6,7 @@
# include <base/types.h>
# include <base/defines.h>
# include <rapidjson/document.h>
# include "ElementTypes.h"
namespace DB
{
@ -26,6 +26,20 @@ struct RapidJSONParser
ALWAYS_INLINE Element() = default;
ALWAYS_INLINE Element(const rapidjson::Value & value_) : ptr(&value_) {} /// NOLINT
ALWAYS_INLINE ElementType type() const
{
switch (ptr->GetType())
{
case rapidjson::kNumberType: return ptr->IsDouble() ? ElementType::DOUBLE : (ptr->IsUint64() ? ElementType::UINT64 : ElementType::INT64);
case rapidjson::kStringType: return ElementType::STRING;
case rapidjson::kArrayType: return ElementType::ARRAY;
case rapidjson::kObjectType: return ElementType::OBJECT;
case rapidjson::kTrueType: return ElementType::BOOL;
case rapidjson::kFalseType: return ElementType::BOOL;
case rapidjson::kNullType: return ElementType::NULL_VALUE;
}
}
ALWAYS_INLINE bool isInt64() const { return ptr->IsInt64(); }
ALWAYS_INLINE bool isUInt64() const { return ptr->IsUint64(); }
ALWAYS_INLINE bool isDouble() const { return ptr->IsDouble(); }

View File

@ -7,7 +7,7 @@
# include <Common/Exception.h>
# include <base/defines.h>
# include <simdjson.h>
# include "ElementTypes.h"
namespace DB
{
@ -31,6 +31,21 @@ struct SimdJSONParser
ALWAYS_INLINE Element() {} /// NOLINT
ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {} /// NOLINT
ALWAYS_INLINE ElementType type() const
{
switch (element.type())
{
case simdjson::dom::element_type::INT64: return ElementType::INT64;
case simdjson::dom::element_type::UINT64: return ElementType::UINT64;
case simdjson::dom::element_type::DOUBLE: return ElementType::DOUBLE;
case simdjson::dom::element_type::STRING: return ElementType::STRING;
case simdjson::dom::element_type::ARRAY: return ElementType::ARRAY;
case simdjson::dom::element_type::OBJECT: return ElementType::OBJECT;
case simdjson::dom::element_type::BOOL: return ElementType::BOOL;
case simdjson::dom::element_type::NULL_VALUE: return ElementType::NULL_VALUE;
}
}
ALWAYS_INLINE bool isInt64() const { return element.type() == simdjson::dom::element_type::INT64; }
ALWAYS_INLINE bool isUInt64() const { return element.type() == simdjson::dom::element_type::UINT64; }
ALWAYS_INLINE bool isDouble() const { return element.type() == simdjson::dom::element_type::DOUBLE; }

View File

@ -48,6 +48,7 @@ public:
bool textCanContainOnlyValidUTF8() const override { return nested->textCanContainOnlyValidUTF8(); }
bool isComparable() const override { return nested->isComparable(); }
bool canBeComparedWithCollation() const override { return nested->canBeComparedWithCollation(); }
bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); }
bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override
{

View File

@ -22,6 +22,27 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
DataTypeMap::DataTypeMap(const DataTypePtr & nested_)
: nested(nested_)
{
const auto * type_array = typeid_cast<const DataTypeArray *>(nested.get());
if (!type_array)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expected Array(Tuple(key, value)) type, got {}", nested->getName());
const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type_array->getNestedType().get());
if (!type_tuple)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expected Array(Tuple(key, value)) type, got {}", nested->getName());
if (type_tuple->getElements().size() != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expected Array(Tuple(key, value)) type, got {}", nested->getName());
key_type = type_tuple->getElement(0);
value_type = type_tuple->getElement(1);
assertKeyType();
}
DataTypeMap::DataTypeMap(const DataTypes & elems_)
{

View File

@ -23,6 +23,7 @@ private:
public:
static constexpr bool is_parametric = true;
explicit DataTypeMap(const DataTypePtr & nested_);
explicit DataTypeMap(const DataTypes & elems);
DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & value_type_);
@ -40,6 +41,7 @@ public:
bool isComparable() const override { return key_type->isComparable() && value_type->isComparable(); }
bool isParametric() const override { return true; }
bool haveSubtypes() const override { return true; }
bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); }
const DataTypePtr & getKeyType() const { return key_type; }
const DataTypePtr & getValueType() const { return value_type; }

View File

@ -36,6 +36,7 @@ public:
bool haveSubtypes() const override { return false; }
bool equals(const IDataType & rhs) const override;
bool isParametric() const override { return true; }
bool hasDynamicSubcolumns() const override { return true; }
SerializationPtr doGetDefaultSerialization() const override;

View File

@ -247,6 +247,11 @@ bool DataTypeTuple::haveMaximumSizeOfValue() const
return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); });
}
bool DataTypeTuple::hasDynamicSubcolumns() const
{
return std::any_of(elems.begin(), elems.end(), [](auto && elem) { return elem->hasDynamicSubcolumns(); });
}
bool DataTypeTuple::isComparable() const
{
return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->isComparable(); });

View File

@ -50,6 +50,7 @@ public:
bool isComparable() const override;
bool textCanContainOnlyValidUTF8() const override;
bool haveMaximumSizeOfValue() const override;
bool hasDynamicSubcolumns() const override;
size_t getMaximumSizeOfValueInMemory() const override;
size_t getSizeOfValueInMemory() const override;

View File

@ -291,6 +291,9 @@ public:
/// Strings, Numbers, Date, DateTime, Nullable
virtual bool canBeInsideLowCardinality() const { return false; }
/// Object, Array(Object), Tuple(..., Object, ...)
virtual bool hasDynamicSubcolumns() const { return false; }
/// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column.
static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint);

View File

@ -1,17 +1,19 @@
#include <Storages/StorageSnapshot.h>
#include <DataTypes/ObjectUtils.h>
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNested.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/getLeastSupertype.h>
#include <DataTypes/NestedUtils.h>
#include <Storages/StorageSnapshot.h>
#include <Columns/ColumnObject.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnNullable.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTExpressionList.h>
@ -105,10 +107,11 @@ Array createEmptyArrayField(size_t num_dimensions)
DataTypePtr getDataTypeByColumn(const IColumn & column)
{
auto idx = column.getDataType();
if (WhichDataType(idx).isSimple())
WhichDataType which(idx);
if (which.isSimple())
return DataTypeFactory::instance().get(String(magic_enum::enum_name(idx)));
if (WhichDataType(idx).isNothing())
if (which.isNothing())
return std::make_shared<DataTypeNothing>();
if (const auto * column_array = checkAndGetColumn<ColumnArray>(&column))
@ -132,41 +135,124 @@ static auto extractVector(const std::vector<Tuple> & vec)
return res;
}
void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_storage_columns)
static DataTypePtr recreateTupleWithElements(const DataTypeTuple & type_tuple, const DataTypes & elements)
{
std::unordered_map<String, DataTypePtr> storage_columns_map;
for (const auto & [name, type] : extended_storage_columns)
storage_columns_map[name] = type;
for (auto & column : block)
{
if (!isObject(column.type))
continue;
const auto & column_object = assert_cast<const ColumnObject &>(*column.column);
if (!column_object.isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Cannot convert to tuple column '{}' from type {}. Column should be finalized first",
column.name, column.type->getName());
std::tie(column.column, column.type) = unflattenObjectToTuple(column_object);
auto it = storage_columns_map.find(column.name);
if (it == storage_columns_map.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", column.name);
/// Check that constructed Tuple type and type in storage are compatible.
getLeastCommonTypeForObject({column.type, it->second}, true);
}
return type_tuple.haveExplicitNames()
? std::make_shared<DataTypeTuple>(elements, type_tuple.getElementNames())
: std::make_shared<DataTypeTuple>(elements);
}
void deduceTypesOfObjectColumns(const StorageSnapshotPtr & storage_snapshot, Block & block)
static std::pair<ColumnPtr, DataTypePtr> convertObjectColumnToTuple(
const ColumnObject & column_object, const DataTypeObject & type_object)
{
if (!storage_snapshot->object_columns.empty())
if (!column_object.isFinalized())
{
auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects();
auto storage_columns = storage_snapshot->getColumns(options);
convertObjectsToTuples(block, storage_columns);
auto finalized = column_object.cloneFinalized();
const auto & finalized_object = assert_cast<const ColumnObject &>(*finalized);
return convertObjectColumnToTuple(finalized_object, type_object);
}
const auto & subcolumns = column_object.getSubcolumns();
PathsInData tuple_paths;
DataTypes tuple_types;
Columns tuple_columns;
for (const auto & entry : subcolumns)
{
tuple_paths.emplace_back(entry->path);
tuple_types.emplace_back(entry->data.getLeastCommonType());
tuple_columns.emplace_back(entry->data.getFinalizedColumnPtr());
}
return unflattenTuple(tuple_paths, tuple_types, tuple_columns);
}
static std::pair<ColumnPtr, DataTypePtr> recursivlyConvertDynamicColumnToTuple(
const ColumnPtr & column, const DataTypePtr & type)
{
if (!type->hasDynamicSubcolumns())
return {column, type};
if (const auto * type_object = typeid_cast<const DataTypeObject *>(type.get()))
{
const auto & column_object = assert_cast<const ColumnObject &>(*column);
return convertObjectColumnToTuple(column_object, *type_object);
}
if (const auto * type_array = typeid_cast<const DataTypeArray *>(type.get()))
{
const auto & column_array = assert_cast<const ColumnArray &>(*column);
auto [new_column, new_type] = recursivlyConvertDynamicColumnToTuple(
column_array.getDataPtr(), type_array->getNestedType());
return
{
ColumnArray::create(new_column, column_array.getOffsetsPtr()),
std::make_shared<DataTypeArray>(std::move(new_type)),
};
}
if (const auto * type_map = typeid_cast<const DataTypeMap *>(type.get()))
{
const auto & column_map = assert_cast<const ColumnMap &>(*column);
auto [new_column, new_type] = recursivlyConvertDynamicColumnToTuple(
column_map.getNestedColumnPtr(), type_map->getNestedType());
return
{
ColumnMap::create(new_column),
std::make_shared<DataTypeMap>(std::move(new_type)),
};
}
if (const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
{
const auto & tuple_columns = assert_cast<const ColumnTuple &>(*column).getColumns();
const auto & tuple_types = type_tuple->getElements();
assert(tuple_columns.size() == tuple_types.size());
const size_t tuple_size = tuple_types.size();
Columns new_tuple_columns(tuple_size);
DataTypes new_tuple_types(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
std::tie(new_tuple_columns[i], new_tuple_types[i])
= recursivlyConvertDynamicColumnToTuple(tuple_columns[i], tuple_types[i]);
}
return
{
ColumnTuple::create(new_tuple_columns),
recreateTupleWithElements(*type_tuple, new_tuple_types)
};
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type->getName());
}
void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & storage_snapshot)
{
for (auto & column : block)
{
if (!column.type->hasDynamicSubcolumns())
continue;
std::tie(column.column, column.type)
= recursivlyConvertDynamicColumnToTuple(column.column, column.type);
GetColumnsOptions options(GetColumnsOptions::AllPhysical);
auto storage_column = storage_snapshot->tryGetColumn(options, column.name);
if (!storage_column)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", column.name);
auto storage_column_concrete = storage_snapshot->getColumn(options.withExtendedObjects(), column.name);
/// Check that constructed Tuple type and type in storage are compatible.
getLeastCommonTypeForDynamicColumns(
storage_column->type, {column.type, storage_column_concrete.type}, true);
}
}
@ -217,24 +303,8 @@ void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
}
}
DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambiguos_paths)
static DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambiguos_paths)
{
if (types.empty())
return nullptr;
bool all_equal = true;
for (size_t i = 1; i < types.size(); ++i)
{
if (!types[i]->equals(*types[0]))
{
all_equal = false;
break;
}
}
if (all_equal)
return types[0];
/// Types of subcolumns by path from all tuples.
std::unordered_map<PathInData, DataTypes, PathInData::Hash> subcolumns_types;
@ -287,19 +357,139 @@ DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambi
return unflattenTuple(tuple_paths, tuple_types);
}
NameSet getNamesOfObjectColumns(const NamesAndTypesList & columns_list)
{
NameSet res;
for (const auto & [name, type] : columns_list)
if (isObject(type))
res.insert(name);
static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl(
const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths);
return res;
template<typename Type>
static DataTypePtr getLeastCommonTypeForColumnWithNestedType(
const Type & type, const DataTypes & concrete_types, bool check_ambiguos_paths)
{
DataTypes nested_types;
nested_types.reserve(concrete_types.size());
for (const auto & concrete_type : concrete_types)
{
const auto * type_with_nested_conctete = typeid_cast<const Type *>(concrete_type.get());
if (!type_with_nested_conctete)
throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected {} type, got {}", demangle(typeid(Type).name()), concrete_type->getName());
nested_types.push_back(type_with_nested_conctete->getNestedType());
}
return std::make_shared<Type>(
getLeastCommonTypeForDynamicColumnsImpl(
type.getNestedType(), nested_types, check_ambiguos_paths));
}
bool hasObjectColumns(const ColumnsDescription & columns)
static DataTypePtr getLeastCommonTypeForTuple(
const DataTypeTuple & type, const DataTypes & concrete_types, bool check_ambiguos_paths)
{
return std::any_of(columns.begin(), columns.end(), [](const auto & column) { return isObject(column.type); });
const auto & element_types = type.getElements();
DataTypes new_element_types(element_types.size());
for (size_t i = 0; i < element_types.size(); ++i)
{
DataTypes concrete_element_types;
concrete_element_types.reserve(concrete_types.size());
for (const auto & type_concrete : concrete_types)
{
const auto * type_tuple_conctete = typeid_cast<const DataTypeTuple *>(type_concrete.get());
if (!type_tuple_conctete)
throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected Tuple type, got {}", type_concrete->getName());
concrete_element_types.push_back(type_tuple_conctete->getElement(i));
}
new_element_types[i] = getLeastCommonTypeForDynamicColumnsImpl(
element_types[i], concrete_element_types, check_ambiguos_paths);
}
return recreateTupleWithElements(type, new_element_types);
}
static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl(
const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths)
{
if (!type_in_storage->hasDynamicSubcolumns())
return type_in_storage;
if (isObject(type_in_storage))
return getLeastCommonTypeForObject(concrete_types, check_ambiguos_paths);
if (const auto * type_array = typeid_cast<const DataTypeArray *>(type_in_storage.get()))
return getLeastCommonTypeForColumnWithNestedType(*type_array, concrete_types, check_ambiguos_paths);
if (const auto * type_map = typeid_cast<const DataTypeMap *>(type_in_storage.get()))
return getLeastCommonTypeForColumnWithNestedType(*type_map, concrete_types, check_ambiguos_paths);
if (const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type_in_storage.get()))
return getLeastCommonTypeForTuple(*type_tuple, concrete_types, check_ambiguos_paths);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
}
DataTypePtr getLeastCommonTypeForDynamicColumns(
const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths)
{
if (concrete_types.empty())
return nullptr;
bool all_equal = true;
for (size_t i = 1; i < concrete_types.size(); ++i)
{
if (!concrete_types[i]->equals(*concrete_types[0]))
{
all_equal = false;
break;
}
}
if (all_equal)
return concrete_types[0];
return getLeastCommonTypeForDynamicColumnsImpl(type_in_storage, concrete_types, check_ambiguos_paths);
}
DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage)
{
if (!type_in_storage->hasDynamicSubcolumns())
return type_in_storage;
if (isObject(type_in_storage))
return std::make_shared<DataTypeTuple>(
DataTypes{std::make_shared<DataTypeUInt8>()}, Names{ColumnObject::COLUMN_NAME_DUMMY});
if (const auto * type_array = typeid_cast<const DataTypeArray *>(type_in_storage.get()))
return std::make_shared<DataTypeArray>(
createConcreteEmptyDynamicColumn(type_array->getNestedType()));
if (const auto * type_map = typeid_cast<const DataTypeMap *>(type_in_storage.get()))
return std::make_shared<DataTypeMap>(
createConcreteEmptyDynamicColumn(type_map->getNestedType()));
if (const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type_in_storage.get()))
{
const auto & elements = type_tuple->getElements();
DataTypes new_elements;
new_elements.reserve(elements.size());
for (const auto & element : elements)
new_elements.push_back(createConcreteEmptyDynamicColumn(element));
return recreateTupleWithElements(*type_tuple, new_elements);
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
}
bool hasDynamicSubcolumns(const ColumnsDescription & columns)
{
return std::any_of(columns.begin(), columns.end(),
[](const auto & column)
{
return column.type->hasDynamicSubcolumns();
});
}
void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescription & object_columns, bool with_subcolumns)
@ -320,16 +510,20 @@ void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescript
columns_list.splice(columns_list.end(), std::move(subcolumns_list));
}
void updateObjectColumns(ColumnsDescription & object_columns, const NamesAndTypesList & new_columns)
void updateObjectColumns(
ColumnsDescription & object_columns,
const ColumnsDescription & storage_columns,
const NamesAndTypesList & new_columns)
{
for (const auto & new_column : new_columns)
{
auto object_column = object_columns.tryGetColumn(GetColumnsOptions::All, new_column.name);
if (object_column && !object_column->type->equals(*new_column.type))
{
auto storage_column = storage_columns.getColumn(GetColumnsOptions::All, new_column.name);
object_columns.modify(new_column.name, [&](auto & column)
{
column.type = getLeastCommonTypeForObject({object_column->type, new_column.type});
column.type = getLeastCommonTypeForDynamicColumns(storage_column.type, {object_column->type, new_column.type});
});
}
}
@ -745,13 +939,6 @@ void replaceMissedSubcolumnsByConstants(
addConstantToWithClause(query, name, type);
}
void finalizeObjectColumns(const MutableColumns & columns)
{
for (const auto & column : columns)
if (auto * column_object = typeid_cast<ColumnObject *>(column.get()))
column_object->finalize();
}
Field FieldVisitorReplaceScalars::operator()(const Array & x) const
{
if (num_dimensions_to_keep == 0)
@ -768,11 +955,13 @@ size_t FieldVisitorToNumberOfDimensions::operator()(const Array & x)
{
const size_t size = x.size();
size_t dimensions = 0;
for (size_t i = 0; i < size; ++i)
{
size_t element_dimensions = applyVisitor(*this, x[i]);
if (i > 0 && element_dimensions != dimensions)
need_fold_dimension = true;
dimensions = std::max(dimensions, element_dimensions);
}
@ -783,12 +972,13 @@ Field FieldVisitorFoldDimension::operator()(const Array & x) const
{
if (num_dimensions_to_fold == 0)
return x;
const size_t size = x.size();
Array res(size);
for (size_t i = 0; i < size; ++i)
{
res[i] = applyVisitor(FieldVisitorFoldDimension(num_dimensions_to_fold - 1), x[i]);
}
return res;
}
}

View File

@ -39,27 +39,31 @@ Array createEmptyArrayField(size_t num_dimensions);
DataTypePtr getDataTypeByColumn(const IColumn & column);
/// Converts Object types and columns to Tuples in @columns_list and @block
/// and checks that types are consistent with types in @extended_storage_columns.
void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_storage_columns);
void deduceTypesOfObjectColumns(const StorageSnapshotPtr & storage_snapshot, Block & block);
/// and checks that types are consistent with types in @storage_snapshot.
void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & storage_snapshot);
/// Checks that each path is not the prefix of any other path.
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths);
/// Receives several Tuple types and deduces the least common type among them.
DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambiguos_paths = false);
DataTypePtr getLeastCommonTypeForDynamicColumns(
const DataTypePtr & type_in_storage, const DataTypes & types, bool check_ambiguos_paths = false);
DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage);
/// Converts types of object columns to tuples in @columns_list
/// according to @object_columns and adds all tuple's subcolumns if needed.
void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescription & object_columns, bool with_subcolumns);
NameSet getNamesOfObjectColumns(const NamesAndTypesList & columns_list);
bool hasObjectColumns(const ColumnsDescription & columns);
void finalizeObjectColumns(const MutableColumns & columns);
/// Checks whether @columns contain any column with dynamic subcolumns.
bool hasDynamicSubcolumns(const ColumnsDescription & columns);
/// Updates types of objects in @object_columns inplace
/// according to types in new_columns.
void updateObjectColumns(ColumnsDescription & object_columns, const NamesAndTypesList & new_columns);
void updateObjectColumns(
ColumnsDescription & object_columns,
const ColumnsDescription & storage_columns,
const NamesAndTypesList & new_columns);
using DataTypeTuplePtr = std::shared_ptr<DataTypeTuple>;
@ -142,13 +146,15 @@ public:
{
if (num_dimensions_to_fold == 0)
return x;
Array res(1,x);
Array res(1, x);
for (size_t i = 1; i < num_dimensions_to_fold; ++i)
{
Array new_res;
new_res.push_back(std::move(res));
res = std::move(new_res);
}
return res;
}
@ -163,7 +169,7 @@ private:
/// columns-like objects from entry to which Iterator points.
/// columns-like object should have fields "name" and "type".
template <typename Iterator, typename EntryColumnsGetter>
ColumnsDescription getObjectColumns(
ColumnsDescription getConcreteObjectColumns(
Iterator begin, Iterator end,
const ColumnsDescription & storage_columns,
EntryColumnsGetter && entry_columns_getter)
@ -176,14 +182,8 @@ ColumnsDescription getObjectColumns(
/// dummy column will be removed.
for (const auto & column : storage_columns)
{
if (isObject(column.type))
{
auto tuple_type = std::make_shared<DataTypeTuple>(
DataTypes{std::make_shared<DataTypeUInt8>()},
Names{ColumnObject::COLUMN_NAME_DUMMY});
types_in_entries[column.name].push_back(std::move(tuple_type));
}
if (column.type->hasDynamicSubcolumns())
types_in_entries[column.name].push_back(createConcreteEmptyDynamicColumn(column.type));
}
for (auto it = begin; it != end; ++it)
@ -192,14 +192,17 @@ ColumnsDescription getObjectColumns(
for (const auto & column : entry_columns)
{
auto storage_column = storage_columns.tryGetPhysical(column.name);
if (storage_column && isObject(storage_column->type))
if (storage_column && storage_column->type->hasDynamicSubcolumns())
types_in_entries[column.name].push_back(column.type);
}
}
ColumnsDescription res;
for (const auto & [name, types] : types_in_entries)
res.add({name, getLeastCommonTypeForObject(types)});
{
auto storage_column = storage_columns.getPhysical(name);
res.add({name, getLeastCommonTypeForDynamicColumns(storage_column.type, types)});
}
return res;
}

View File

@ -249,7 +249,9 @@ public:
};
/// Call before serializeBinaryBulkWithMultipleStreams chain to write something before first mark.
/// Column may be used only to retrieve the structure.
virtual void serializeBinaryBulkStatePrefix(
const IColumn & /*column*/,
SerializeBinaryBulkSettings & /*settings*/,
SerializeBinaryBulkStatePtr & /*state*/) const {}
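For context, a hedged sketch of the bulk-write call order this comment describes, with the column now threaded through the prefix call. The stream getter and the settings fields are assumptions based on the usual `ISerialization` shape, not something this hunk shows:

```cpp
// Sketch only: serialize a whole column into a single WriteBuffer.
void writeColumn(const ISerialization & serialization, const IColumn & column, WriteBuffer & out)
{
    ISerialization::SerializeBinaryBulkSettings settings;
    settings.getter = [&](const ISerialization::SubstreamPath &) { return &out; };
    settings.position_independent_encoding = false;

    ISerialization::SerializeBinaryBulkStatePtr state;
    serialization.serializeBinaryBulkStatePrefix(column, settings, state); // new: needs the column to learn its structure
    serialization.serializeBinaryBulkWithMultipleStreams(column, 0, column.size(), settings, state);
    serialization.serializeBinaryBulkStateSuffix(settings, state);
}
```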

View File

@ -246,11 +246,13 @@ void SerializationArray::enumerateStreams(
}
void SerializationArray::serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::ArrayElements);
nested->serializeBinaryBulkStatePrefix(settings, state);
const auto & column_array = assert_cast<const ColumnArray &>(column);
nested->serializeBinaryBulkStatePrefix(column_array.getData(), settings, state);
settings.path.pop_back();
}

View File

@ -41,6 +41,7 @@ public:
const SubstreamData & data) const override;
void serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;

View File

@ -221,6 +221,7 @@ struct DeserializeStateLowCardinality : public ISerialization::DeserializeBinary
};
void SerializationLowCardinality::serializeBinaryBulkStatePrefix(
const IColumn & /*column*/,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{

View File

@ -23,6 +23,7 @@ public:
const SubstreamData & data) const override;
void serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;

View File

@ -270,10 +270,11 @@ void SerializationMap::enumerateStreams(
}
void SerializationMap::serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
nested->serializeBinaryBulkStatePrefix(settings, state);
nested->serializeBinaryBulkStatePrefix(extractNestedColumn(column), settings, state);
}
void SerializationMap::serializeBinaryBulkStateSuffix(

View File

@ -37,6 +37,7 @@ public:
const SubstreamData & data) const override;
void serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;

View File

@ -17,11 +17,12 @@ void SerializationNamed::enumerateStreams(
}
void SerializationNamed::serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
addToPath(settings.path);
nested_serialization->serializeBinaryBulkStatePrefix(settings, state);
nested_serialization->serializeBinaryBulkStatePrefix(column, settings, state);
settings.path.pop_back();
}

View File

@ -31,6 +31,7 @@ public:
const SubstreamData & data) const override;
void serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;

View File

@ -70,11 +70,13 @@ void SerializationNullable::enumerateStreams(
}
void SerializationNullable::serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::NullableElements);
nested->serializeBinaryBulkStatePrefix(settings, state);
const auto & column_nullable = assert_cast<const ColumnNullable &>(column);
nested->serializeBinaryBulkStatePrefix(column_nullable.getNestedColumn(), settings, state);
settings.path.pop_back();
}

View File

@ -19,6 +19,7 @@ public:
const SubstreamData & data) const override;
void serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;

View File

@ -13,8 +13,6 @@
#include <Columns/ColumnString.h>
#include <Functions/FunctionsConversion.h>
#include <Common/FieldVisitorToString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/VarInt.h>
@ -30,6 +28,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int INCORRECT_DATA;
extern const int CANNOT_READ_ALL_DATA;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int LOGICAL_ERROR;
}
@ -141,7 +140,6 @@ void SerializationObject<Parser>::checkSerializationIsSupported(const TSettings
template <typename Parser>
struct SerializationObject<Parser>::SerializeStateObject : public ISerialization::SerializeBinaryBulkState
{
bool is_first = true;
DataTypePtr nested_type;
SerializationPtr nested_serialization;
SerializeBinaryBulkStatePtr nested_state;
@ -158,6 +156,7 @@ struct SerializationObject<Parser>::DeserializeStateObject : public ISerializati
template <typename Parser>
void SerializationObject<Parser>::serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
@ -166,15 +165,34 @@ void SerializationObject<Parser>::serializeBinaryBulkStatePrefix(
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"DataTypeObject doesn't support serialization with non-trivial state");
const auto & column_object = assert_cast<const ColumnObject &>(column);
if (!column_object.isFinalized())
{
auto finalized = column_object.cloneFinalized();
serializeBinaryBulkStatePrefix(*finalized, settings, state);
return;
}
settings.path.push_back(Substream::ObjectStructure);
auto * stream = settings.getter(settings.path);
settings.path.pop_back();
if (!stream)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for kind of binary serialization");
auto [tuple_column, tuple_type] = unflattenObjectToTuple(column_object);
writeIntBinary(static_cast<UInt8>(BinarySerializationKind::TUPLE), *stream);
state = std::make_shared<SerializeStateObject>();
writeStringBinary(tuple_type->getName(), *stream);
auto state_object = std::make_shared<SerializeStateObject>();
state_object->nested_type = tuple_type;
state_object->nested_serialization = tuple_type->getDefaultSerialization();
settings.path.back() = Substream::ObjectData;
state_object->nested_serialization->serializeBinaryBulkStatePrefix(*tuple_column, settings, state_object->nested_state);
state = std::move(state_object);
settings.path.pop_back();
}
template <typename Parser>
@ -261,33 +279,14 @@ void SerializationObject<Parser>::serializeBinaryBulkWithMultipleStreams(
if (!column_object.isFinalized())
{
auto finalized_object = column_object.clone();
assert_cast<ColumnObject &>(*finalized_object).finalize();
serializeBinaryBulkWithMultipleStreams(*finalized_object, offset, limit, settings, state);
auto finalized = column_object.cloneFinalized();
serializeBinaryBulkWithMultipleStreams(*finalized, offset, limit, settings, state);
return;
}
auto [tuple_column, tuple_type] = unflattenObjectToTuple(column_object);
if (state_object->is_first)
{
/// Actually it's a part of serializeBinaryBulkStatePrefix,
/// but it cannot be done there, because we have to know the
/// structure of column.
settings.path.push_back(Substream::ObjectStructure);
if (auto * stream = settings.getter(settings.path))
writeStringBinary(tuple_type->getName(), *stream);
state_object->nested_type = tuple_type;
state_object->nested_serialization = tuple_type->getDefaultSerialization();
state_object->is_first = false;
settings.path.back() = Substream::ObjectData;
state_object->nested_serialization->serializeBinaryBulkStatePrefix(settings, state_object->nested_state);
settings.path.pop_back();
}
else if (!state_object->nested_type->equals(*tuple_type))
if (!state_object->nested_type->equals(*tuple_type))
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Types of internal column of Object mismatched. Expected: {}, Got: {}",
@ -411,18 +410,63 @@ void SerializationObject<Parser>::serializeTextImpl(const IColumn & column, size
writeChar('{', ostr);
for (auto it = subcolumns.begin(); it != subcolumns.end(); ++it)
{
const auto & entry = *it;
if (it != subcolumns.begin())
writeCString(",", ostr);
writeDoubleQuoted((*it)->path.getPath(), ostr);
writeDoubleQuoted(entry->path.getPath(), ostr);
writeChar(':', ostr);
auto serialization = (*it)->data.getLeastCommonType()->getDefaultSerialization();
serialization->serializeTextJSON((*it)->data.getFinalizedColumn(), row_num, ostr, settings);
serializeTextFromSubcolumn(entry->data, row_num, ostr, settings);
}
writeChar('}', ostr);
}
template <typename Parser>
void SerializationObject<Parser>::serializeTextFromSubcolumn(
const ColumnObject::Subcolumn & subcolumn, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const auto & least_common_type = subcolumn.getLeastCommonType();
if (subcolumn.isFinalized())
{
const auto & finalized_column = subcolumn.getFinalizedColumn();
auto info = least_common_type->getSerializationInfo(finalized_column);
auto serialization = least_common_type->getSerialization(*info);
serialization->serializeTextJSON(finalized_column, row_num, ostr, settings);
return;
}
size_t ind = row_num;
if (ind < subcolumn.getNumberOfDefaultsInPrefix())
{
/// Suboptimal, but it should happen rarely.
auto tmp_column = subcolumn.getLeastCommonType()->createColumn();
tmp_column->insertDefault();
auto info = least_common_type->getSerializationInfo(*tmp_column);
auto serialization = least_common_type->getSerialization(*info);
serialization->serializeTextJSON(*tmp_column, 0, ostr, settings);
return;
}
ind -= subcolumn.getNumberOfDefaultsInPrefix();
for (const auto & part : subcolumn.getData())
{
if (ind < part->size())
{
auto part_type = getDataTypeByColumn(*part);
auto info = part_type->getSerializationInfo(*part);
auto serialization = part_type->getSerialization(*info);
serialization->serializeTextJSON(*part, ind, ostr, settings);
return;
}
ind -= part->size();
}
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index ({}) for text serialization is out of range", row_num);
}
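The loop above locates a row across a prefix of implicit default values followed by several typed parts. A self-contained toy version of that lookup (the `Part` type and `valueAt` helper are hypothetical, not the ClickHouse classes):

```cpp
// Minimal sketch: find the value for `row` when the data is a default prefix plus parts.
#include <cstddef>
#include <optional>
#include <vector>

struct Part { std::vector<double> values; };   // hypothetical part payload

std::optional<double> valueAt(size_t defaults_in_prefix, const std::vector<Part> & parts, size_t row)
{
    if (row < defaults_in_prefix)
        return std::nullopt;                    // row falls into the default prefix
    row -= defaults_in_prefix;
    for (const auto & part : parts)
    {
        if (row < part.values.size())
            return part.values[row];            // found the owning part
        row -= part.values.size();              // skip this part and keep searching
    }
    return std::nullopt;                        // index past the last part
}
```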
template <typename Parser>
void SerializationObject<Parser>::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{

@ -8,7 +8,7 @@ namespace DB
{
/** Serialization for data type Object.
* Supported only test serialization/deserialization.
* Supported only text serialization/deserialization.
* and binary bulk serialization/deserialization without position independent
* encoding, i.e. serialization/deserialization into Native format.
*/
@ -31,6 +31,7 @@ public:
*/
void serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
@ -104,6 +105,7 @@ private:
void deserializeTextImpl(IColumn & column, Reader && reader) const;
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;
void serializeTextFromSubcolumn(const ColumnObject::Subcolumn & subcolumn, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;
/// Pool of parser objects to make SerializationObject thread safe.
mutable SimpleObjectPool<Parser> parsers_pool;

@ -178,11 +178,16 @@ void SerializationSparse::enumerateStreams(
}
void SerializationSparse::serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::SparseElements);
nested->serializeBinaryBulkStatePrefix(settings, state);
if (const auto * column_sparse = typeid_cast<const ColumnSparse *>(&column))
nested->serializeBinaryBulkStatePrefix(column_sparse->getValuesColumn(), settings, state);
else
nested->serializeBinaryBulkStatePrefix(column, settings, state);
settings.path.pop_back();
}

@ -33,6 +33,7 @@ public:
const SubstreamData & data) const override;
void serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;

@ -314,6 +314,7 @@ struct DeserializeBinaryBulkStateTuple : public ISerialization::DeserializeBinar
void SerializationTuple::serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
@ -321,7 +322,7 @@ void SerializationTuple::serializeBinaryBulkStatePrefix(
tuple_state->states.resize(elems.size());
for (size_t i = 0; i < elems.size(); ++i)
elems[i]->serializeBinaryBulkStatePrefix(settings, tuple_state->states[i]);
elems[i]->serializeBinaryBulkStatePrefix(extractElementColumn(column, i), settings, tuple_state->states[i]);
state = std::move(tuple_state);
}

@ -39,6 +39,7 @@ public:
const SubstreamData & data) const override;
void serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;

@ -13,10 +13,11 @@ void SerializationWrapper::enumerateStreams(
}
void SerializationWrapper::serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
nested_serialization->serializeBinaryBulkStatePrefix(settings, state);
nested_serialization->serializeBinaryBulkStatePrefix(column, settings, state);
}
void SerializationWrapper::serializeBinaryBulkStateSuffix(

@ -26,6 +26,7 @@ public:
const SubstreamData & data) const override;
void serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;

@ -31,7 +31,7 @@ TEST(SerializationObject, FromString)
settings.getter = [&out](const auto &) { return &out; };
writeIntBinary(static_cast<UInt8>(1), out);
serialization->serializeBinaryBulkStatePrefix(settings, state);
serialization->serializeBinaryBulkStatePrefix(*column_string, settings, state);
serialization->serializeBinaryBulkWithMultipleStreams(*column_string, 0, column_string->size(), settings, state);
serialization->serializeBinaryBulkStateSuffix(settings, state);
}

@ -859,7 +859,7 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo
result += fmt::format(
", use_best_effort_in_schema_inference={}, bool_true_representation={}, bool_false_representation={},"
" null_representation={}, delimiter={}, tuple_delimiter={}",
settings.tsv.use_best_effort_in_schema_inference,
settings.csv.use_best_effort_in_schema_inference,
settings.bool_true_representation,
settings.bool_false_representation,
settings.csv.null_representation,

@ -58,7 +58,7 @@ static void writeData(const ISerialization & serialization, const ColumnPtr & co
settings.low_cardinality_max_dictionary_size = 0; //-V1048
ISerialization::SerializeBinaryBulkStatePtr state;
serialization.serializeBinaryBulkStatePrefix(settings, state);
serialization.serializeBinaryBulkStatePrefix(*full_column, settings, state);
serialization.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
serialization.serializeBinaryBulkStateSuffix(settings, state);
}

@ -3360,9 +3360,8 @@ private:
{
return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count)
{
auto res = ConvertImplGenericFromString<ColumnString>::execute(arguments, result_type, nullable_source, input_rows_count);
auto & res_object = assert_cast<ColumnObject &>(res->assumeMutableRef());
res_object.finalize();
auto res = ConvertImplGenericFromString<ColumnString>::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable();
res->finalize();
return res;
};
}

@ -25,7 +25,6 @@
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNothing.h>
@ -40,6 +39,7 @@
#include <Common/JSONParsers/RapidJSONParser.h>
#include <Functions/FunctionHelpers.h>
#include <IO/readDecimalText.h>
#include <Interpreters/Context.h>
@ -191,7 +191,7 @@ private:
for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments))
{
const auto & column = columns[i];
if (!isString(column.type) && !isInteger(column.type))
if (!isString(column.type) && !isNativeInteger(column.type))
throw Exception{"The argument " + std::to_string(i + 1) + " of function " + String(function_name)
+ " should be a string specifying key or an integer specifying index, illegal type: " + column.type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
@ -623,24 +623,32 @@ public:
static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view)
{
UInt8 type;
if (element.isInt64())
type = 'i';
else if (element.isUInt64())
type = 'u';
else if (element.isDouble())
type = 'd';
else if (element.isBool())
type = 'b';
else if (element.isString())
type = '"';
else if (element.isArray())
type = '[';
else if (element.isObject())
type = '{';
else if (element.isNull())
type = 0;
else
return false;
switch (element.type())
{
case ElementType::INT64:
type = 'i';
break;
case ElementType::UINT64:
type = 'u';
break;
case ElementType::DOUBLE:
type = 'd';
break;
case ElementType::STRING:
type = '"';
break;
case ElementType::ARRAY:
type = '[';
break;
case ElementType::OBJECT:
type = '{';
break;
case ElementType::NULL_VALUE:
type = 0;
break;
default:
return false;
}
ColumnVector<Int8> & col_vec = assert_cast<ColumnVector<Int8> &>(dest);
col_vec.insertValue(type);
@ -666,34 +674,51 @@ public:
{
NumberType value;
if (element.isInt64())
switch (element.type())
{
if (!accurate::convertNumeric(element.getInt64(), value))
case ElementType::DOUBLE:
if constexpr (std::is_floating_point_v<NumberType>)
{
/// We permit inaccurate conversion of double to float.
/// Example: double 0.1 from JSON is not representable in float.
/// But it will be more convenient for user to perform conversion.
value = static_cast<NumberType>(element.getDouble());
}
else if (!accurate::convertNumeric<Float64, NumberType, false>(element.getDouble(), value))
return false;
break;
case ElementType::UINT64:
if (!accurate::convertNumeric<UInt64, NumberType, false>(element.getUInt64(), value))
return false;
break;
case ElementType::INT64:
if (!accurate::convertNumeric<Int64, NumberType, false>(element.getInt64(), value))
return false;
break;
case ElementType::BOOL:
if constexpr (is_integer<NumberType> && convert_bool_to_integer)
{
value = static_cast<NumberType>(element.getBool());
break;
}
return false;
}
else if (element.isUInt64())
{
if (!accurate::convertNumeric(element.getUInt64(), value))
return false;
}
else if (element.isDouble())
{
if constexpr (std::is_floating_point_v<NumberType>)
{
/// We permit inaccurate conversion of double to float.
/// Example: double 0.1 from JSON is not representable in float.
/// But it will be more convenient for user to perform conversion.
value = static_cast<NumberType>(element.getDouble());
case ElementType::STRING: {
auto rb = ReadBufferFromMemory{element.getString()};
if constexpr (std::is_floating_point_v<NumberType>)
{
if (!tryReadFloatText(value, rb) || !rb.eof())
return false;
}
else
{
if (!tryReadIntText(value, rb) || !rb.eof())
return false;
}
break;
}
else if (!accurate::convertNumeric(element.getDouble(), value))
default:
return false;
}
else if (element.isBool() && is_integer<NumberType> && convert_bool_to_integer)
{
value = static_cast<NumberType>(element.getBool());
}
else
return false;
auto & col_vec = assert_cast<ColumnVector<NumberType> &>(dest);
col_vec.insertValue(value);
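The STRING case above accepts numbers that arrive as JSON strings only when the whole string parses. A minimal standalone illustration of that full-consumption check, using std::from_chars instead of the ReadBuffer helpers:

```cpp
// Sketch of the idea only: reject partial parses such as "42abc".
#include <charconv>
#include <cstdint>
#include <optional>
#include <string_view>

std::optional<int64_t> parseStrictInt(std::string_view s)
{
    int64_t value = 0;
    const char * first = s.data();
    const char * last = s.data() + s.size();
    auto [ptr, ec] = std::from_chars(first, last, value);
    if (ec != std::errc() || ptr != last)   // must consume every character
        return std::nullopt;
    return value;
}
```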
@ -719,9 +744,25 @@ using JSONExtractInt64Impl = JSONExtractNumericImpl<JSONParser, Int64>;
template <typename JSONParser>
using JSONExtractUInt64Impl = JSONExtractNumericImpl<JSONParser, UInt64>;
template <typename JSONParser>
using JSONExtractInt128Impl = JSONExtractNumericImpl<JSONParser, Int128>;
template <typename JSONParser>
using JSONExtractUInt128Impl = JSONExtractNumericImpl<JSONParser, UInt128>;
template <typename JSONParser>
using JSONExtractInt256Impl = JSONExtractNumericImpl<JSONParser, Int256>;
template <typename JSONParser>
using JSONExtractUInt256Impl = JSONExtractNumericImpl<JSONParser, UInt256>;
template <typename JSONParser>
using JSONExtractFloat32Impl = JSONExtractNumericImpl<JSONParser, Float32>;
template <typename JSONParser>
using JSONExtractFloat64Impl = JSONExtractNumericImpl<JSONParser, Float64>;
template <typename JSONParser>
using JSONExtractDecimal32Impl = JSONExtractNumericImpl<JSONParser, Decimal32>;
template <typename JSONParser>
using JSONExtractDecimal64Impl = JSONExtractNumericImpl<JSONParser, Decimal64>;
template <typename JSONParser>
using JSONExtractDecimal128Impl = JSONExtractNumericImpl<JSONParser, Decimal128>;
template <typename JSONParser>
using JSONExtractDecimal256Impl = JSONExtractNumericImpl<JSONParser, Decimal256>;
template <typename JSONParser>
@ -739,11 +780,22 @@ public:
static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view)
{
if (!element.isBool())
return false;
bool value;
switch (element.type())
{
case ElementType::BOOL:
value = element.getBool();
break;
case ElementType::INT64:
case ElementType::UINT64:
value = element.getUInt64() != 0;
break;
default:
return false;
}
auto & col_vec = assert_cast<ColumnVector<UInt8> &>(dest);
col_vec.insertValue(static_cast<UInt8>(element.getBool()));
col_vec.insertValue(static_cast<UInt8>(value));
return true;
}
};
@ -845,12 +897,35 @@ struct JSONExtractTree
explicit DecimalNode(DataTypePtr data_type_) : data_type(data_type_) {}
bool insertResultToColumn(IColumn & dest, const Element & element) override
{
if (!element.isDouble())
return false;
const auto * type = assert_cast<const DataTypeDecimal<DecimalType> *>(data_type.get());
auto result = convertToDecimal<DataTypeNumber<Float64>, DataTypeDecimal<DecimalType>>(element.getDouble(), type->getScale());
assert_cast<ColumnDecimal<DecimalType> &>(dest).insert(result);
DecimalType value{};
switch (element.type())
{
case ElementType::DOUBLE:
value = convertToDecimal<DataTypeNumber<Float64>, DataTypeDecimal<DecimalType>>(
element.getDouble(), type->getScale());
break;
case ElementType::UINT64:
value = convertToDecimal<DataTypeNumber<UInt64>, DataTypeDecimal<DecimalType>>(
element.getUInt64(), type->getScale());
break;
case ElementType::INT64:
value = convertToDecimal<DataTypeNumber<Int64>, DataTypeDecimal<DecimalType>>(
element.getInt64(), type->getScale());
break;
case ElementType::STRING: {
auto rb = ReadBufferFromMemory{element.getString()};
if (!SerializationDecimal<DecimalType>::tryReadText(value, rb, DecimalUtils::max_precision<DecimalType>, type->getScale()))
return false;
break;
}
default:
return false;
}
assert_cast<ColumnDecimal<DecimalType> &>(dest).insert(value);
return true;
}
private:
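A hedged sketch of the integer cases above, under the usual assumption that a decimal is stored as an integer mantissa with a fixed scale; `toDecimalMantissa` is a made-up helper, not part of ClickHouse:

```cpp
// Assumed semantics: converting an integer to a decimal with scale S multiplies by 10^S.
#include <cstdint>
#include <optional>

std::optional<int64_t> toDecimalMantissa(int64_t value, uint32_t scale)
{
    int64_t mantissa = value;
    for (uint32_t i = 0; i < scale; ++i)
    {
        if (mantissa > INT64_MAX / 10 || mantissa < INT64_MIN / 10)
            return std::nullopt;            // would overflow the mantissa
        mantissa *= 10;
    }
    return mantissa;
}
```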
@ -1088,10 +1163,14 @@ struct JSONExtractTree
case TypeIndex::UInt16: return std::make_unique<NumericNode<UInt16>>();
case TypeIndex::UInt32: return std::make_unique<NumericNode<UInt32>>();
case TypeIndex::UInt64: return std::make_unique<NumericNode<UInt64>>();
case TypeIndex::UInt128: return std::make_unique<NumericNode<UInt128>>();
case TypeIndex::UInt256: return std::make_unique<NumericNode<UInt256>>();
case TypeIndex::Int8: return std::make_unique<NumericNode<Int8>>();
case TypeIndex::Int16: return std::make_unique<NumericNode<Int16>>();
case TypeIndex::Int32: return std::make_unique<NumericNode<Int32>>();
case TypeIndex::Int64: return std::make_unique<NumericNode<Int64>>();
case TypeIndex::Int128: return std::make_unique<NumericNode<Int128>>();
case TypeIndex::Int256: return std::make_unique<NumericNode<Int256>>();
case TypeIndex::Float32: return std::make_unique<NumericNode<Float32>>();
case TypeIndex::Float64: return std::make_unique<NumericNode<Float64>>();
case TypeIndex::String: return std::make_unique<StringNode>();

@ -104,7 +104,7 @@ struct LowerUpperUTF8Impl
/** Converts a single code point starting at `src` to desired case, storing result starting at `dst`.
* `src` and `dst` are incremented by corresponding sequence lengths. */
static void toCase(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst)
static bool toCase(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool partial)
{
if (src[0] <= ascii_upper_bound)
{
@ -136,6 +136,11 @@ struct LowerUpperUTF8Impl
static const Poco::UTF8Encoding utf8;
size_t src_sequence_length = UTF8::seqLength(*src);
/// In case a partial buffer was passed (due to the SSE optimization)
/// we cannot convert it with the current src_end, but we may have more
/// bytes to convert and eventually get the correct symbol.
if (partial && src_sequence_length > static_cast<size_t>(src_end-src))
return false;
auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);
if (src_code_point)
@ -152,7 +157,7 @@ struct LowerUpperUTF8Impl
{
src += dst_sequence_length;
dst += dst_sequence_length;
return;
return true;
}
}
}
@ -161,6 +166,8 @@ struct LowerUpperUTF8Impl
++dst;
++src;
}
return true;
}
private:
@ -229,16 +236,13 @@ private:
const UInt8 * expected_end = std::min(src + bytes_sse, row_end);
while (src < expected_end)
toCase(src, expected_end, dst);
/// adjust src_end_sse by pushing it forward or backward
const auto diff = src - expected_end;
if (diff != 0)
{
if (src_end_sse + diff < src_end)
src_end_sse += diff;
else
src_end_sse -= bytes_sse - diff;
if (!toCase(src, expected_end, dst, /* partial= */ true))
{
/// Fallback to handling byte by byte.
src_end_sse = src;
break;
}
}
}
}
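The partial-buffer check above relies on the UTF-8 lead byte announcing the sequence length. A small self-contained version of that boundary test:

```cpp
// Toy sketch: a sequence that does not fit into the current chunk must be retried later.
#include <cstddef>
#include <cstdint>

size_t utf8SequenceLength(uint8_t lead)
{
    if (lead < 0x80) return 1;          // ASCII
    if ((lead >> 5) == 0x6) return 2;   // 110xxxxx
    if ((lead >> 4) == 0xE) return 3;   // 1110xxxx
    if ((lead >> 3) == 0x1E) return 4;  // 11110xxx
    return 1;                           // invalid lead byte: treat as a single byte
}

bool fitsInChunk(const uint8_t * src, const uint8_t * chunk_end)
{
    return utf8SequenceLength(*src) <= static_cast<size_t>(chunk_end - src);
}
```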
@ -255,7 +259,7 @@ private:
chassert(row_end >= src);
while (src < row_end)
toCase(src, row_end, dst);
toCase(src, row_end, dst, /* partial= */ false);
++offset_it;
}
}

@ -54,7 +54,7 @@ public:
auto serialization = elem.type->getDefaultSerialization();
serialization->serializeBinaryBulkStatePrefix(settings, state);
serialization->serializeBinaryBulkStatePrefix(*full_column, settings, state);
serialization->serializeBinaryBulkWithMultipleStreams(*full_column,
0 /** offset */, 0 /** limit */,
settings, state);

@ -16,6 +16,8 @@ public:
requires (sizeof(CharT) == 1)
ReadBufferFromMemory(const CharT * buf, size_t size)
: SeekableReadBuffer(const_cast<char *>(reinterpret_cast<const char *>(buf)), size, 0) {}
explicit ReadBufferFromMemory(const std::string_view&& str)
: SeekableReadBuffer(const_cast<char *>(str.data()), str.size(), 0) {}
off_t seek(off_t off, int whence) override;

@ -147,23 +147,32 @@ inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exp
return true;
}
template <typename T>
inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale, bool digits_only = false)
template <typename T, typename ReturnType=void>
inline ReturnType readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale, bool digits_only = false)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
uint32_t digits = precision;
int32_t exponent;
readDigits<true>(buf, x, digits, exponent, digits_only);
auto ok = readDigits<throw_exception>(buf, x, digits, exponent, digits_only);
if (!throw_exception && !ok)
return ReturnType(false);
if (static_cast<int32_t>(digits) + exponent > static_cast<int32_t>(precision - scale))
{
static constexpr const char * pattern =
"Decimal value is too big: {} digits were read: {}e{}."
" Expected to read decimal with scale {} and precision {}";
if constexpr (throw_exception)
{
static constexpr const char * pattern = "Decimal value is too big: {} digits were read: {}e{}."
" Expected to read decimal with scale {} and precision {}";
if constexpr (is_big_int_v<typename T::NativeType>)
throw Exception(fmt::format(pattern, digits, x.value, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if constexpr (is_big_int_v<typename T::NativeType>)
throw Exception(fmt::format(pattern, digits, x.value, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
else
throw Exception(fmt::format(pattern, digits, x, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
else
throw Exception(fmt::format(pattern, digits, x, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
return ReturnType(false);
}
if (static_cast<int32_t>(scale) + exponent < 0)
@ -175,7 +184,7 @@ inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_
/// Too big negative exponent
x.value = 0;
scale = 0;
return;
return ReturnType(true);
}
else
{
@ -184,26 +193,18 @@ inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_
assert(divisor > 0); /// This is for Clang Static Analyzer. It is not smart enough to infer it automatically.
x.value /= divisor;
scale = 0;
return;
return ReturnType(true);
}
}
scale += exponent;
return ReturnType(true);
}
template <typename T>
inline bool tryReadDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale)
{
uint32_t digits = precision;
int32_t exponent;
if (!readDigits<false>(buf, x, digits, exponent, true) ||
static_cast<int32_t>(digits) + exponent > static_cast<int32_t>(precision - scale) ||
static_cast<int32_t>(scale) + exponent < 0)
return false;
scale += exponent;
return true;
return readDecimalText<T, bool>(buf, x, precision, scale, true);
}
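tryReadDecimalText now reuses readDecimalText through the ReturnType template parameter: void means "throw on error", bool means "return false". A tiny generic example of the same idiom:

```cpp
// Throwing and non-throwing variants sharing one body via ReturnType.
#include <stdexcept>
#include <type_traits>

template <typename ReturnType = void>
ReturnType parseDigit(char c, int & out)
{
    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
    if (c < '0' || c > '9')
    {
        if constexpr (throw_exception)
            throw std::runtime_error("not a digit");
        else
            return ReturnType(false);
    }
    out = c - '0';
    return ReturnType(true);    // for ReturnType = void this is a cast to void
}

// The non-throwing wrapper simply forwards with ReturnType = bool:
inline bool tryParseDigit(char c, int & out) { return parseDigit<bool>(c, out); }
```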
template <typename T>

@ -829,7 +829,7 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat
{
for (const auto & [name, type] : properties.columns.getAllPhysical())
{
if (isObject(type))
if (type->hasDynamicSubcolumns())
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Cannot create table with column '{}' which type is '{}' "
@ -1398,7 +1398,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
/// we can safely destroy the object without a call to "shutdown", because there is guarantee
/// that no background threads/similar resources remain after exception from "startup".
if (!res->supportsDynamicSubcolumns() && hasObjectColumns(res->getInMemoryMetadataPtr()->getColumns()))
if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()))
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Cannot create table with column of type Object, "

@ -387,6 +387,9 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
}
else if (isObject(type))
{
if (src.getType() == Field::Types::Object)
return src; /// Already in needed type.
const auto * from_type_tuple = typeid_cast<const DataTypeTuple *>(from_type_hint);
if (src.getType() == Field::Types::Tuple && from_type_tuple && from_type_tuple->haveExplicitNames())
{

@ -232,7 +232,9 @@ Chunk IRowInputFormat::generate()
return {};
}
finalizeObjectColumns(columns);
for (const auto & column : columns)
column->finalize();
Chunk chunk(std::move(columns), num_rows);
return chunk;
}

@ -101,7 +101,9 @@ Chunk ValuesBlockInputFormat::generate()
return {};
}
finalizeObjectColumns(columns);
for (const auto & column : columns)
column->finalize();
size_t rows_in_block = columns[0]->size();
return Chunk{std::move(columns), rows_in_block};
}

@ -17,7 +17,7 @@ void optimizePrimaryKeyCondition(QueryPlan::Node & root)
size_t next_child = 0;
};
std::deque<Frame> stack;
std::vector<Frame> stack;
stack.push_back({.node = &root});
while (!stack.empty())
@ -27,29 +27,29 @@ void optimizePrimaryKeyCondition(QueryPlan::Node & root)
/// Traverse all children first.
if (frame.next_child < frame.node->children.size())
{
stack.push_back({.node = frame.node->children[frame.next_child]});
auto next_frame = Frame{.node = frame.node->children[frame.next_child]};
++frame.next_child;
stack.push_back(next_frame);
continue;
}
auto add_filter = [&](auto & storage)
auto add_read_from_storage_filter = [&](auto & storage)
{
for (auto iter=stack.rbegin() + 1; iter!=stack.rend(); ++iter)
for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter)
{
if (auto * filter_step = typeid_cast<FilterStep *>(iter->node->step.get()))
storage.addFilter(filter_step->getExpression(), filter_step->getFilterColumnName());
else if (typeid_cast<ExpressionStep *>(iter->node->step.get()))
;
continue;
else
break;
}
};
if (auto * read_from_merge_tree = typeid_cast<ReadFromMergeTree *>(frame.node->step.get()))
add_filter(*read_from_merge_tree);
add_read_from_storage_filter(*read_from_merge_tree);
else if (auto * read_from_merge = typeid_cast<ReadFromMerge *>(frame.node->step.get()))
add_filter(*read_from_merge);
add_read_from_storage_filter(*read_from_merge);
stack.pop_back();
}
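The traversal above is an iterative post-order walk with an explicit stack of frames; note that the child index is incremented before push_back, so the possibly invalidated frame reference is never reused. A generic standalone version of the pattern:

```cpp
// Post-order traversal with an explicit stack: children first, then the node itself.
#include <cstddef>
#include <iostream>
#include <vector>

struct Node
{
    int value = 0;
    std::vector<Node *> children;
};

void postOrder(Node & root)
{
    struct Frame { Node * node; size_t next_child = 0; };
    std::vector<Frame> stack;
    stack.push_back({&root});

    while (!stack.empty())
    {
        auto & frame = stack.back();
        if (frame.next_child < frame.node->children.size())
        {
            Frame next{frame.node->children[frame.next_child]};
            ++frame.next_child;                       // advance before the stack may reallocate
            stack.push_back(next);                    // descend into the child first
            continue;
        }
        std::cout << frame.node->value << '\n';       // all children done: process the node
        stack.pop_back();
    }
}
```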

@ -925,8 +925,15 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
for (const auto & node : added_filter_nodes.nodes)
nodes.nodes.push_back(node);
key_condition.emplace(
std::move(nodes), query_info.syntax_analyzer_result, query_info.prepared_sets, context, primary_key_columns, primary_key.expression);
NameSet array_join_name_set;
if (query_info.syntax_analyzer_result)
array_join_name_set = query_info.syntax_analyzer_result->getArrayJoinSourceNameSet();
key_condition.emplace(std::move(nodes),
context,
primary_key_columns,
primary_key.expression,
array_join_name_set);
}
else
{

@ -11,7 +11,6 @@
#include <Interpreters/getHeaderForProcessingStage.h>
#include <Interpreters/SelectQueryOptions.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/getTableExpressions.h>
#include <QueryPipeline/narrowPipe.h>
#include <QueryPipeline/Pipe.h>
#include <QueryPipeline/RemoteQueryExecutor.h>
@ -25,6 +24,8 @@
#include <Storages/IStorage.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Storages/StorageDictionary.h>
#include <Storages/addColumnsStructureToQueryWithClusterEngine.h>
#include <memory>
@ -56,6 +57,7 @@ StorageHDFSCluster::StorageHDFSCluster(
{
auto columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_);
storage_metadata.setColumns(columns);
add_columns_structure_to_query = true;
}
else
storage_metadata.setColumns(columns_);
@ -92,6 +94,11 @@ Pipe StorageHDFSCluster::read(
const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
auto query_to_send = query_info.original_query->clone();
if (add_columns_structure_to_query)
addColumnsStructureToQueryWithClusterEngine(
query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), 3, getName());
for (const auto & replicas : cluster->getShardsAddresses())
{
/// There will be only one replica, because we consider each replica as a shard
@ -110,7 +117,7 @@ Pipe StorageHDFSCluster::read(
/// So, task_identifier is passed as constructor argument. It is more obvious.
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
connection,
queryToString(query_info.original_query),
queryToString(query_to_send),
header,
context,
/*throttler=*/nullptr,

@ -44,6 +44,7 @@ private:
String uri;
String format_name;
String compression_method;
bool add_columns_structure_to_query = false;
};

@ -27,6 +27,7 @@
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Storages/KeyDescription.h>
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
#include <cassert>
#include <stack>
@ -194,289 +195,6 @@ static String firstStringThatIsGreaterThanAllStringsWithPrefix(const String & pr
return res;
}
static void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & out, bool legacy = false)
{
switch (node.type)
{
case (ActionsDAG::ActionType::INPUT):
writeString(node.result_name, out);
break;
case (ActionsDAG::ActionType::COLUMN):
{
/// If it was created from ASTLiteral, then result_name can be an alias.
/// We need to convert value back to string here.
if (const auto * column_const = typeid_cast<const ColumnConst *>(node.column.get()))
writeString(applyVisitor(FieldVisitorToString(), column_const->getField()), out);
/// It may be possible that column is ColumnSet
else
writeString(node.result_name, out);
break;
}
case (ActionsDAG::ActionType::ALIAS):
appendColumnNameWithoutAlias(*node.children.front(), out, legacy);
break;
case (ActionsDAG::ActionType::ARRAY_JOIN):
writeCString("arrayJoin(", out);
appendColumnNameWithoutAlias(*node.children.front(), out, legacy);
writeChar(')', out);
break;
case (ActionsDAG::ActionType::FUNCTION):
{
auto name = node.function_base->getName();
if (legacy && name == "modulo")
writeCString("moduleLegacy", out);
else
writeString(name, out);
writeChar('(', out);
bool first = true;
for (const auto * arg : node.children)
{
if (!first)
writeCString(", ", out);
first = false;
appendColumnNameWithoutAlias(*arg, out, legacy);
}
writeChar(')', out);
}
}
}
static std::string getColumnNameWithoutAlias(const ActionsDAG::Node & node, bool legacy = false)
{
WriteBufferFromOwnString out;
appendColumnNameWithoutAlias(node, out, legacy);
return std::move(out.str());
}
class KeyCondition::Tree
{
public:
explicit Tree(const IAST * ast_) : ast(ast_) { assert(ast); }
explicit Tree(const ActionsDAG::Node * dag_) : dag(dag_) { assert(dag); }
std::string getColumnName() const
{
if (ast)
return ast->getColumnNameWithoutAlias();
else
return getColumnNameWithoutAlias(*dag);
}
std::string getColumnNameLegacy() const
{
if (ast)
{
auto adjusted_ast = ast->clone();
KeyDescription::moduloToModuloLegacyRecursive(adjusted_ast);
return adjusted_ast->getColumnNameWithoutAlias();
}
else
return getColumnNameWithoutAlias(*dag, true);
}
bool isFunction() const
{
if (ast)
return typeid_cast<const ASTFunction *>(ast);
else
return dag->type == ActionsDAG::ActionType::FUNCTION;
}
bool isConstant() const
{
if (ast)
return typeid_cast<const ASTLiteral *>(ast);
else
return dag->column && isColumnConst(*dag->column);
}
ColumnWithTypeAndName getConstant() const
{
if (!isConstant())
throw Exception(ErrorCodes::LOGICAL_ERROR, "KeyCondition::Tree node is not a constant");
ColumnWithTypeAndName res;
if (ast)
{
const auto * literal = assert_cast<const ASTLiteral *>(ast);
res.type = applyVisitor(FieldToDataType(), literal->value);
res.column = res.type->createColumnConst(0, literal->value);
}
else
{
res.type = dag->result_type;
res.column = dag->column;
}
return res;
}
bool tryGetConstant(const Block & block_with_constants, Field & out_value, DataTypePtr & out_type) const
{
if (ast)
{
// Constant expr should use alias names if any
String column_name = ast->getColumnName();
if (const auto * lit = ast->as<ASTLiteral>())
{
/// By default block_with_constants has only one column named "_dummy".
/// If the block contains only constants it may not be preprocessed by
/// ExpressionAnalyzer, so try to look up in the default column.
if (!block_with_constants.has(column_name))
column_name = "_dummy";
/// Simple literal
out_value = lit->value;
out_type = block_with_constants.getByName(column_name).type;
/// If the constant is not Null, we can assume its type is not Nullable as well.
if (!out_value.isNull())
out_type = removeNullable(out_type);
return true;
}
else if (block_with_constants.has(column_name) && isColumnConst(*block_with_constants.getByName(column_name).column))
{
/// An expression which is dependent on constants only
const auto & expr_info = block_with_constants.getByName(column_name);
out_value = (*expr_info.column)[0];
out_type = expr_info.type;
if (!out_value.isNull())
out_type = removeNullable(out_type);
return true;
}
}
else
{
if (dag->column && isColumnConst(*dag->column))
{
out_value = (*dag->column)[0];
out_type = dag->result_type;
if (!out_value.isNull())
out_type = removeNullable(out_type);
return true;
}
}
return false;
}
ConstSetPtr tryGetPreparedSet(
const PreparedSetsPtr & sets,
const std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
const DataTypes & data_types) const
{
if (sets && ast)
{
if (ast->as<ASTSubquery>() || ast->as<ASTTableIdentifier>())
return sets->get(PreparedSetKey::forSubquery(*ast));
/// We have `PreparedSetKey::forLiteral` but it is useless here as we don't have enough information
/// about types in left argument of the IN operator. Instead, we manually iterate through all the sets
/// and find the one for the right arg based on the AST structure (getTreeHash), after that we check
/// that the types it was prepared with are compatible with the types of the primary key.
auto types_match = [&indexes_mapping, &data_types](const SetPtr & candidate_set)
{
assert(indexes_mapping.size() == data_types.size());
for (size_t i = 0; i < indexes_mapping.size(); ++i)
{
if (!candidate_set->areTypesEqual(indexes_mapping[i].tuple_index, data_types[i]))
return false;
}
return true;
};
for (const auto & set : sets->getByTreeHash(ast->getTreeHash()))
{
if (types_match(set))
return set;
}
}
else if (dag->column)
{
const IColumn * col = dag->column.get();
if (const auto * col_const = typeid_cast<const ColumnConst *>(col))
col = &col_const->getDataColumn();
if (const auto * col_set = typeid_cast<const ColumnSet *>(col))
{
auto set = col_set->getData();
if (set->isCreated())
return set;
}
}
return nullptr;
}
FunctionTree asFunction() const;
protected:
const IAST * ast = nullptr;
const ActionsDAG::Node * dag = nullptr;
};
class KeyCondition::FunctionTree : public KeyCondition::Tree
{
public:
std::string getFunctionName() const
{
if (ast)
return assert_cast<const ASTFunction *>(ast)->name;
else
return dag->function_base->getName();
}
size_t numArguments() const
{
if (ast)
{
const auto * func = assert_cast<const ASTFunction *>(ast);
return func->arguments ? func->arguments->children.size() : 0;
}
else
return dag->children.size();
}
Tree getArgumentAt(size_t idx) const
{
if (ast)
return Tree(assert_cast<const ASTFunction *>(ast)->arguments->children[idx].get());
else
return Tree(dag->children[idx]);
}
private:
using Tree::Tree;
friend class Tree;
};
KeyCondition::FunctionTree KeyCondition::Tree::asFunction() const
{
if (!isFunction())
throw Exception(ErrorCodes::LOGICAL_ERROR, "KeyCondition::Tree node is not a function");
if (ast)
return KeyCondition::FunctionTree(ast);
else
return KeyCondition::FunctionTree(dag);
}
/// A dictionary containing actions to the corresponding functions to turn them into `RPNElement`
const KeyCondition::AtomMap KeyCondition::atom_map
{
{
@ -972,16 +690,17 @@ static NameSet getAllSubexpressionNames(const ExpressionActions & key_expr)
KeyCondition::KeyCondition(
const ASTPtr & query,
const ASTs & additional_filter_asts,
TreeRewriterResultPtr syntax_analyzer_result,
PreparedSetsPtr prepared_sets_,
Block block_with_constants,
PreparedSetsPtr prepared_sets,
ContextPtr context,
const Names & key_column_names,
const ExpressionActionsPtr & key_expr_,
NameSet array_joined_column_names_,
bool single_point_,
bool strict_)
: key_expr(key_expr_)
, key_subexpr_names(getAllSubexpressionNames(*key_expr))
, prepared_sets(prepared_sets_)
, array_joined_column_names(std::move(array_joined_column_names_))
, single_point(single_point_)
, strict(strict_)
{
@ -992,82 +711,64 @@ KeyCondition::KeyCondition(
key_columns[name] = i;
}
if (!syntax_analyzer_result)
auto filter_node = buildFilterNode(query, additional_filter_asts);
if (!filter_node)
{
rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
return;
}
/** Evaluation of expressions that depend only on constants.
* For the index to be used, if it is written, for example `WHERE Date = toDate(now())`.
/** When non-strictly monotonic functions are employed in a functional index (e.g. ORDER BY toStartOfHour(dateTime)),
* using the NOT operator in a predicate makes the indexing algorithm leave out some data.
* This is caused by KeyCondition::tryParseAtomFromAST rewriting relational operators into less strict ones
* when parsing the AST into the internal RPN representation.
* To overcome the problem, before parsing the AST we transform it into a semantically equivalent form where all NOTs
* are pushed down and applied (when possible) to leaf nodes.
*/
Block block_with_constants = getBlockWithConstants(query, syntax_analyzer_result, context);
auto inverted_filter_node = cloneASTWithInversionPushDown(filter_node);
if (syntax_analyzer_result)
{
for (const auto & [name, _] : syntax_analyzer_result->array_join_result_to_source)
array_joined_columns.insert(name);
}
RPNBuilder<RPNElement> builder(
inverted_filter_node,
std::move(context),
std::move(block_with_constants),
std::move(prepared_sets),
[&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); });
rpn = std::move(builder).extractRPN();
}
const ASTSelectQuery & select = query->as<ASTSelectQuery &>();
ASTs filters;
if (select.where())
filters.push_back(select.where());
if (select.prewhere())
filters.push_back(select.prewhere());
for (const auto & filter_ast : additional_filter_asts)
filters.push_back(filter_ast);
if (!filters.empty())
{
ASTPtr filter_query;
if (filters.size() == 1)
{
filter_query = filters.front();
}
else
{
auto function = std::make_shared<ASTFunction>();
function->name = "and";
function->arguments = std::make_shared<ASTExpressionList>();
function->children.push_back(function->arguments);
function->arguments->children = std::move(filters);
filter_query = function;
}
/** When non-strictly monotonic functions are employed in functional index (e.g. ORDER BY toStartOfHour(dateTime)),
* the use of NOT operator in predicate will result in the indexing algorithm leave out some data.
* This is caused by rewriting in KeyCondition::tryParseAtomFromAST of relational operators to less strict
* when parsing the AST into internal RPN representation.
* To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's
* are pushed down and applied (when possible) to leaf nodes.
*/
auto ast = cloneASTWithInversionPushDown(filter_query);
traverseAST(Tree(ast.get()), context, block_with_constants);
}
else
{
rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
}
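The comment in the new constructor body describes pushing NOT down to the leaves before building the RPN. A toy illustration of that rewrite on a plain boolean expression tree (none of these types exist in ClickHouse): NOT(a AND b) becomes NOT(a) OR NOT(b), and NOT(NOT(x)) collapses to x, so only leaves end up negated.

```cpp
// Sketch of NOT push-down with De Morgan on a toy expression tree.
#include <memory>
#include <string>

struct Expr
{
    enum class Kind { Leaf, Not, And, Or } kind;
    std::string name;                        // used by Leaf only
    std::shared_ptr<Expr> lhs, rhs;          // Not uses lhs only
};

using ExprPtr = std::shared_ptr<Expr>;

ExprPtr pushDownNot(const ExprPtr & node, bool negate = false)
{
    switch (node->kind)
    {
        case Expr::Kind::Leaf:
            return negate ? std::make_shared<Expr>(Expr{Expr::Kind::Not, "", node, nullptr}) : node;
        case Expr::Kind::Not:
            return pushDownNot(node->lhs, !negate);      // double negation cancels
        case Expr::Kind::And:
        case Expr::Kind::Or:
        {
            auto kind = node->kind;
            if (negate)                                   // De Morgan: flip AND/OR
                kind = (kind == Expr::Kind::And) ? Expr::Kind::Or : Expr::Kind::And;
            return std::make_shared<Expr>(Expr{kind, "", pushDownNot(node->lhs, negate), pushDownNot(node->rhs, negate)});
        }
    }
    return node;                                          // unreachable, silences warnings
}
```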
KeyCondition::KeyCondition(
const SelectQueryInfo & query_info,
ContextPtr context,
const Names & key_column_names,
const ExpressionActionsPtr & key_expr_,
bool single_point_,
bool strict_)
: KeyCondition(
query_info.query,
query_info.filter_asts,
KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context),
query_info.prepared_sets,
context,
key_column_names,
key_expr_,
query_info.syntax_analyzer_result->getArrayJoinSourceNameSet(),
single_point_,
strict_)
{
}
KeyCondition::KeyCondition(
ActionDAGNodes dag_nodes,
TreeRewriterResultPtr syntax_analyzer_result,
PreparedSetsPtr prepared_sets_,
ContextPtr context,
const Names & key_column_names,
const ExpressionActionsPtr & key_expr_,
NameSet array_joined_column_names_,
bool single_point_,
bool strict_)
: key_expr(key_expr_)
, key_subexpr_names(getAllSubexpressionNames(*key_expr))
, prepared_sets(prepared_sets_)
, array_joined_column_names(std::move(array_joined_column_names_))
, single_point(single_point_)
, strict(strict_)
{
@ -1078,29 +779,23 @@ KeyCondition::KeyCondition(
key_columns[name] = i;
}
if (!syntax_analyzer_result)
if (dag_nodes.nodes.empty())
{
rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
return;
}
for (const auto & [name, _] : syntax_analyzer_result->array_join_result_to_source)
array_joined_columns.insert(name);
auto inverted_dag = cloneASTWithInversionPushDown(std::move(dag_nodes.nodes), context);
assert(inverted_dag->getOutputs().size() == 1);
if (!dag_nodes.nodes.empty())
const auto * inverted_dag_filter_node = inverted_dag->getOutputs()[0];
RPNBuilder<RPNElement> builder(inverted_dag_filter_node, context, [&](const RPNBuilderTreeNode & node, RPNElement & out)
{
auto inverted_dag = cloneASTWithInversionPushDown(std::move(dag_nodes.nodes), context);
return extractAtomFromTree(node, out);
});
// std::cerr << "========== inverted dag: " << inverted_dag->dumpDAG() << std::endl;
Block empty;
for (const auto * node : inverted_dag->getOutputs())
traverseAST(Tree(node), context, empty);
}
else
{
rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
}
rpn = std::move(builder).extractRPN();
}
bool KeyCondition::addCondition(const String & column, const Range & range)
@ -1112,12 +807,12 @@ bool KeyCondition::addCondition(const String & column, const Range & range)
return true;
}
/** Computes value of constant expression and its data type.
* Returns false, if expression isn't constant.
*/
bool KeyCondition::getConstant(const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type)
{
return Tree(expr.get()).tryGetConstant(block_with_constants, out_value, out_type);
RPNBuilderTreeContext tree_context(nullptr, block_with_constants, nullptr);
RPNBuilderTreeNode node(expr.get(), tree_context);
return node.tryGetConstant(out_value, out_type);
}
@ -1201,39 +896,6 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr &
return {field.columns, field.row_idx, result_idx};
}
void KeyCondition::traverseAST(const Tree & node, ContextPtr context, Block & block_with_constants)
{
RPNElement element;
if (node.isFunction())
{
auto func = node.asFunction();
if (tryParseLogicalOperatorFromAST(func, element))
{
size_t num_args = func.numArguments();
for (size_t i = 0; i < num_args; ++i)
{
traverseAST(func.getArgumentAt(i), context, block_with_constants);
/** The first part of the condition is for the correct support of `and` and `or` functions of arbitrary arity
* - in this case `n - 1` elements are added (where `n` is the number of arguments).
*/
if (i != 0 || element.function == RPNElement::FUNCTION_NOT)
rpn.emplace_back(element);
}
return;
}
}
if (!tryParseAtomFromAST(node, context, block_with_constants, element))
{
element.function = RPNElement::FUNCTION_UNKNOWN;
}
rpn.emplace_back(std::move(element));
}
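The removed traverseAST documents the RPN convention for n-ary `and`/`or`: operands are emitted first and the operator element is appended n - 1 times, so a stack-based evaluator combines them pairwise. A minimal standalone sketch of that emission rule:

```cpp
// Toy sketch of the "n operands, n - 1 operator elements" RPN convention.
#include <cstddef>
#include <string>
#include <vector>

enum class Op { Atom, And, Or, Not };

struct RPNElement { Op op; std::string atom; };

void appendFunction(std::vector<RPNElement> & rpn, Op op, const std::vector<std::string> & args)
{
    for (size_t i = 0; i < args.size(); ++i)
    {
        rpn.push_back({Op::Atom, args[i]});
        if (i != 0)                      // every operand after the first adds one operator
            rpn.push_back({op, {}});
    }
}
```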
/** The key functional expression constraint may be inferred from a plain column in the expression.
* For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`,
* it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())`
@ -1355,7 +1017,7 @@ bool KeyCondition::transformConstantWithValidFunctions(
}
bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
const Tree & node,
const RPNBuilderTreeNode & node,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
Field & out_value,
@ -1363,7 +1025,7 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
{
String expr_name = node.getColumnName();
if (array_joined_columns.contains(expr_name))
if (array_joined_column_names.contains(expr_name))
return false;
if (!key_subexpr_names.contains(expr_name))
@ -1390,11 +1052,15 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
/// Looking for possible transformation of `column = constant` into `partition_expr = function(constant)`
bool KeyCondition::canConstantBeWrappedByFunctions(
const Tree & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type)
const RPNBuilderTreeNode & node,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
Field & out_value,
DataTypePtr & out_type)
{
String expr_name = node.getColumnName();
if (array_joined_columns.contains(expr_name))
if (array_joined_column_names.contains(expr_name))
return false;
if (!key_subexpr_names.contains(expr_name))
@ -1408,7 +1074,7 @@ bool KeyCondition::canConstantBeWrappedByFunctions(
/// The case `f(modulo(...))` for totally monotonic `f` is considered to be rare.
///
/// Note: for negative values, we can filter more partitions than needed.
expr_name = node.getColumnNameLegacy();
expr_name = node.getColumnNameWithModuloLegacy();
if (!key_subexpr_names.contains(expr_name))
return false;
@ -1425,8 +1091,7 @@ bool KeyCondition::canConstantBeWrappedByFunctions(
}
bool KeyCondition::tryPrepareSetIndex(
const FunctionTree & func,
ContextPtr context,
const RPNBuilderFunctionTreeNode & func,
RPNElement & out,
size_t & out_key_column_num)
{
@ -1436,13 +1101,12 @@ bool KeyCondition::tryPrepareSetIndex(
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> indexes_mapping;
DataTypes data_types;
auto get_key_tuple_position_mapping = [&](const Tree & node, size_t tuple_index)
auto get_key_tuple_position_mapping = [&](const RPNBuilderTreeNode & node, size_t tuple_index)
{
MergeTreeSetIndex::KeyTuplePositionMapping index_mapping;
index_mapping.tuple_index = tuple_index;
DataTypePtr data_type;
if (isKeyPossiblyWrappedByMonotonicFunctions(
node, context, index_mapping.key_index, data_type, index_mapping.functions))
if (isKeyPossiblyWrappedByMonotonicFunctions(node, index_mapping.key_index, data_type, index_mapping.functions))
{
indexes_mapping.push_back(index_mapping);
data_types.push_back(data_type);
@ -1456,25 +1120,29 @@ bool KeyCondition::tryPrepareSetIndex(
{
/// Note: in case of ActionsDAG, tuple may be a constant.
/// In this case, there is no keys in tuple. So, we don't have to check it.
auto left_arg_tuple = left_arg.asFunction();
auto left_arg_tuple = left_arg.toFunctionNode();
if (left_arg_tuple.getFunctionName() == "tuple")
{
left_args_count = left_arg_tuple.numArguments();
left_args_count = left_arg_tuple.getArgumentsSize();
for (size_t i = 0; i < left_args_count; ++i)
get_key_tuple_position_mapping(left_arg_tuple.getArgumentAt(i), i);
}
else
{
get_key_tuple_position_mapping(left_arg, 0);
}
}
else
{
get_key_tuple_position_mapping(left_arg, 0);
}
if (indexes_mapping.empty())
return false;
const auto right_arg = func.getArgumentAt(1);
auto prepared_set = right_arg.tryGetPreparedSet(prepared_sets, indexes_mapping, data_types);
auto prepared_set = right_arg.tryGetPreparedSet(indexes_mapping, data_types);
if (!prepared_set)
return false;
@ -1568,13 +1236,12 @@ private:
bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions(
const Tree & node,
ContextPtr context,
const RPNBuilderTreeNode & node,
size_t & out_key_column_num,
DataTypePtr & out_key_res_column_type,
MonotonicFunctionsChain & out_functions_chain)
{
std::vector<FunctionTree> chain_not_tested_for_monotonicity;
std::vector<RPNBuilderFunctionTreeNode> chain_not_tested_for_monotonicity;
DataTypePtr key_column_type;
if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_key_column_num, key_column_type, chain_not_tested_for_monotonicity))
@ -1583,17 +1250,17 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions(
for (auto it = chain_not_tested_for_monotonicity.rbegin(); it != chain_not_tested_for_monotonicity.rend(); ++it)
{
auto function = *it;
auto func_builder = FunctionFactory::instance().tryGet(function.getFunctionName(), context);
auto func_builder = FunctionFactory::instance().tryGet(function.getFunctionName(), node.getTreeContext().getQueryContext());
if (!func_builder)
return false;
ColumnsWithTypeAndName arguments;
ColumnWithTypeAndName const_arg;
FunctionWithOptionalConstArg::Kind kind = FunctionWithOptionalConstArg::Kind::NO_CONST;
if (function.numArguments() == 2)
if (function.getArgumentsSize() == 2)
{
if (function.getArgumentAt(0).isConstant())
{
const_arg = function.getArgumentAt(0).getConstant();
const_arg = function.getArgumentAt(0).getConstantColumn();
arguments.push_back(const_arg);
arguments.push_back({ nullptr, key_column_type, "" });
kind = FunctionWithOptionalConstArg::Kind::LEFT_CONST;
@ -1601,7 +1268,7 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions(
else if (function.getArgumentAt(1).isConstant())
{
arguments.push_back({ nullptr, key_column_type, "" });
const_arg = function.getArgumentAt(1).getConstant();
const_arg = function.getArgumentAt(1).getConstantColumn();
arguments.push_back(const_arg);
kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST;
}
@ -1627,10 +1294,10 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions(
}
bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl(
const Tree & node,
const RPNBuilderTreeNode & node,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
std::vector<FunctionTree> & out_functions_chain)
std::vector<RPNBuilderFunctionTreeNode> & out_functions_chain)
{
/** By itself, the key column can be a functional expression. for example, `intHash32(UserID)`.
* Therefore, use the full name of the expression for search.
@ -1640,7 +1307,7 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl(
// Key columns should use canonical names for index analysis
String name = node.getColumnName();
if (array_joined_columns.contains(name))
if (array_joined_column_names.contains(name))
return false;
auto it = key_columns.find(name);
@ -1653,37 +1320,39 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl(
if (node.isFunction())
{
auto func = node.asFunction();
auto function_node = node.toFunctionNode();
size_t num_args = func.numArguments();
if (num_args > 2 || num_args == 0)
size_t arguments_size = function_node.getArgumentsSize();
if (arguments_size > 2 || arguments_size == 0)
return false;
out_functions_chain.push_back(func);
bool ret = false;
if (num_args == 2)
out_functions_chain.push_back(function_node);
bool result = false;
if (arguments_size == 2)
{
if (func.getArgumentAt(0).isConstant())
if (function_node.getArgumentAt(0).isConstant())
{
ret = isKeyPossiblyWrappedByMonotonicFunctionsImpl(func.getArgumentAt(1), out_key_column_num, out_key_column_type, out_functions_chain);
result = isKeyPossiblyWrappedByMonotonicFunctionsImpl(function_node.getArgumentAt(1), out_key_column_num, out_key_column_type, out_functions_chain);
}
else if (func.getArgumentAt(1).isConstant())
else if (function_node.getArgumentAt(1).isConstant())
{
ret = isKeyPossiblyWrappedByMonotonicFunctionsImpl(func.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain);
result = isKeyPossiblyWrappedByMonotonicFunctionsImpl(function_node.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain);
}
}
else
{
ret = isKeyPossiblyWrappedByMonotonicFunctionsImpl(func.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain);
result = isKeyPossiblyWrappedByMonotonicFunctionsImpl(function_node.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain);
}
return ret;
return result;
}
return false;
}
static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const KeyCondition::Tree & node)
static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const String & node_column_name)
{
try
{
@ -1693,13 +1362,13 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value,
{
throw Exception("Key expression contains comparison between inconvertible types: " +
desired_type->getName() + " and " + src_type->getName() +
" inside " + node.getColumnName(),
" inside " + node_column_name,
ErrorCodes::BAD_TYPE_OF_FIELD);
}
}
bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Block & block_with_constants, RPNElement & out)
bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out)
{
/** Functions < > = != <= >= in `notIn` isNull isNotNull, where one argument is a constant, and the other is one of columns of key,
* or itself, wrapped in a chain of possibly-monotonic functions,
@ -1709,8 +1378,8 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
DataTypePtr const_type;
if (node.isFunction())
{
auto func = node.asFunction();
size_t num_args = func.numArguments();
auto func = node.toFunctionNode();
size_t num_args = func.getArgumentsSize();
DataTypePtr key_expr_type; /// Type of expression containing key column
size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
@ -1722,7 +1391,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
if (num_args == 1)
{
if (!(isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), context, key_column_num, key_expr_type, chain)))
if (!(isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), key_column_num, key_expr_type, chain)))
return false;
if (key_column_num == static_cast<size_t>(-1))
@ -1753,7 +1422,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
if (functionIsInOrGlobalInOperator(func_name))
{
if (tryPrepareSetIndex(func, context, out, key_column_num))
if (tryPrepareSetIndex(func, out, key_column_num))
{
key_arg_pos = 0;
is_set_const = true;
@ -1761,7 +1430,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
else
return false;
}
else if (func.getArgumentAt(1).tryGetConstant(block_with_constants, const_value, const_type))
else if (func.getArgumentAt(1).tryGetConstant(const_value, const_type))
{
/// If the const operand is null, the atom will be always false
if (const_value.isNull())
@ -1770,7 +1439,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
return true;
}
if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), context, key_column_num, key_expr_type, chain))
if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), key_column_num, key_expr_type, chain))
{
key_arg_pos = 0;
}
@ -1791,7 +1460,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
else
return false;
}
else if (func.getArgumentAt(0).tryGetConstant(block_with_constants, const_value, const_type))
else if (func.getArgumentAt(0).tryGetConstant(const_value, const_type))
{
/// If the const operand is null, the atom will be always false
if (const_value.isNull())
@ -1800,7 +1469,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
return true;
}
if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(1), context, key_column_num, key_expr_type, chain))
if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(1), key_column_num, key_expr_type, chain))
{
key_arg_pos = 1;
}
@ -1880,7 +1549,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
if (!const_type->equals(*common_type))
{
castValueToType(common_type, const_value, const_type, node);
castValueToType(common_type, const_value, const_type, node.getColumnName());
// Need to set is_constant_transformed unless we're doing exact conversion
if (!key_expr_type_not_null->equals(*common_type))
@ -1925,7 +1594,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
return atom_it->second(out, const_value);
}
else if (node.tryGetConstant(block_with_constants, const_value, const_type))
else if (node.tryGetConstant(const_value, const_type))
{
/// For cases where it says, for example, `WHERE 0 AND something`
@ -1948,32 +1617,6 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl
return false;
}
bool KeyCondition::tryParseLogicalOperatorFromAST(const FunctionTree & func, RPNElement & out)
{
/// Functions AND, OR, NOT.
/// Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
/// (or, the same thing - calling the function `and` from one argument).
if (func.getFunctionName() == "not")
{
if (func.numArguments() != 1)
return false;
out.function = RPNElement::FUNCTION_NOT;
}
else
{
if (func.getFunctionName() == "and" || func.getFunctionName() == "indexHint")
out.function = RPNElement::FUNCTION_AND;
else if (func.getFunctionName() == "or")
out.function = RPNElement::FUNCTION_OR;
else
return false;
}
return true;
}
String KeyCondition::toString() const
{
String res;

@ -2,11 +2,16 @@
#include <optional>
#include <Interpreters/Set.h>
#include <Core/SortDescription.h>
#include <Parsers/ASTExpressionList.h>
#include <Storages/SelectQueryInfo.h>
#include <Parsers/ASTExpressionList.h>
#include <Interpreters/Set.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/TreeRewriter.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/MergeTree/RPNBuilder.h>
namespace DB
{
@ -205,45 +210,37 @@ public:
class KeyCondition
{
public:
/// Does not take into account the SAMPLE section. all_columns - the set of all columns of the table.
/// Construct key condition from AST SELECT query WHERE, PREWHERE and additional filters
KeyCondition(
const ASTPtr & query,
const ASTs & additional_filter_asts,
TreeRewriterResultPtr syntax_analyzer_result,
Block block_with_constants,
PreparedSetsPtr prepared_sets_,
ContextPtr context,
const Names & key_column_names,
const ExpressionActionsPtr & key_expr,
NameSet array_joined_column_names,
bool single_point_ = false,
bool strict_ = false);
/** Construct key condition from AST SELECT query WHERE, PREWHERE and additional filters.
* Select query, additional filters, prepared sets are initialized using query info.
*/
KeyCondition(
const SelectQueryInfo & query_info,
ContextPtr context,
const Names & key_column_names,
const ExpressionActionsPtr & key_expr_,
bool single_point_ = false,
bool strict_ = false)
: KeyCondition(
query_info.query,
query_info.filter_asts,
query_info.syntax_analyzer_result,
query_info.prepared_sets,
context,
key_column_names,
key_expr_,
single_point_,
strict_)
{
}
bool strict_ = false);
/// Construct key condition from ActionsDAG nodes
KeyCondition(
ActionDAGNodes dag_nodes,
TreeRewriterResultPtr syntax_analyzer_result,
PreparedSetsPtr prepared_sets_,
ContextPtr context,
const Names & key_column_names,
const ExpressionActionsPtr & key_expr,
NameSet array_joined_column_names,
bool single_point_ = false,
bool strict_ = false);
@ -275,6 +272,7 @@ public:
/// Checks that the index can not be used
/// FUNCTION_UNKNOWN will be AND'ed (if any).
bool alwaysUnknownOrTrue() const;
/// Checks that the index can not be used
/// Does not allow any FUNCTION_UNKNOWN (will instantly return true).
bool anyUnknownOrAlwaysTrue() const;
@ -313,10 +311,18 @@ public:
* Returns false, if expression isn't constant.
*/
static bool getConstant(
const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type);
const ASTPtr & expr,
Block & block_with_constants,
Field & out_value,
DataTypePtr & out_type);
/** Calculate expressions that depend only on constants.
* Needed for the index to work when something like "WHERE Date = toDate(now())" is written.
*/
static Block getBlockWithConstants(
const ASTPtr & query, const TreeRewriterResultPtr & syntax_analyzer_result, ContextPtr context);
const ASTPtr & query,
const TreeRewriterResultPtr & syntax_analyzer_result,
ContextPtr context);
static std::optional<Range> applyMonotonicFunctionsChainToRange(
Range key_range,
@ -373,14 +379,11 @@ private:
using RPN = std::vector<RPNElement>;
using ColumnIndices = std::map<String, size_t>;
using AtomMap = std::unordered_map<std::string, bool(*)(RPNElement & out, const Field & value)>;
public:
using AtomMap = std::unordered_map<std::string, bool(*)(RPNElement & out, const Field & value)>;
static const AtomMap atom_map;
class Tree;
class FunctionTree;
private:
BoolMask checkInRange(
size_t used_key_size,
@ -390,9 +393,7 @@ private:
bool right_bounded,
BoolMask initial_mask) const;
void traverseAST(const Tree & node, ContextPtr context, Block & block_with_constants);
bool tryParseAtomFromAST(const Tree & node, ContextPtr context, Block & block_with_constants, RPNElement & out);
static bool tryParseLogicalOperatorFromAST(const FunctionTree & func, RPNElement & out);
bool extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out);
/** Is node the key column
* or expression in which column of key is wrapped by chain of functions,
@ -401,17 +402,16 @@ private:
* and fills chain of possibly-monotonic functions.
*/
bool isKeyPossiblyWrappedByMonotonicFunctions(
const Tree & node,
ContextPtr context,
const RPNBuilderTreeNode & node,
size_t & out_key_column_num,
DataTypePtr & out_key_res_column_type,
MonotonicFunctionsChain & out_functions_chain);
bool isKeyPossiblyWrappedByMonotonicFunctionsImpl(
const Tree & node,
const RPNBuilderTreeNode & node,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
std::vector<FunctionTree> & out_functions_chain);
std::vector<RPNBuilderFunctionTreeNode> & out_functions_chain);
bool transformConstantWithValidFunctions(
const String & expr_name,
@ -422,21 +422,24 @@ private:
std::function<bool(IFunctionBase &, const IDataType &)> always_monotonic) const;
bool canConstantBeWrappedByMonotonicFunctions(
const Tree & node,
const RPNBuilderTreeNode & node,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
Field & out_value,
DataTypePtr & out_type);
bool canConstantBeWrappedByFunctions(
const Tree & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type);
const RPNBuilderTreeNode & node,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
Field & out_value,
DataTypePtr & out_type);
/// If it's possible to make an RPNElement
/// that will filter values (possibly tuples) by the content of 'prepared_set',
/// do it and return true.
bool tryPrepareSetIndex(
const FunctionTree & func,
ContextPtr context,
const RPNBuilderFunctionTreeNode & func,
RPNElement & out,
size_t & out_key_column_num);
@ -472,11 +475,12 @@ private:
/// All intermediate columns are used to calculate key_expr.
const NameSet key_subexpr_names;
NameSet array_joined_columns;
PreparedSetsPtr prepared_sets;
/// Array joined column names
NameSet array_joined_column_names;
// If true, always allow key_expr to be wrapped by function
bool single_point;
// If true, do not use always_monotonic information to transform constants
bool strict;
};

View File

@ -153,7 +153,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
global_ctx->all_column_names = global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical();
global_ctx->storage_columns = global_ctx->metadata_snapshot->getColumns().getAllPhysical();
auto object_columns = MergeTreeData::getObjectColumns(global_ctx->future_part->parts, global_ctx->metadata_snapshot->getColumns());
auto object_columns = MergeTreeData::getConcreteObjectColumns(global_ctx->future_part->parts, global_ctx->metadata_snapshot->getColumns());
global_ctx->storage_snapshot = std::make_shared<StorageSnapshot>(*global_ctx->data, global_ctx->metadata_snapshot, object_columns);
extendObjectColumns(global_ctx->storage_columns, object_columns, false);

View File

@ -7124,18 +7124,18 @@ ReservationPtr MergeTreeData::balancedReservation(
return reserved_space;
}
ColumnsDescription MergeTreeData::getObjectColumns(
ColumnsDescription MergeTreeData::getConcreteObjectColumns(
const DataPartsVector & parts, const ColumnsDescription & storage_columns)
{
return DB::getObjectColumns(
return DB::getConcreteObjectColumns(
parts.begin(), parts.end(),
storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); });
}
ColumnsDescription MergeTreeData::getObjectColumns(
ColumnsDescription MergeTreeData::getConcreteObjectColumns(
boost::iterator_range<DataPartIteratorByStateAndInfo> range, const ColumnsDescription & storage_columns)
{
return DB::getObjectColumns(
return DB::getConcreteObjectColumns(
range.begin(), range.end(),
storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); });
}
@ -7144,21 +7144,21 @@ void MergeTreeData::resetObjectColumnsFromActiveParts(const DataPartsLock & /*lo
{
auto metadata_snapshot = getInMemoryMetadataPtr();
const auto & columns = metadata_snapshot->getColumns();
if (!hasObjectColumns(columns))
if (!hasDynamicSubcolumns(columns))
return;
auto range = getDataPartsStateRange(DataPartState::Active);
object_columns = getObjectColumns(range, columns);
object_columns = getConcreteObjectColumns(range, columns);
}
void MergeTreeData::updateObjectColumns(const DataPartPtr & part, const DataPartsLock & /*lock*/)
{
auto metadata_snapshot = getInMemoryMetadataPtr();
const auto & columns = metadata_snapshot->getColumns();
if (!hasObjectColumns(columns))
if (!hasDynamicSubcolumns(columns))
return;
DB::updateObjectColumns(object_columns, part->getColumns());
DB::updateObjectColumns(object_columns, columns, part->getColumns());
}
StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const

View File

@ -779,10 +779,10 @@ public:
return column_sizes;
}
const ColumnsDescription & getObjectColumns() const { return object_columns; }
const ColumnsDescription & getConcreteObjectColumns() const { return object_columns; }
/// Creates description of columns of data type Object from the range of data parts.
static ColumnsDescription getObjectColumns(
static ColumnsDescription getConcreteObjectColumns(
const DataPartsVector & parts, const ColumnsDescription & storage_columns);
IndexSizeByName getSecondaryIndexSizes() const override
@ -1151,7 +1151,7 @@ protected:
}
/// Creates description of columns of data type Object from the range of data parts.
static ColumnsDescription getObjectColumns(
static ColumnsDescription getConcreteObjectColumns(
boost::iterator_range<DataPartIteratorByStateAndInfo> range, const ColumnsDescription & storage_columns);
std::optional<UInt64> totalRowsByPartitionPredicateImpl(

View File

@ -131,7 +131,7 @@ void writeColumnSingleGranule(
serialize_settings.position_independent_encoding = true; //-V1048
serialize_settings.low_cardinality_max_dictionary_size = 0; //-V1048
serialization->serializeBinaryBulkStatePrefix(serialize_settings, state);
serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state);
serialization->serializeBinaryBulkWithMultipleStreams(*column.column, from_row, number_of_rows, serialize_settings, state);
serialization->serializeBinaryBulkStateSuffix(serialize_settings, state);
}

View File

@ -355,7 +355,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
{
ISerialization::SerializeBinaryBulkSettings serialize_settings;
serialize_settings.getter = createStreamGetter(name_and_type, offset_columns);
serialization->serializeBinaryBulkStatePrefix(serialize_settings, it->second);
serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second);
}
const auto & global_settings = storage.getContext()->getSettingsRef();

View File

@ -288,7 +288,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart(
auto columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames());
for (auto & column : columns)
if (isObject(column.type))
if (column.type->hasDynamicSubcolumns())
column.type = block.getByName(column.name).type;
static const String TMP_PREFIX = "tmp_insert_";

View File

@ -6,11 +6,13 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnTuple.h>
#include <Storages/MergeTree/RPNBuilder.h>
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
#include <Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h>
#include <Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Interpreters/misc.h>
#include <Interpreters/BloomFilterHash.h>
#include <Interpreters/castColumn.h>
@ -28,19 +30,7 @@ namespace ErrorCodes
namespace
{
PreparedSetKey getPreparedSetKey(const ASTPtr & node, const DataTypePtr & data_type)
{
/// If the data type is tuple, let's try unbox once
if (node->as<ASTSubquery>() || node->as<ASTIdentifier>())
return PreparedSetKey::forSubquery(*node);
if (const auto * date_type_tuple = typeid_cast<const DataTypeTuple *>(&*data_type))
return PreparedSetKey::forLiteral(*node, date_type_tuple->getElements());
return PreparedSetKey::forLiteral(*node, DataTypes(1, data_type));
}
ColumnWithTypeAndName getPreparedSetInfo(const SetPtr & prepared_set)
ColumnWithTypeAndName getPreparedSetInfo(const ConstSetPtr & prepared_set)
{
if (prepared_set->getDataTypes().size() == 1)
return {prepared_set->getSetElements()[0], prepared_set->getElementsTypes()[0], "dummy"};
@ -110,8 +100,22 @@ MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter(
const SelectQueryInfo & info_, ContextPtr context_, const Block & header_, size_t hash_functions_)
: WithContext(context_), header(header_), query_info(info_), hash_functions(hash_functions_)
{
auto atom_from_ast = [this](auto & node, auto, auto & constants, auto & out) { return traverseAtomAST(node, constants, out); };
rpn = std::move(RPNBuilder<RPNElement>(info_, getContext(), atom_from_ast).extractRPN());
ASTPtr filter_node = buildFilterNode(query_info.query);
if (!filter_node)
{
rpn.push_back(RPNElement::FUNCTION_UNKNOWN);
return;
}
auto block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context_);
RPNBuilder<RPNElement> builder(
filter_node,
context_,
std::move(block_with_constants),
query_info.prepared_sets,
[&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); });
rpn = std::move(builder).extractRPN();
}
bool MergeTreeIndexConditionBloomFilter::alwaysUnknownOrTrue() const
@ -235,12 +239,13 @@ bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndex
return rpn_stack[0].can_be_true;
}
bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out)
bool MergeTreeIndexConditionBloomFilter::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out)
{
{
Field const_value;
DataTypePtr const_type;
if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type))
if (node.tryGetConstant(const_value, const_type))
{
if (const_value.getType() == Field::Types::UInt64)
{
@ -262,56 +267,62 @@ bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Bl
}
}
return traverseFunction(node, block_with_constants, out, nullptr);
return traverseFunction(node, out, nullptr /*parent*/);
}
bool MergeTreeIndexConditionBloomFilter::traverseFunction(const ASTPtr & node, Block & block_with_constants, RPNElement & out, const ASTPtr & parent)
bool MergeTreeIndexConditionBloomFilter::traverseFunction(const RPNBuilderTreeNode & node, RPNElement & out, const RPNBuilderTreeNode * parent)
{
bool maybe_useful = false;
if (const auto * function = node->as<ASTFunction>())
if (node.isFunction())
{
if (!function->arguments)
return false;
const auto function = node.toFunctionNode();
auto arguments_size = function.getArgumentsSize();
auto function_name = function.getFunctionName();
const ASTs & arguments = function->arguments->children;
for (const auto & arg : arguments)
for (size_t i = 0; i < arguments_size; ++i)
{
if (traverseFunction(arg, block_with_constants, out, node))
auto argument = function.getArgumentAt(i);
if (traverseFunction(argument, out, &node))
maybe_useful = true;
}
if (arguments.size() != 2)
if (arguments_size != 2)
return false;
if (functionIsInOrGlobalInOperator(function->name))
{
auto prepared_set = getPreparedSet(arguments[1]);
auto lhs_argument = function.getArgumentAt(0);
auto rhs_argument = function.getArgumentAt(1);
if (prepared_set)
if (functionIsInOrGlobalInOperator(function_name))
{
ConstSetPtr prepared_set = rhs_argument.tryGetPreparedSet();
if (prepared_set && prepared_set->hasExplicitSetElements())
{
if (traverseASTIn(function->name, arguments[0], prepared_set, out))
const auto prepared_info = getPreparedSetInfo(prepared_set);
if (traverseTreeIn(function_name, lhs_argument, prepared_set, prepared_info.type, prepared_info.column, out))
maybe_useful = true;
}
}
else if (function->name == "equals" ||
function->name == "notEquals" ||
function->name == "has" ||
function->name == "mapContains" ||
function->name == "indexOf" ||
function->name == "hasAny" ||
function->name == "hasAll")
else if (function_name == "equals" ||
function_name == "notEquals" ||
function_name == "has" ||
function_name == "mapContains" ||
function_name == "indexOf" ||
function_name == "hasAny" ||
function_name == "hasAll")
{
Field const_value;
DataTypePtr const_type;
if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type))
if (rhs_argument.tryGetConstant(const_value, const_type))
{
if (traverseASTEquals(function->name, arguments[0], const_type, const_value, out, parent))
if (traverseTreeEquals(function_name, lhs_argument, const_type, const_value, out, parent))
maybe_useful = true;
}
else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type))
else if (lhs_argument.tryGetConstant(const_value, const_type))
{
if (traverseASTEquals(function->name, arguments[1], const_type, const_value, out, parent))
if (traverseTreeEquals(function_name, rhs_argument, const_type, const_value, out, parent))
maybe_useful = true;
}
}
@ -320,28 +331,20 @@ bool MergeTreeIndexConditionBloomFilter::traverseFunction(const ASTPtr & node, B
return maybe_useful;
}
bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
bool MergeTreeIndexConditionBloomFilter::traverseTreeIn(
const String & function_name,
const ASTPtr & key_ast,
const SetPtr & prepared_set,
RPNElement & out)
{
const auto prepared_info = getPreparedSetInfo(prepared_set);
return traverseASTIn(function_name, key_ast, prepared_set, prepared_info.type, prepared_info.column, out);
}
bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
const String & function_name,
const ASTPtr & key_ast,
const SetPtr & prepared_set,
const RPNBuilderTreeNode & key_node,
const ConstSetPtr & prepared_set,
const DataTypePtr & type,
const ColumnPtr & column,
RPNElement & out)
{
if (header.has(key_ast->getColumnName()))
auto key_node_column_name = key_node.getColumnName();
if (header.has(key_node_column_name))
{
size_t row_size = column->size();
size_t position = header.getPositionByName(key_ast->getColumnName());
size_t position = header.getPositionByName(key_node_column_name);
const DataTypePtr & index_type = header.getByPosition(position).type;
const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type);
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size)));
@ -355,30 +358,33 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
return true;
}
if (const auto * function = key_ast->as<ASTFunction>())
if (key_node.isFunction())
{
auto key_node_function = key_node.toFunctionNode();
auto key_node_function_name = key_node_function.getFunctionName();
size_t key_node_function_arguments_size = key_node_function.getArgumentsSize();
WhichDataType which(type);
if (which.isTuple() && function->name == "tuple")
if (which.isTuple() && key_node_function_name == "tuple")
{
const auto & tuple_column = typeid_cast<const ColumnTuple *>(column.get());
const auto & tuple_data_type = typeid_cast<const DataTypeTuple *>(type.get());
const ASTs & arguments = typeid_cast<const ASTExpressionList &>(*function->arguments).children;
if (tuple_data_type->getElements().size() != arguments.size() || tuple_column->getColumns().size() != arguments.size())
if (tuple_data_type->getElements().size() != key_node_function_arguments_size || tuple_column->getColumns().size() != key_node_function_arguments_size)
throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
bool match_with_subtype = false;
const auto & sub_columns = tuple_column->getColumns();
const auto & sub_data_types = tuple_data_type->getElements();
for (size_t index = 0; index < arguments.size(); ++index)
match_with_subtype |= traverseASTIn(function_name, arguments[index], nullptr, sub_data_types[index], sub_columns[index], out);
for (size_t index = 0; index < key_node_function_arguments_size; ++index)
match_with_subtype |= traverseTreeIn(function_name, key_node_function.getArgumentAt(index), nullptr, sub_data_types[index], sub_columns[index], out);
return match_with_subtype;
}
if (function->name == "arrayElement")
if (key_node_function_name == "arrayElement")
{
/** Try to parse arrayElement for mapKeys index.
* It is important to ignore keys like column_map['Key'] IN ('') because if the key does not exist in the map
@ -387,7 +393,6 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
* We cannot skip keys that do not exist in the map if the comparison is with the default type value, because
* that way we would skip necessary granules where the map key does not exist.
*/
if (!prepared_set)
return false;
@ -400,28 +405,26 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
if (set_contain_default_value)
return false;
const auto * column_ast_identifier = function->arguments.get()->children[0].get()->as<ASTIdentifier>();
if (!column_ast_identifier)
return false;
const auto & col_name = column_ast_identifier->name();
auto map_keys_index_column_name = fmt::format("mapKeys({})", col_name);
auto map_values_index_column_name = fmt::format("mapValues({})", col_name);
auto first_argument = key_node_function.getArgumentAt(0);
const auto column_name = first_argument.getColumnName();
auto map_keys_index_column_name = fmt::format("mapKeys({})", column_name);
auto map_values_index_column_name = fmt::format("mapValues({})", column_name);
if (header.has(map_keys_index_column_name))
{
/// For mapKeys we serialize key argument with bloom filter
auto & argument = function->arguments.get()->children[1];
auto second_argument = key_node_function.getArgumentAt(1);
if (const auto * literal = argument->as<ASTLiteral>())
Field constant_value;
DataTypePtr constant_type;
if (second_argument.tryGetConstant(constant_value, constant_type))
{
size_t position = header.getPositionByName(map_keys_index_column_name);
const DataTypePtr & index_type = header.getByPosition(position).type;
auto element_key = literal->value;
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type);
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), element_key)));
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), constant_value)));
}
else
{
@ -459,74 +462,97 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
}
static bool indexOfCanUseBloomFilter(const ASTPtr & parent)
static bool indexOfCanUseBloomFilter(const RPNBuilderTreeNode * parent)
{
if (!parent)
return true;
if (!parent->isFunction())
return false;
auto function = parent->toFunctionNode();
auto function_name = function.getFunctionName();
/// `parent` is a function where `indexOf` is located.
/// Example: `indexOf(arr, x) = 1`, parent is a function named `equals`.
if (const auto * function = parent->as<ASTFunction>())
if (function_name == "and")
{
if (function->name == "and")
return true;
}
else if (function_name == "equals" /// notEquals is not applicable
|| function_name == "greater" || function_name == "greaterOrEquals"
|| function_name == "less" || function_name == "lessOrEquals")
{
size_t function_arguments_size = function.getArgumentsSize();
if (function_arguments_size != 2)
return false;
/// We don't allow constant expressions like `indexOf(arr, x) = 1 + 0` but it's negligible.
/// We should return true when the corresponding expression implies that the array contains the element.
/// Example: when `indexOf(arr, x)` > 10 is written, it means that arr definitely should contain the element
/// (at least at 11th position but it does not matter).
bool reversed = false;
Field constant_value;
DataTypePtr constant_type;
if (function.getArgumentAt(0).tryGetConstant(constant_value, constant_type))
{
reversed = true;
}
else if (function.getArgumentAt(1).tryGetConstant(constant_value, constant_type))
{
}
else
{
return false;
}
Field zero(0);
bool constant_equal_zero = applyVisitor(FieldVisitorAccurateEquals(), constant_value, zero);
if (function_name == "equals" && !constant_equal_zero)
{
/// indexOf(...) = c, c != 0
return true;
}
else if (function->name == "equals" /// notEquals is not applicable
|| function->name == "greater" || function->name == "greaterOrEquals"
|| function->name == "less" || function->name == "lessOrEquals")
else if (function_name == "notEquals" && constant_equal_zero)
{
if (function->arguments->children.size() != 2)
return false;
/// We don't allow constant expressions like `indexOf(arr, x) = 1 + 0` but it's negligible.
/// We should return true when the corresponding expression implies that the array contains the element.
/// Example: when `indexOf(arr, x)` > 10 is written, it means that arr definitely should contain the element
/// (at least at 11th position but it does not matter).
bool reversed = false;
const ASTLiteral * constant = nullptr;
if (const ASTLiteral * left = function->arguments->children[0]->as<ASTLiteral>())
{
constant = left;
reversed = true;
}
else if (const ASTLiteral * right = function->arguments->children[1]->as<ASTLiteral>())
{
constant = right;
}
else
return false;
Field zero(0);
return (function->name == "equals" /// indexOf(...) = c, c != 0
&& !applyVisitor(FieldVisitorAccurateEquals(), constant->value, zero))
|| (function->name == "notEquals" /// indexOf(...) != c, c = 0
&& applyVisitor(FieldVisitorAccurateEquals(), constant->value, zero))
|| (function->name == (reversed ? "less" : "greater") /// indexOf(...) > c, c >= 0
&& !applyVisitor(FieldVisitorAccurateLess(), constant->value, zero))
|| (function->name == (reversed ? "lessOrEquals" : "greaterOrEquals") /// indexOf(...) >= c, c > 0
&& applyVisitor(FieldVisitorAccurateLess(), zero, constant->value));
/// indexOf(...) != c, c = 0
return true;
}
else if (function_name == (reversed ? "less" : "greater") && !applyVisitor(FieldVisitorAccurateLess(), constant_value, zero))
{
/// indexOf(...) > c, c >= 0
return true;
}
else if (function_name == (reversed ? "lessOrEquals" : "greaterOrEquals") && applyVisitor(FieldVisitorAccurateLess(), zero, constant_value))
{
/// indexOf(...) >= c, c > 0
return true;
}
return false;
}
return false;
}
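For reference, the decision above reduces to a small predicate table. A simplified restatement (not the ClickHouse function itself) with the same cases:

```cpp
#include <string>

/// Simplified restatement of when a comparison around indexOf(arr, x) implies
/// "arr contains x", so the bloom filter index may be used.
/// `reversed` means the constant is on the left-hand side of the comparison.
bool comparisonImpliesContains(const std::string & op, long long c, bool reversed)
{
    if (op == "equals")
        return c != 0;                                         /// indexOf(...) = c, c != 0
    if (op == "notEquals")
        return c == 0;                                         /// indexOf(...) != 0
    if (op == (reversed ? "less" : "greater"))
        return c >= 0;                                         /// indexOf(...) > c, c >= 0
    if (op == (reversed ? "lessOrEquals" : "greaterOrEquals"))
        return c > 0;                                          /// indexOf(...) >= c, c > 0
    return false;                                              /// e.g. notEquals with c != 0
}
```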
bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals(
const String & function_name,
const ASTPtr & key_ast,
const RPNBuilderTreeNode & key_node,
const DataTypePtr & value_type,
const Field & value_field,
RPNElement & out,
const ASTPtr & parent)
const RPNBuilderTreeNode * parent)
{
if (header.has(key_ast->getColumnName()))
auto key_column_name = key_node.getColumnName();
if (header.has(key_column_name))
{
size_t position = header.getPositionByName(key_ast->getColumnName());
size_t position = header.getPositionByName(key_column_name);
const DataTypePtr & index_type = header.getByPosition(position).type;
const auto * array_type = typeid_cast<const DataTypeArray *>(index_type.get());
@ -602,13 +628,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
if (function_name == "mapContains" || function_name == "has")
{
const auto * key_ast_identifier = key_ast.get()->as<const ASTIdentifier>();
if (!key_ast_identifier)
return false;
const auto & col_name = key_ast_identifier->name();
auto map_keys_index_column_name = fmt::format("mapKeys({})", col_name);
auto map_keys_index_column_name = fmt::format("mapKeys({})", key_column_name);
if (!header.has(map_keys_index_column_name))
return false;
@ -629,29 +649,32 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
return true;
}
if (const auto * function = key_ast->as<ASTFunction>())
if (key_node.isFunction())
{
WhichDataType which(value_type);
if (which.isTuple() && function->name == "tuple")
auto key_node_function = key_node.toFunctionNode();
auto key_node_function_name = key_node_function.getFunctionName();
size_t key_node_function_arguments_size = key_node_function.getArgumentsSize();
if (which.isTuple() && key_node_function_name == "tuple")
{
const Tuple & tuple = value_field.get<const Tuple &>();
const auto * value_tuple_data_type = typeid_cast<const DataTypeTuple *>(value_type.get());
const ASTs & arguments = typeid_cast<const ASTExpressionList &>(*function->arguments).children;
if (tuple.size() != arguments.size())
if (tuple.size() != key_node_function_arguments_size)
throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
bool match_with_subtype = false;
const DataTypes & subtypes = value_tuple_data_type->getElements();
for (size_t index = 0; index < tuple.size(); ++index)
match_with_subtype |= traverseASTEquals(function_name, arguments[index], subtypes[index], tuple[index], out, key_ast);
match_with_subtype |= traverseTreeEquals(function_name, key_node_function.getArgumentAt(index), subtypes[index], tuple[index], out, &key_node);
return match_with_subtype;
}
if (function->name == "arrayElement" && (function_name == "equals" || function_name == "notEquals"))
if (key_node_function_name == "arrayElement" && (function_name == "equals" || function_name == "notEquals"))
{
/** Try to parse arrayElement for mapKeys index.
* It is important to ignore keys like column_map['Key'] = '' because if the key does not exist in the map
@ -663,27 +686,22 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
if (value_field == value_type->getDefault())
return false;
const auto * column_ast_identifier = function->arguments.get()->children[0].get()->as<ASTIdentifier>();
if (!column_ast_identifier)
return false;
auto first_argument = key_node_function.getArgumentAt(0);
const auto column_name = first_argument.getColumnName();
const auto & col_name = column_ast_identifier->name();
auto map_keys_index_column_name = fmt::format("mapKeys({})", col_name);
auto map_values_index_column_name = fmt::format("mapValues({})", col_name);
auto map_keys_index_column_name = fmt::format("mapKeys({})", column_name);
auto map_values_index_column_name = fmt::format("mapValues({})", column_name);
size_t position = 0;
Field const_value = value_field;
DataTypePtr const_type;
if (header.has(map_keys_index_column_name))
{
position = header.getPositionByName(map_keys_index_column_name);
auto second_argument = key_node_function.getArgumentAt(1);
auto & argument = function->arguments.get()->children[1];
if (const auto * literal = argument->as<ASTLiteral>())
const_value = literal->value;
else
if (!second_argument.tryGetConstant(const_value, const_type))
return false;
}
else if (header.has(map_values_index_column_name))
@ -708,23 +726,4 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
return false;
}
SetPtr MergeTreeIndexConditionBloomFilter::getPreparedSet(const ASTPtr & node)
{
if (header.has(node->getColumnName()))
{
const auto & column_and_type = header.getByName(node->getColumnName());
auto set_key = getPreparedSetKey(node, column_and_type.type);
if (auto prepared_set = query_info.prepared_sets->get(set_key))
return prepared_set;
}
else
{
for (const auto & set : query_info.prepared_sets->getByTreeHash(node->getTreeHash()))
if (set->hasExplicitSetElements())
return set;
}
return DB::SetPtr();
}
}

View File

@ -62,35 +62,27 @@ private:
const size_t hash_functions;
std::vector<RPNElement> rpn;
SetPtr getPreparedSet(const ASTPtr & node);
bool mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const;
bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out);
bool extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out);
bool traverseFunction(const ASTPtr & node, Block & block_with_constants, RPNElement & out, const ASTPtr & parent);
bool traverseFunction(const RPNBuilderTreeNode & node, RPNElement & out, const RPNBuilderTreeNode * parent);
bool traverseASTIn(
bool traverseTreeIn(
const String & function_name,
const ASTPtr & key_ast,
const SetPtr & prepared_set,
RPNElement & out);
bool traverseASTIn(
const String & function_name,
const ASTPtr & key_ast,
const SetPtr & prepared_set,
const RPNBuilderTreeNode & key_node,
const ConstSetPtr & prepared_set,
const DataTypePtr & type,
const ColumnPtr & column,
RPNElement & out);
bool traverseASTEquals(
bool traverseTreeEquals(
const String & function_name,
const ASTPtr & key_ast,
const RPNBuilderTreeNode & key_node,
const DataTypePtr & value_type,
const Field & value_field,
RPNElement & out,
const ASTPtr & parent);
const RPNBuilderTreeNode * parent);
};
}

View File

@ -11,9 +11,11 @@
#include <Interpreters/misc.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/RPNBuilder.h>
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Core/Defines.h>
#include <Poco/Logger.h>
@ -148,13 +150,22 @@ MergeTreeConditionFullText::MergeTreeConditionFullText(
, token_extractor(token_extactor_)
, prepared_sets(query_info.prepared_sets)
{
rpn = std::move(
RPNBuilder<RPNElement>(
query_info, context,
[this] (const ASTPtr & node, ContextPtr /* context */, Block & block_with_constants, RPNElement & out) -> bool
{
return this->traverseAtomAST(node, block_with_constants, out);
}).extractRPN());
ASTPtr filter_node = buildFilterNode(query_info.query);
if (!filter_node)
{
rpn.push_back(RPNElement::FUNCTION_UNKNOWN);
return;
}
auto block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context);
RPNBuilder<RPNElement> builder(
filter_node,
context,
std::move(block_with_constants),
query_info.prepared_sets,
[&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); });
rpn = std::move(builder).extractRPN();
}
bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
@ -306,13 +317,13 @@ bool MergeTreeConditionFullText::getKey(const std::string & key_column_name, siz
return true;
}
bool MergeTreeConditionFullText::traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out)
bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out)
{
{
Field const_value;
DataTypePtr const_type;
if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type))
if (node.tryGetConstant(const_value, const_type))
{
/// Check constant like in KeyCondition
if (const_value.getType() == Field::Types::UInt64
@ -329,53 +340,56 @@ bool MergeTreeConditionFullText::traverseAtomAST(const ASTPtr & node, Block & bl
}
}
if (const auto * function = node->as<ASTFunction>())
if (node.isFunction())
{
if (!function->arguments)
auto function_node = node.toFunctionNode();
auto function_name = function_node.getFunctionName();
size_t arguments_size = function_node.getArgumentsSize();
if (arguments_size != 2)
return false;
const ASTs & arguments = function->arguments->children;
auto left_argument = function_node.getArgumentAt(0);
auto right_argument = function_node.getArgumentAt(1);
if (arguments.size() != 2)
return false;
if (functionIsInOrGlobalInOperator(function->name))
if (functionIsInOrGlobalInOperator(function_name))
{
if (tryPrepareSetBloomFilter(arguments, out))
if (tryPrepareSetBloomFilter(left_argument, right_argument, out))
{
if (function->name == "notIn")
if (function_name == "notIn")
{
out.function = RPNElement::FUNCTION_NOT_IN;
return true;
}
else if (function->name == "in")
else if (function_name == "in")
{
out.function = RPNElement::FUNCTION_IN;
return true;
}
}
}
else if (function->name == "equals" ||
function->name == "notEquals" ||
function->name == "has" ||
function->name == "mapContains" ||
function->name == "like" ||
function->name == "notLike" ||
function->name == "hasToken" ||
function->name == "startsWith" ||
function->name == "endsWith" ||
function->name == "multiSearchAny")
else if (function_name == "equals" ||
function_name == "notEquals" ||
function_name == "has" ||
function_name == "mapContains" ||
function_name == "like" ||
function_name == "notLike" ||
function_name == "hasToken" ||
function_name == "startsWith" ||
function_name == "endsWith" ||
function_name == "multiSearchAny")
{
Field const_value;
DataTypePtr const_type;
if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type))
if (right_argument.tryGetConstant(const_value, const_type))
{
if (traverseASTEquals(function->name, arguments[0], const_type, const_value, out))
if (traverseTreeEquals(function_name, left_argument, const_type, const_value, out))
return true;
}
else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type) && (function->name == "equals" || function->name == "notEquals"))
else if (left_argument.tryGetConstant(const_value, const_type) && (function_name == "equals" || function_name == "notEquals"))
{
if (traverseASTEquals(function->name, arguments[1], const_type, const_value, out))
if (traverseTreeEquals(function_name, right_argument, const_type, const_value, out))
return true;
}
}
@ -384,9 +398,9 @@ bool MergeTreeConditionFullText::traverseAtomAST(const ASTPtr & node, Block & bl
return false;
}
bool MergeTreeConditionFullText::traverseASTEquals(
bool MergeTreeConditionFullText::traverseTreeEquals(
const String & function_name,
const ASTPtr & key_ast,
const RPNBuilderTreeNode & key_node,
const DataTypePtr & value_type,
const Field & value_field,
RPNElement & out)
@ -397,13 +411,17 @@ bool MergeTreeConditionFullText::traverseASTEquals(
Field const_value = value_field;
auto column_name = key_node.getColumnName();
size_t key_column_num = 0;
bool key_exists = getKey(key_ast->getColumnName(), key_column_num);
bool map_key_exists = getKey(fmt::format("mapKeys({})", key_ast->getColumnName()), key_column_num);
bool key_exists = getKey(column_name, key_column_num);
bool map_key_exists = getKey(fmt::format("mapKeys({})", column_name), key_column_num);
if (const auto * function = key_ast->as<ASTFunction>())
if (key_node.isFunction())
{
if (function->name == "arrayElement")
auto key_function_node = key_node.toFunctionNode();
auto key_function_node_function_name = key_function_node.getFunctionName();
if (key_function_node_function_name == "arrayElement")
{
/** Try to parse arrayElement for mapKeys index.
* It is important to ignore keys like column_map['Key'] = '' because if the key does not exist in the map
@ -415,11 +433,8 @@ bool MergeTreeConditionFullText::traverseASTEquals(
if (value_field == value_type->getDefault())
return false;
const auto * column_ast_identifier = function->arguments.get()->children[0].get()->as<ASTIdentifier>();
if (!column_ast_identifier)
return false;
const auto & map_column_name = column_ast_identifier->name();
auto first_argument = key_function_node.getArgumentAt(0);
const auto map_column_name = first_argument.getColumnName();
size_t map_keys_key_column_num = 0;
auto map_keys_index_column_name = fmt::format("mapKeys({})", map_column_name);
@ -431,12 +446,11 @@ bool MergeTreeConditionFullText::traverseASTEquals(
if (map_keys_exists)
{
auto & argument = function->arguments.get()->children[1];
auto second_argument = key_function_node.getArgumentAt(1);
DataTypePtr const_type;
if (const auto * literal = argument->as<ASTLiteral>())
if (second_argument.tryGetConstant(const_value, const_type))
{
auto element_key = literal->value;
const_value = element_key;
key_column_num = map_keys_key_column_num;
key_exists = true;
}
@ -567,23 +581,24 @@ bool MergeTreeConditionFullText::traverseASTEquals(
}
bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
const ASTs & args,
const RPNBuilderTreeNode & left_argument,
const RPNBuilderTreeNode & right_argument,
RPNElement & out)
{
const ASTPtr & left_arg = args[0];
const ASTPtr & right_arg = args[1];
std::vector<KeyTuplePositionMapping> key_tuple_mapping;
DataTypes data_types;
const auto * left_arg_tuple = typeid_cast<const ASTFunction *>(left_arg.get());
if (left_arg_tuple && left_arg_tuple->name == "tuple")
auto left_argument_function_node_optional = left_argument.toFunctionNodeOrNull();
if (left_argument_function_node_optional && left_argument_function_node_optional->getFunctionName() == "tuple")
{
const auto & tuple_elements = left_arg_tuple->arguments->children;
for (size_t i = 0; i < tuple_elements.size(); ++i)
const auto & left_argument_function_node = *left_argument_function_node_optional;
size_t left_argument_function_node_arguments_size = left_argument_function_node.getArgumentsSize();
for (size_t i = 0; i < left_argument_function_node_arguments_size; ++i)
{
size_t key = 0;
if (getKey(tuple_elements[i]->getColumnName(), key))
if (getKey(left_argument_function_node.getArgumentAt(i).getColumnName(), key))
{
key_tuple_mapping.emplace_back(i, key);
data_types.push_back(index_data_types[key]);
@ -593,7 +608,7 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
else
{
size_t key = 0;
if (getKey(left_arg->getColumnName(), key))
if (getKey(left_argument.getColumnName(), key))
{
key_tuple_mapping.emplace_back(0, key);
data_types.push_back(index_data_types[key]);
@ -603,19 +618,10 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
if (key_tuple_mapping.empty())
return false;
PreparedSetKey set_key;
if (typeid_cast<const ASTSubquery *>(right_arg.get()) || typeid_cast<const ASTIdentifier *>(right_arg.get()))
set_key = PreparedSetKey::forSubquery(*right_arg);
else
set_key = PreparedSetKey::forLiteral(*right_arg, data_types);
auto prepared_set = prepared_sets->get(set_key);
auto prepared_set = right_argument.tryGetPreparedSet(data_types);
if (!prepared_set)
return false;
if (!prepared_set->hasExplicitSetElements())
return false;
for (const auto & data_type : prepared_set->getDataTypes())
if (data_type->getTypeId() != TypeIndex::String && data_type->getTypeId() != TypeIndex::FixedString)
return false;

View File

@ -122,17 +122,17 @@ private:
using RPN = std::vector<RPNElement>;
bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out);
bool extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out);
bool traverseASTEquals(
bool traverseTreeEquals(
const String & function_name,
const ASTPtr & key_ast,
const RPNBuilderTreeNode & key_node,
const DataTypePtr & value_type,
const Field & value_field,
RPNElement & out);
bool getKey(const std::string & key_column_name, size_t & key_column_num);
bool tryPrepareSetBloomFilter(const ASTs & args, RPNElement & out);
bool tryPrepareSetBloomFilter(const RPNBuilderTreeNode & left_argument, const RPNBuilderTreeNode & right_argument, RPNElement & out);
static bool createFunctionEqualsCondition(
RPNElement & out, const Field & value, const BloomFilterParameters & params, TokenExtractorPtr token_extractor);

View File

@ -74,8 +74,9 @@ void MergeTreeIndexGranuleSet::serializeBinary(WriteBuffer & ostr) const
auto serialization = type->getDefaultSerialization();
ISerialization::SerializeBinaryBulkStatePtr state;
serialization->serializeBinaryBulkStatePrefix(settings, state);
serialization->serializeBinaryBulkWithMultipleStreams(*block.getByPosition(i).column, 0, size(), settings, state);
const auto & column = *block.getByPosition(i).column;
serialization->serializeBinaryBulkStatePrefix(column, settings, state);
serialization->serializeBinaryBulkWithMultipleStreams(column, 0, size(), settings, state);
serialization->serializeBinaryBulkStateSuffix(settings, state);
}
}

View File

@ -0,0 +1,47 @@
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectQuery.h>
namespace DB
{
ASTPtr buildFilterNode(const ASTPtr & select_query, ASTs additional_filters)
{
auto & select_query_typed = select_query->as<ASTSelectQuery &>();
ASTs filters;
if (select_query_typed.where())
filters.push_back(select_query_typed.where());
if (select_query_typed.prewhere())
filters.push_back(select_query_typed.prewhere());
filters.insert(filters.end(), additional_filters.begin(), additional_filters.end());
if (filters.empty())
return nullptr;
ASTPtr filter_node;
if (filters.size() == 1)
{
filter_node = filters.front();
}
else
{
auto function = std::make_shared<ASTFunction>();
function->name = "and";
function->arguments = std::make_shared<ASTExpressionList>();
function->children.push_back(function->arguments);
function->arguments->children = std::move(filters);
filter_node = std::move(function);
}
return filter_node;
}
}

View File

@ -0,0 +1,13 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
/** Build an AST filter node for index analysis from the WHERE and PREWHERE sections of the select query and additional filters.
* If the select query has neither WHERE nor PREWHERE and the additional filters are empty, nullptr is returned.
*/
ASTPtr buildFilterNode(const ASTPtr & select_query, ASTs additional_filters = {});
}
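A hedged usage sketch of buildFilterNode. It assumes the parseQuery overload that takes a parser, a query string, a maximum query size and a maximum parser depth; the query text and the limits here are arbitrary examples:

```cpp
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Storages/MergeTree/MergeTreeIndexUtils.h>

using namespace DB;

ASTPtr exampleCombinedFilter()
{
    ParserSelectQuery parser;
    ASTPtr select = parseQuery(parser, "SELECT x FROM t PREWHERE y = 2 WHERE x = 1", 0, 1000);

    /// WHERE is collected first, then PREWHERE, so the result is the conjunction and(x = 1, y = 2).
    ASTPtr filter = buildFilterNode(select);
    String filter_text = queryToString(filter); /// Roughly "x = 1 AND y = 2".
    return filter;
}
```

The index conditions above pass exactly this node (or FUNCTION_UNKNOWN if it is null) into RPNBuilder.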

View File

@ -1,8 +1,8 @@
#include <Storages/MergeTree/MergeTreeSink.h>
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <Storages/StorageMergeTree.h>
#include <DataTypes/ObjectUtils.h>
#include <Interpreters/PartLog.h>
#include <DataTypes/ObjectUtils.h>
namespace ProfileEvents
{
@ -56,8 +56,9 @@ struct MergeTreeSink::DelayedChunk
void MergeTreeSink::consume(Chunk chunk)
{
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
if (!storage_snapshot->object_columns.empty())
convertDynamicColumnsToTuples(block, storage_snapshot);
deduceTypesOfObjectColumns(storage_snapshot, block);
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context);
using DelayedPartitions = std::vector<MergeTreeSink::DelayedChunk::Partition>;

View File

@ -0,0 +1,417 @@
#include <Storages/MergeTree/RPNBuilder.h>
#include <Common/FieldVisitorToString.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSubquery.h>
#include <DataTypes/FieldToDataType.h>
#include <DataTypes/DataTypeNullable.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnSet.h>
#include <Functions/IFunction.h>
#include <Storages/KeyDescription.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & out, bool legacy = false)
{
switch (node.type)
{
case ActionsDAG::ActionType::INPUT:
writeString(node.result_name, out);
break;
case ActionsDAG::ActionType::COLUMN:
{
/// If it was created from ASTLiteral, then result_name can be an alias.
/// We need to convert value back to string here.
if (const auto * column_const = typeid_cast<const ColumnConst *>(node.column.get()))
writeString(applyVisitor(FieldVisitorToString(), column_const->getField()), out);
/// It may be possible that column is ColumnSet
else
writeString(node.result_name, out);
break;
}
case ActionsDAG::ActionType::ALIAS:
appendColumnNameWithoutAlias(*node.children.front(), out, legacy);
break;
case ActionsDAG::ActionType::ARRAY_JOIN:
writeCString("arrayJoin(", out);
appendColumnNameWithoutAlias(*node.children.front(), out, legacy);
writeChar(')', out);
break;
case ActionsDAG::ActionType::FUNCTION:
{
auto name = node.function_base->getName();
if (legacy && name == "modulo")
writeCString("moduleLegacy", out);
else
writeString(name, out);
writeChar('(', out);
bool first = true;
for (const auto * arg : node.children)
{
if (!first)
writeCString(", ", out);
first = false;
appendColumnNameWithoutAlias(*arg, out, legacy);
}
writeChar(')', out);
}
}
}
String getColumnNameWithoutAlias(const ActionsDAG::Node & node, bool legacy = false)
{
WriteBufferFromOwnString out;
appendColumnNameWithoutAlias(node, out, legacy);
return std::move(out.str());
}
}
RPNBuilderTreeContext::RPNBuilderTreeContext(ContextPtr query_context_)
: query_context(std::move(query_context_))
{}
RPNBuilderTreeContext::RPNBuilderTreeContext(ContextPtr query_context_, Block block_with_constants_, PreparedSetsPtr prepared_sets_)
: query_context(std::move(query_context_))
, block_with_constants(std::move(block_with_constants_))
, prepared_sets(std::move(prepared_sets_))
{}
RPNBuilderTreeNode::RPNBuilderTreeNode(const ActionsDAG::Node * dag_node_, RPNBuilderTreeContext & tree_context_)
: dag_node(dag_node_)
, tree_context(tree_context_)
{
assert(dag_node);
}
RPNBuilderTreeNode::RPNBuilderTreeNode(const IAST * ast_node_, RPNBuilderTreeContext & tree_context_)
: ast_node(ast_node_)
, tree_context(tree_context_)
{
assert(ast_node);
}
std::string RPNBuilderTreeNode::getColumnName() const
{
if (ast_node)
return ast_node->getColumnNameWithoutAlias();
else
return getColumnNameWithoutAlias(*dag_node);
}
std::string RPNBuilderTreeNode::getColumnNameWithModuloLegacy() const
{
if (ast_node)
{
auto adjusted_ast = ast_node->clone();
KeyDescription::moduloToModuloLegacyRecursive(adjusted_ast);
return adjusted_ast->getColumnNameWithoutAlias();
}
else
{
return getColumnNameWithoutAlias(*dag_node, true /*legacy*/);
}
}
bool RPNBuilderTreeNode::isFunction() const
{
if (ast_node)
return typeid_cast<const ASTFunction *>(ast_node);
else
return dag_node->type == ActionsDAG::ActionType::FUNCTION;
}
bool RPNBuilderTreeNode::isConstant() const
{
if (ast_node)
{
bool is_literal = typeid_cast<const ASTLiteral *>(ast_node);
if (is_literal)
return true;
String column_name = ast_node->getColumnName();
const auto & block_with_constants = tree_context.getBlockWithConstants();
if (block_with_constants.has(column_name) && isColumnConst(*block_with_constants.getByName(column_name).column))
return true;
return false;
}
else
{
return dag_node->column && isColumnConst(*dag_node->column);
}
}
ColumnWithTypeAndName RPNBuilderTreeNode::getConstantColumn() const
{
if (!isConstant())
throw Exception(ErrorCodes::LOGICAL_ERROR, "RPNBuilderTree node is not a constant");
ColumnWithTypeAndName result;
if (ast_node)
{
const auto * literal = assert_cast<const ASTLiteral *>(ast_node);
if (literal)
{
result.type = applyVisitor(FieldToDataType(), literal->value);
result.column = result.type->createColumnConst(0, literal->value);
return result;
}
String column_name = ast_node->getColumnName();
const auto & block_with_constants = tree_context.getBlockWithConstants();
return block_with_constants.getByName(column_name);
}
else
{
result.type = dag_node->result_type;
result.column = dag_node->column;
}
return result;
}
bool RPNBuilderTreeNode::tryGetConstant(Field & output_value, DataTypePtr & output_type) const
{
if (ast_node)
{
// Constant expr should use alias names if any
String column_name = ast_node->getColumnName();
const auto & block_with_constants = tree_context.getBlockWithConstants();
if (const auto * literal = ast_node->as<ASTLiteral>())
{
/// By default block_with_constants has only one column named "_dummy".
/// If the block contains only constants it may not have been preprocessed by
/// ExpressionAnalyzer, so try to look up in the default column.
if (!block_with_constants.has(column_name))
column_name = "_dummy";
/// Simple literal
output_value = literal->value;
output_type = block_with_constants.getByName(column_name).type;
/// If the constant is not Null, we can assume its type is not Nullable as well.
if (!output_value.isNull())
output_type = removeNullable(output_type);
return true;
}
else if (block_with_constants.has(column_name) &&
isColumnConst(*block_with_constants.getByName(column_name).column))
{
/// An expression which is dependent on constants only
const auto & constant_column = block_with_constants.getByName(column_name);
output_value = (*constant_column.column)[0];
output_type = constant_column.type;
if (!output_value.isNull())
output_type = removeNullable(output_type);
return true;
}
}
else
{
if (dag_node->column && isColumnConst(*dag_node->column))
{
output_value = (*dag_node->column)[0];
output_type = dag_node->result_type;
if (!output_value.isNull())
output_type = removeNullable(output_type);
return true;
}
}
return false;
}
namespace
{
ConstSetPtr tryGetSetFromDAGNode(const ActionsDAG::Node * dag_node)
{
if (!dag_node->column)
return {};
const IColumn * column = dag_node->column.get();
if (const auto * column_const = typeid_cast<const ColumnConst *>(column))
column = &column_const->getDataColumn();
if (const auto * column_set = typeid_cast<const ColumnSet *>(column))
{
auto set = column_set->getData();
if (set->isCreated())
return set;
}
return {};
}
}
ConstSetPtr RPNBuilderTreeNode::tryGetPreparedSet() const
{
const auto & prepared_sets = getTreeContext().getPreparedSets();
if (ast_node && prepared_sets)
{
auto prepared_sets_with_same_hash = prepared_sets->getByTreeHash(ast_node->getTreeHash());
for (auto & set : prepared_sets_with_same_hash)
if (set->isCreated())
return set;
}
else if (dag_node)
{
return tryGetSetFromDAGNode(dag_node);
}
return {};
}
ConstSetPtr RPNBuilderTreeNode::tryGetPreparedSet(const DataTypes & data_types) const
{
const auto & prepared_sets = getTreeContext().getPreparedSets();
if (prepared_sets && ast_node)
{
if (ast_node->as<ASTSubquery>() || ast_node->as<ASTTableIdentifier>())
return prepared_sets->get(PreparedSetKey::forSubquery(*ast_node));
return prepared_sets->get(PreparedSetKey::forLiteral(*ast_node, data_types));
}
else if (dag_node)
{
return tryGetSetFromDAGNode(dag_node);
}
return nullptr;
}
ConstSetPtr RPNBuilderTreeNode::tryGetPreparedSet(
const std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
const DataTypes & data_types) const
{
const auto & prepared_sets = getTreeContext().getPreparedSets();
if (prepared_sets && ast_node)
{
if (ast_node->as<ASTSubquery>() || ast_node->as<ASTTableIdentifier>())
return prepared_sets->get(PreparedSetKey::forSubquery(*ast_node));
/// We have `PreparedSetKey::forLiteral` but it is useless here as we don't have enough information
/// about types in left argument of the IN operator. Instead, we manually iterate through all the sets
/// and find the one for the right arg based on the AST structure (getTreeHash), after that we check
/// that the types it was prepared with are compatible with the types of the primary key.
auto types_match = [&indexes_mapping, &data_types](const SetPtr & candidate_set)
{
assert(indexes_mapping.size() == data_types.size());
for (size_t i = 0; i < indexes_mapping.size(); ++i)
{
if (!candidate_set->areTypesEqual(indexes_mapping[i].tuple_index, data_types[i]))
return false;
}
return true;
};
auto tree_hash = ast_node->getTreeHash();
for (const auto & set : prepared_sets->getByTreeHash(tree_hash))
{
if (types_match(set))
return set;
}
}
else if (dag_node->column)
{
return tryGetSetFromDAGNode(dag_node);
}
return nullptr;
}
RPNBuilderFunctionTreeNode RPNBuilderTreeNode::toFunctionNode() const
{
if (!isFunction())
throw Exception(ErrorCodes::LOGICAL_ERROR, "RPNBuilderTree node is not a function");
if (this->ast_node)
return RPNBuilderFunctionTreeNode(this->ast_node, tree_context);
else
return RPNBuilderFunctionTreeNode(this->dag_node, tree_context);
}
std::optional<RPNBuilderFunctionTreeNode> RPNBuilderTreeNode::toFunctionNodeOrNull() const
{
if (!isFunction())
return {};
if (this->ast_node)
return RPNBuilderFunctionTreeNode(this->ast_node, tree_context);
else
return RPNBuilderFunctionTreeNode(this->dag_node, tree_context);
}
std::string RPNBuilderFunctionTreeNode::getFunctionName() const
{
if (ast_node)
return assert_cast<const ASTFunction *>(ast_node)->name;
else
return dag_node->function_base->getName();
}
size_t RPNBuilderFunctionTreeNode::getArgumentsSize() const
{
if (ast_node)
{
const auto * ast_function = assert_cast<const ASTFunction *>(ast_node);
return ast_function->arguments ? ast_function->arguments->children.size() : 0;
}
else
{
return dag_node->children.size();
}
}
RPNBuilderTreeNode RPNBuilderFunctionTreeNode::getArgumentAt(size_t index) const
{
if (ast_node)
{
const auto * ast_function = assert_cast<const ASTFunction *>(ast_node);
return RPNBuilderTreeNode(ast_function->arguments->children[index].get(), tree_context);
}
else
{
return RPNBuilderTreeNode(dag_node->children[index], tree_context);
}
}
}
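The key design choice above is that every accessor checks `ast_node` first and falls back to `dag_node`, so index analysis code never has to branch on the representation. A minimal, generic sketch of that adapter idea with hypothetical node types (not the ClickHouse classes):

```cpp
#include <cassert>
#include <string>

/// Hypothetical stand-ins for the two underlying representations.
struct AstNode { std::string name; bool is_function = false; };
struct DagNode { std::string result_name; bool is_function = false; };

/// Thin wrapper exposing one interface over either representation,
/// mirroring how RPNBuilderTreeNode holds either an IAST * or an ActionsDAG::Node *.
class TreeNode
{
public:
    explicit TreeNode(const AstNode * ast_) : ast(ast_) { assert(ast); }
    explicit TreeNode(const DagNode * dag_) : dag(dag_) { assert(dag); }

    bool isFunction() const { return ast ? ast->is_function : dag->is_function; }

    std::string getName() const { return ast ? ast->name : dag->result_name; }

private:
    const AstNode * ast = nullptr;
    const DagNode * dag = nullptr;
};
```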

View File

@ -1,111 +1,266 @@
#pragma once
#include <Core/Block.h>
#include <DataTypes/DataTypesNumber.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectQuery.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <Storages/SelectQueryInfo.h>
#include <Common/typeid_cast.h>
#include <Interpreters/Context.h>
#include <Interpreters/Set.h>
#include <Interpreters/PreparedSets.h>
#include <Interpreters/ActionsDAG.h>
namespace DB
{
/// Builds reverse polish notation
template <typename RPNElement>
class RPNBuilder : WithContext
/** Context of RPNBuilderTree.
*
* For an AST tree, a precalculated block with constants and prepared sets are required for index analysis.
* For a DAG tree they are not required, because constants and sets are already
* calculated inside COLUMN actions DAG nodes.
*/
class RPNBuilderTreeContext
{
public:
using RPN = std::vector<RPNElement>;
using AtomFromASTFunc = std::function<
bool(const ASTPtr & node, ContextPtr context, Block & block_with_constants, RPNElement & out)>;
/// Construct RPNBuilderTreeContext for ActionsDAG tree
explicit RPNBuilderTreeContext(ContextPtr query_context_);
RPNBuilder(const SelectQueryInfo & query_info, ContextPtr context_, const AtomFromASTFunc & atom_from_ast_)
: WithContext(context_), atom_from_ast(atom_from_ast_)
/// Construct RPNBuilderTreeContext for AST tree
explicit RPNBuilderTreeContext(ContextPtr query_context_, Block block_with_constants_, PreparedSetsPtr prepared_sets_);
/// Get query context
const ContextPtr & getQueryContext() const
{
/** Evaluation of expressions that depend only on constants.
* For the index to be used, if it is written, for example `WHERE Date = toDate(now())`.
*/
block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, getContext());
/// Transform WHERE section to Reverse Polish notation
const ASTSelectQuery & select = typeid_cast<const ASTSelectQuery &>(*query_info.query);
if (select.where())
{
traverseAST(select.where());
if (select.prewhere())
{
traverseAST(select.prewhere());
rpn.emplace_back(RPNElement::FUNCTION_AND);
}
}
else if (select.prewhere())
{
traverseAST(select.prewhere());
}
else
{
rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
}
return query_context;
}
RPN && extractRPN() { return std::move(rpn); }
/** Get block with constants.
* Valid only for AST tree.
*/
const Block & getBlockWithConstants() const
{
return block_with_constants;
}
/** Get prepared sets.
* Valid only for AST tree.
*/
const PreparedSetsPtr & getPreparedSets() const
{
return prepared_sets;
}
private:
void traverseAST(const ASTPtr & node)
/// Valid for both AST and ActionDAG tree
ContextPtr query_context;
/// Valid only for AST tree
Block block_with_constants;
/// Valid only for AST tree
PreparedSetsPtr prepared_sets;
};
class RPNBuilderFunctionTreeNode;
/** RPNBuilderTreeNode is a wrapper around a DAG or AST node.
* It defines a unified interface for index analysis.
*/
class RPNBuilderTreeNode
{
public:
/// Construct RPNBuilderTreeNode with non null dag node and tree context
explicit RPNBuilderTreeNode(const ActionsDAG::Node * dag_node_, RPNBuilderTreeContext & tree_context_);
/// Construct RPNBuilderTreeNode with non null ast node and tree context
explicit RPNBuilderTreeNode(const IAST * ast_node_, RPNBuilderTreeContext & tree_context_);
/// Get column name
std::string getColumnName() const;
/** Get column name.
* Function `modulo` is replaced with `moduloLegacy`.
*/
std::string getColumnNameWithModuloLegacy() const;
/// Returns true if the node is a function
bool isFunction() const;
/// Returns true if the node is a constant
bool isConstant() const;
/** Get constant as constant column.
* The node must be constant before calling this method, otherwise a logical exception is thrown.
*/
ColumnWithTypeAndName getConstantColumn() const;
/** Try to get a constant from the node. If the node is constant, returns true and the constant value and constant type output parameters are set.
* Otherwise false is returned.
*/
bool tryGetConstant(Field & output_value, DataTypePtr & output_type) const;
/// Try to get a prepared set from the node
ConstSetPtr tryGetPreparedSet() const;
/// Try to get a prepared set from the node that matches the given data types
ConstSetPtr tryGetPreparedSet(const DataTypes & data_types) const;
/// Try to get a prepared set from the node that matches the given indexes mapping and data types
ConstSetPtr tryGetPreparedSet(
const std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
const DataTypes & data_types) const;
/** Convert node to function node.
* The node must be a function before calling this method, otherwise an exception is thrown.
*/
RPNBuilderFunctionTreeNode toFunctionNode() const;
/// Convert node to function node or null optional
std::optional<RPNBuilderFunctionTreeNode> toFunctionNodeOrNull() const;
/// Get tree context
const RPNBuilderTreeContext & getTreeContext() const
{
return tree_context;
}
/// Get tree context
RPNBuilderTreeContext & getTreeContext()
{
return tree_context;
}
protected:
const IAST * ast_node = nullptr;
const ActionsDAG::Node * dag_node = nullptr;
RPNBuilderTreeContext & tree_context;
};
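As a rough illustration only, index analysis code might probe a node through this interface as follows; what a real atom extractor does with the extracted constant or set is omitted here:
/// Sketch: probing a wrapped AST/DAG node through the unified interface declared above.
inline bool nodeLooksUsableForIndexAnalysis(const RPNBuilderTreeNode & node)
{
    Field constant_value;
    DataTypePtr constant_type;
    /// Constants are read the same way for AST and DAG nodes.
    if (node.isConstant() && node.tryGetConstant(constant_value, constant_type))
        return true;
    /// Prepared sets (for IN clauses) are resolved through the tree context.
    if (node.tryGetPreparedSet())
        return true;
    /// Anything else is only interesting if it is a function that an atom extractor can recognize.
    return node.isFunction();
}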
/** RPNBuilderFunctionTreeNode is a wrapper around RPNBuilderTreeNode with function type.
* It provides additional functionality that is specific to functions.
*/
class RPNBuilderFunctionTreeNode : public RPNBuilderTreeNode
{
public:
using RPNBuilderTreeNode::RPNBuilderTreeNode;
/// Get function name
std::string getFunctionName() const;
/// Get function arguments size
size_t getArgumentsSize() const;
/// Get function argument at index
RPNBuilderTreeNode getArgumentAt(size_t index) const;
};
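A small sketch of walking function arguments through this wrapper; the recursion and the particular metric (counting constant arguments) are illustrative assumptions, not code from this commit:
/// Sketch: recursively count constant arguments of a function node.
inline size_t countConstantArguments(const RPNBuilderFunctionTreeNode & function_node)
{
    size_t count = 0;
    size_t arguments_size = function_node.getArgumentsSize();
    for (size_t argument_index = 0; argument_index < arguments_size; ++argument_index)
    {
        auto argument = function_node.getArgumentAt(argument_index);
        if (argument.isConstant())
            ++count;
        else if (argument.isFunction())
            count += countConstantArguments(argument.toFunctionNode());
    }
    return count;
}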
/** RPNBuilder builds a stack of reverse polish notation elements (RPNElements) required for index analysis.
*
* The RPNBuilder client must provide an RPNElement type that has the following interface:
*
* struct RPNElementInterface
* {
* enum Function
* {
* FUNCTION_UNKNOWN, /// Can take any value.
* /// Operators of the logical expression.
* FUNCTION_NOT,
* FUNCTION_AND,
* FUNCTION_OR,
* ...
* };
*
* RPNElementInterface();
*
* Function function = FUNCTION_UNKNOWN;
*
* }
*
* RPNBuilder takes care of building the stack of RPNElements for the `NOT`, `AND`, `OR` types.
* In addition, the client must provide an ExtractAtomFromTreeFunction that returns true and fills the RPNElement output parameter
* if it can convert an RPNBuilderTree node to an RPNElement, and false otherwise.
*/
template <typename RPNElement>
class RPNBuilder
{
public:
using RPNElements = std::vector<RPNElement>;
using ExtractAtomFromTreeFunction = std::function<bool (const RPNBuilderTreeNode & node, RPNElement & out)>;
explicit RPNBuilder(const ActionsDAG::Node * filter_actions_dag_node,
ContextPtr query_context_,
const ExtractAtomFromTreeFunction & extract_atom_from_tree_function_)
: tree_context(std::move(query_context_))
, extract_atom_from_tree_function(extract_atom_from_tree_function_)
{
traverseTree(RPNBuilderTreeNode(filter_actions_dag_node, tree_context));
}
RPNBuilder(const ASTPtr & filter_node,
ContextPtr query_context_,
Block block_with_constants_,
PreparedSetsPtr prepared_sets_,
const ExtractAtomFromTreeFunction & extract_atom_from_tree_function_)
: tree_context(std::move(query_context_), std::move(block_with_constants_), std::move(prepared_sets_))
, extract_atom_from_tree_function(extract_atom_from_tree_function_)
{
traverseTree(RPNBuilderTreeNode(filter_node.get(), tree_context));
}
RPNElements && extractRPN() && { return std::move(rpn_elements); }
private:
void traverseTree(const RPNBuilderTreeNode & node)
{
RPNElement element;
if (ASTFunction * func = typeid_cast<ASTFunction *>(&*node))
if (node.isFunction())
{
if (operatorFromAST(func, element))
auto function_node = node.toFunctionNode();
if (extractLogicalOperatorFromTree(function_node, element))
{
auto & args = typeid_cast<ASTExpressionList &>(*func->arguments).children;
for (size_t i = 0, size = args.size(); i < size; ++i)
size_t arguments_size = function_node.getArgumentsSize();
for (size_t argument_index = 0; argument_index < arguments_size; ++argument_index)
{
traverseAST(args[i]);
auto function_node_argument = function_node.getArgumentAt(argument_index);
traverseTree(function_node_argument);
/** The first part of the condition is for the correct support of `and` and `or` functions of arbitrary arity
* - in this case `n - 1` elements are added (where `n` is the number of arguments).
*/
if (i != 0 || element.function == RPNElement::FUNCTION_NOT)
rpn.emplace_back(std::move(element));
if (argument_index != 0 || element.function == RPNElement::FUNCTION_NOT)
rpn_elements.emplace_back(std::move(element));
}
return;
}
}
if (!atom_from_ast(node, getContext(), block_with_constants, element))
{
if (!extract_atom_from_tree_function(node, element))
element.function = RPNElement::FUNCTION_UNKNOWN;
}
rpn.emplace_back(std::move(element));
rpn_elements.emplace_back(std::move(element));
}
bool operatorFromAST(const ASTFunction * func, RPNElement & out)
bool extractLogicalOperatorFromTree(const RPNBuilderFunctionTreeNode & function_node, RPNElement & out)
{
/// Functions AND, OR, NOT.
/// Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
/// (or, the same thing - calling the function `and` from one argument).
const ASTs & args = typeid_cast<const ASTExpressionList &>(*func->arguments).children;
/** Functions AND, OR, NOT.
* Also the special function `indexHint` - it works as if, instead of calling a function, there were just parentheses
* (or, equivalently, calling the function `and` with one argument).
*/
if (func->name == "not")
auto function_name = function_node.getFunctionName();
if (function_name == "not")
{
if (args.size() != 1)
if (function_node.getArgumentsSize() != 1)
return false;
out.function = RPNElement::FUNCTION_NOT;
}
else
{
if (func->name == "and" || func->name == "indexHint")
if (function_name == "and" || function_name == "indexHint")
out.function = RPNElement::FUNCTION_AND;
else if (func->name == "or")
else if (function_name == "or")
out.function = RPNElement::FUNCTION_OR;
else
return false;
@ -114,10 +269,9 @@ private:
return true;
}
const AtomFromASTFunc & atom_from_ast;
Block block_with_constants;
RPN rpn;
RPNBuilderTreeContext tree_context;
const ExtractAtomFromTreeFunction & extract_atom_from_tree_function;
RPNElements rpn_elements;
};
}
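To tie the pieces together, a client of the template above could look roughly like this. `ExampleRPNElement`, `exampleExtractAtom` and `buildExampleRPN` are illustrative assumptions, not code from this commit; only the RPNBuilder interface declared above is used:
/// Sketch: a minimal RPNElement type following the interface described in the header comment.
struct ExampleRPNElement
{
    enum Function
    {
        FUNCTION_UNKNOWN, /// Can take any value.
        FUNCTION_NOT,
        FUNCTION_AND,
        FUNCTION_OR,
        FUNCTION_EQUALS, /// Example atom.
    };
    Function function = FUNCTION_UNKNOWN;
};
/// Atom extractor: only `equals` with two arguments is recognized in this sketch.
inline bool exampleExtractAtom(const DB::RPNBuilderTreeNode & node, ExampleRPNElement & out)
{
    if (!node.isFunction())
        return false;
    auto function_node = node.toFunctionNode();
    if (function_node.getFunctionName() == "equals" && function_node.getArgumentsSize() == 2)
    {
        out.function = ExampleRPNElement::FUNCTION_EQUALS;
        return true;
    }
    return false;
}
/// Build RPN from a filter ActionsDAG node; NOT/AND/OR are handled by RPNBuilder itself.
inline std::vector<ExampleRPNElement> buildExampleRPN(
    const DB::ActionsDAG::Node * filter_dag_node, DB::ContextPtr query_context)
{
    DB::RPNBuilder<ExampleRPNElement>::ExtractAtomFromTreeFunction extract_atom = exampleExtractAtom;
    DB::RPNBuilder<ExampleRPNElement> builder(filter_dag_node, query_context, extract_atom);
    return std::move(builder).extractRPN();
}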

View File

@ -1,10 +1,10 @@
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h>
#include <Storages/MergeTree/ReplicatedMergeTreeSink.h>
#include <DataTypes/ObjectUtils.h>
#include <Interpreters/PartLog.h>
#include <Common/SipHash.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <DataTypes/ObjectUtils.h>
#include <Core/Block.h>
#include <IO/Operators.h>
@ -165,7 +165,9 @@ void ReplicatedMergeTreeSink::consume(Chunk chunk)
*/
size_t replicas_num = checkQuorumPrecondition(zookeeper);
deduceTypesOfObjectColumns(storage_snapshot, block);
if (!storage_snapshot->object_columns.empty())
convertDynamicColumnsToTuples(block, storage_snapshot);
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context);
using DelayedPartitions = std::vector<ReplicatedMergeTreeSink::DelayedChunk::Partition>;

View File

@ -47,10 +47,10 @@ public:
const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const override
{
const auto & storage_columns = metadata_snapshot->getColumns();
if (!hasObjectColumns(storage_columns))
if (!hasDynamicSubcolumns(storage_columns))
return std::make_shared<StorageSnapshot>(*this, metadata_snapshot);
auto object_columns = getObjectColumns(
auto object_columns = getConcreteObjectColumns(
parts.begin(), parts.end(),
storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); });
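The same `hasObjectColumns`/`getObjectColumns` to `hasDynamicSubcolumns`/`getConcreteObjectColumns` rename recurs in the storages below. A hedged sketch of the shared pattern, written over a generic parts container with a column getter (both placeholders, not code from this commit):
/// Sketch: collect concrete object-column types only when the table has dynamic subcolumns.
template <typename PartsContainer>
ColumnsDescription collectObjectColumnsIfAny(
    const StorageMetadataPtr & metadata_snapshot, const PartsContainer & parts)
{
    const auto & storage_columns = metadata_snapshot->getColumns();
    /// No Object columns declared: nothing to infer, return an empty description.
    if (!hasDynamicSubcolumns(storage_columns))
        return {};
    /// Merge the concrete types inferred for object columns across all parts.
    return getConcreteObjectColumns(
        parts.begin(), parts.end(),
        storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); });
}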

View File

@ -598,7 +598,7 @@ std::optional<QueryProcessingStage::Enum> StorageDistributed::getOptimizedQueryP
static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr query)
{
if (!hasObjectColumns(all_columns))
if (!hasDynamicSubcolumns(all_columns))
return false;
if (!query)
@ -613,7 +613,7 @@ static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr
auto name_in_storage = Nested::splitName(required_column).first;
auto column_in_storage = all_columns.tryGetPhysical(name_in_storage);
if (column_in_storage && isObject(column_in_storage->type))
if (column_in_storage && column_in_storage->type->hasDynamicSubcolumns())
return true;
}
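The per-column test that the two hunks above converge on can be read as a standalone predicate; how the required column names are gathered from the query is outside this excerpt, so the sketch below simply takes them as an argument:
/// Sketch: does any required column resolve to a physical column with dynamic subcolumns?
static bool anyRequiredColumnHasDynamicSubcolumns(
    const ColumnsDescription & all_columns, const Names & required_columns)
{
    if (!hasDynamicSubcolumns(all_columns))
        return false;
    for (const auto & required_column : required_columns)
    {
        /// A subcolumn name like `json.a.b` is checked against its top-level storage column `json`.
        auto name_in_storage = Nested::splitName(required_column).first;
        auto column_in_storage = all_columns.tryGetPhysical(name_in_storage);
        if (column_in_storage && column_in_storage->type->hasDynamicSubcolumns())
            return true;
    }
    return false;
}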
@ -640,7 +640,7 @@ StorageSnapshotPtr StorageDistributed::getStorageSnapshotForQuery(
metadata_snapshot->getColumns(),
getContext());
auto object_columns = DB::getObjectColumns(
auto object_columns = DB::getConcreteObjectColumns(
snapshot_data->objects_by_shard.begin(),
snapshot_data->objects_by_shard.end(),
metadata_snapshot->getColumns(),

View File

@ -526,7 +526,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns)
const auto * available_type = it->getMapped();
if (!isObject(*available_type)
if (!available_type->hasDynamicSubcolumns()
&& !column.type->equals(*available_type)
&& !isCompatibleEnumTypes(available_type, column.type.get()))
throw Exception(
@ -575,7 +575,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns,
const auto * provided_column_type = it->getMapped();
const auto * available_column_type = jt->getMapped();
if (!isObject(*provided_column_type)
if (!provided_column_type->hasDynamicSubcolumns()
&& !provided_column_type->equals(*available_column_type)
&& !isCompatibleEnumTypes(available_column_type, provided_column_type))
throw Exception(
@ -619,7 +619,7 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const
listOfColumns(available_columns));
const auto * available_type = it->getMapped();
if (!isObject(*available_type)
if (!available_type->hasDynamicSubcolumns()
&& !column.type->equals(*available_type)
&& !isCompatibleEnumTypes(available_type, column.type.get()))
throw Exception(

View File

@ -462,7 +462,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c
settings.getter = createStreamGetter(name_and_type);
if (!serialize_states.contains(name))
serialization->serializeBinaryBulkStatePrefix(settings, serialize_states[name]);
serialization->serializeBinaryBulkStatePrefix(column, settings, serialize_states[name]);
if (storage.use_marks_file)
{

View File

@ -146,7 +146,7 @@ public:
auto extended_storage_columns = storage_snapshot->getColumns(
GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects());
convertObjectsToTuples(block, extended_storage_columns);
convertDynamicColumnsToTuples(block, storage_snapshot);
}
if (storage.compress)
@ -212,10 +212,10 @@ StorageSnapshotPtr StorageMemory::getStorageSnapshot(const StorageMetadataPtr &
auto snapshot_data = std::make_unique<SnapshotData>();
snapshot_data->blocks = data.get();
if (!hasObjectColumns(metadata_snapshot->getColumns()))
if (!hasDynamicSubcolumns(metadata_snapshot->getColumns()))
return std::make_shared<StorageSnapshot>(*this, metadata_snapshot, ColumnsDescription{}, std::move(snapshot_data));
auto object_columns = getObjectColumns(
auto object_columns = getConcreteObjectColumns(
snapshot_data->blocks->begin(),
snapshot_data->blocks->end(),
metadata_snapshot->getColumns(),

View File

@ -364,39 +364,6 @@ String StorageS3Source::KeysIterator::next()
return pimpl->next();
}
class StorageS3Source::ReadTasksIterator::Impl
{
public:
explicit Impl(const std::vector<String> & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_)
: read_tasks(read_tasks_), new_read_tasks_callback(new_read_tasks_callback_)
{
}
String next()
{
size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
if (current_index >= read_tasks.size())
return new_read_tasks_callback();
return read_tasks[current_index];
}
private:
std::atomic_size_t index = 0;
std::vector<String> read_tasks;
ReadTaskCallback new_read_tasks_callback;
};
StorageS3Source::ReadTasksIterator::ReadTasksIterator(
const std::vector<String> & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_)
: pimpl(std::make_shared<StorageS3Source::ReadTasksIterator::Impl>(read_tasks_, new_read_tasks_callback_))
{
}
String StorageS3Source::ReadTasksIterator::next()
{
return pimpl->next();
}
Block StorageS3Source::getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns)
{
for (const auto & virtual_column : requested_virtual_columns)
@ -806,8 +773,7 @@ StorageS3::StorageS3(
distributed_processing_,
is_key_with_globs,
format_settings,
context_,
&read_tasks_used_in_schema_inference);
context_);
storage_metadata.setColumns(columns);
}
else
@ -835,19 +801,14 @@ std::shared_ptr<StorageS3Source::IteratorWrapper> StorageS3::createFileIterator(
ContextPtr local_context,
ASTPtr query,
const Block & virtual_block,
const std::vector<String> & read_tasks,
std::unordered_map<String, S3::ObjectInfo> * object_infos,
Strings * read_keys)
{
if (distributed_processing)
{
return std::make_shared<StorageS3Source::IteratorWrapper>(
[read_tasks_iterator = std::make_shared<StorageS3Source::ReadTasksIterator>(read_tasks, local_context->getReadTaskCallback()), read_keys]() -> String
{
auto key = read_tasks_iterator->next();
if (read_keys)
read_keys->push_back(key);
return key;
[callback = local_context->getReadTaskCallback()]() -> String {
return callback();
});
}
else if (is_key_with_globs)
@ -907,7 +868,6 @@ Pipe StorageS3::read(
local_context,
query_info.query,
virtual_block,
read_tasks_used_in_schema_inference,
&object_infos);
ColumnsDescription columns_description;
@ -1205,7 +1165,7 @@ ColumnsDescription StorageS3::getTableStructureFromData(
return getTableStructureFromDataImpl(
configuration.format, s3_configuration, configuration.compression_method, distributed_processing,
s3_configuration.uri.key.find_first_of("*?{") != std::string::npos, format_settings, ctx, nullptr, object_infos);
s3_configuration.uri.key.find_first_of("*?{") != std::string::npos, format_settings, ctx, object_infos);
}
ColumnsDescription StorageS3::getTableStructureFromDataImpl(
@ -1216,13 +1176,12 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
bool is_key_with_globs,
const std::optional<FormatSettings> & format_settings,
ContextPtr ctx,
std::vector<String> * read_keys_in_distributed_processing,
std::unordered_map<String, S3::ObjectInfo> * object_infos)
{
std::vector<String> read_keys;
auto file_iterator
= createFileIterator(s3_configuration, {s3_configuration.uri.key}, is_key_with_globs, distributed_processing, ctx, nullptr, {}, {}, object_infos, &read_keys);
= createFileIterator(s3_configuration, {s3_configuration.uri.key}, is_key_with_globs, distributed_processing, ctx, nullptr, {}, object_infos, &read_keys);
std::optional<ColumnsDescription> columns_from_cache;
size_t prev_read_keys_size = read_keys.size();
@ -1275,9 +1234,6 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
if (ctx->getSettingsRef().schema_inference_use_cache_for_s3)
addColumnsToCache(read_keys, s3_configuration, columns, format, format_settings, ctx);
if (distributed_processing && read_keys_in_distributed_processing)
*read_keys_in_distributed_processing = std::move(read_keys);
return columns;
}
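The removed ReadTasksIterator is replaced by wrapping the initiator callback directly. Below is a sketch of two iterator flavours behind the same `IteratorWrapper` (`std::function<String()>`); the vector-based variant is an illustrative stand-in for the keys/glob iterators defined elsewhere in this file, not code from this commit:
/// Sketch: distributed processing pulls the next key from the initiator on every call.
inline StorageS3Source::IteratorWrapper makeCallbackIterator(ContextPtr local_context)
{
    return [callback = local_context->getReadTaskCallback()]() -> String
    {
        return callback();
    };
}
/// Sketch: local processing over an explicit key list (illustrative stand-in).
inline StorageS3Source::IteratorWrapper makeKeysIterator(Strings keys)
{
    auto index = std::make_shared<std::atomic_size_t>(0);
    return [keys = std::move(keys), index]() -> String
    {
        size_t current_index = index->fetch_add(1, std::memory_order_relaxed);
        if (current_index >= keys.size())
            return {}; /// An empty string signals that the keys are exhausted.
        return keys[current_index];
    };
}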

View File

@ -66,18 +66,6 @@ public:
std::shared_ptr<Impl> pimpl;
};
class ReadTasksIterator
{
public:
ReadTasksIterator(const std::vector<String> & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_);
String next();
private:
class Impl;
/// shared_ptr to have copy constructor
std::shared_ptr<Impl> pimpl;
};
using IteratorWrapper = std::function<String()>;
static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);
@ -238,8 +226,6 @@ private:
ASTPtr partition_by;
bool is_key_with_globs = false;
std::vector<String> read_tasks_used_in_schema_inference;
std::unordered_map<String, S3::ObjectInfo> object_infos;
static void updateS3Configuration(ContextPtr, S3Configuration &);
@ -252,7 +238,6 @@ private:
ContextPtr local_context,
ASTPtr query,
const Block & virtual_block,
const std::vector<String> & read_tasks = {},
std::unordered_map<String, S3::ObjectInfo> * object_infos = nullptr,
Strings * read_keys = nullptr);
@ -264,7 +249,6 @@ private:
bool is_key_with_globs,
const std::optional<FormatSettings> & format_settings,
ContextPtr ctx,
std::vector<String> * read_keys_in_distributed_processing = nullptr,
std::unordered_map<String, S3::ObjectInfo> * object_infos = nullptr);
bool supportsSubsetOfColumns() const override;

View File

@ -5,46 +5,40 @@
#if USE_AWS_S3
#include "Common/Exception.h"
#include <Common/Throttler.h>
#include "Client/Connection.h"
#include "Core/QueryProcessingStage.h"
#include <Core/UUID.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromS3.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Interpreters/getHeaderForProcessingStage.h>
#include <Interpreters/SelectQueryOptions.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/getTableExpressions.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <QueryPipeline/narrowPipe.h>
#include <QueryPipeline/Pipe.h>
#include "Processors/ISource.h"
#include <Processors/Sources/RemoteSource.h>
#include <QueryPipeline/RemoteQueryExecutor.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Storages/IStorage.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/getVirtualsForStorage.h>
#include <Storages/StorageDictionary.h>
#include <Storages/addColumnsStructureToQueryWithClusterEngine.h>
#include <Common/logger_useful.h>
#include <aws/core/auth/AWSCredentials.h>
#include <aws/s3/S3Client.h>
#include <aws/s3/model/ListObjectsV2Request.h>
#include <ios>
#include <memory>
#include <string>
#include <thread>
#include <cassert>
namespace DB
{
StorageS3Cluster::StorageS3Cluster(
const StorageS3ClusterConfiguration & configuration_,
const StorageID & table_id_,
@ -72,6 +66,7 @@ StorageS3Cluster::StorageS3Cluster(
auto columns = StorageS3::getTableStructureFromDataImpl(format_name, s3_configuration, compression_method,
/*distributed_processing_*/false, is_key_with_globs, /*format_settings=*/std::nullopt, context_);
storage_metadata.setColumns(columns);
add_columns_structure_to_query = true;
}
else
storage_metadata.setColumns(columns_);
@ -117,6 +112,11 @@ Pipe StorageS3Cluster::read(
const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
ASTPtr query_to_send = query_info.original_query->clone();
if (add_columns_structure_to_query)
addColumnsStructureToQueryWithClusterEngine(
query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), 5, getName());
for (const auto & replicas : cluster->getShardsAddresses())
{
/// There will be only one replica, because we consider each replica as a shard
@ -135,7 +135,7 @@ Pipe StorageS3Cluster::read(
/// So, task_identifier is passed as constructor argument. It is more obvious.
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
connection,
queryToString(query_info.original_query),
queryToString(query_to_send),
header,
context,
/*throttler=*/nullptr,

View File

@ -46,6 +46,7 @@ private:
String compression_method;
NamesAndTypesList virtual_columns;
Block virtual_block;
bool add_columns_structure_to_query = false;
};

View File

@ -76,7 +76,7 @@ std::optional<NameAndTypePair> StorageSnapshot::tryGetColumn(const GetColumnsOpt
{
const auto & columns = getMetadataForQuery()->getColumns();
auto column = columns.tryGetColumn(options, column_name);
if (column && (!isObject(column->type) || !options.with_extended_objects))
if (column && (!column->type->hasDynamicSubcolumns() || !options.with_extended_objects))
return column;
if (options.with_extended_objects)

View File

@ -0,0 +1,51 @@
#include <Storages/addColumnsStructureToQueryWithClusterEngine.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/queryToString.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
static ASTExpressionList * extractTableFunctionArgumentsFromSelectQuery(ASTPtr & query)
{
auto * select_query = query->as<ASTSelectQuery>();
if (!select_query || !select_query->tables())
return nullptr;
auto * tables = select_query->tables()->as<ASTTablesInSelectQuery>();
auto * table_expression = tables->children[0]->as<ASTTablesInSelectQueryElement>()->table_expression->as<ASTTableExpression>();
if (!table_expression->table_function)
return nullptr;
auto * table_function = table_expression->table_function->as<ASTFunction>();
return table_function->arguments->as<ASTExpressionList>();
}
void addColumnsStructureToQueryWithClusterEngine(ASTPtr & query, const String & structure, size_t max_arguments, const String & function_name)
{
ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query);
if (!expression_list)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function {}, got '{}'", function_name, queryToString(query));
auto structure_literal = std::make_shared<ASTLiteral>(structure);
if (expression_list->children.size() < 2 || expression_list->children.size() > max_arguments)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 2 to {} arguments in {} table functions, got {}", function_name, max_arguments, expression_list->children.size());
if (expression_list->children.size() == 2 || expression_list->children.size() == max_arguments - 1)
{
auto format_literal = std::make_shared<ASTLiteral>("auto");
expression_list->children.push_back(format_literal);
}
expression_list->children.push_back(structure_literal);
}
}
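A hedged usage sketch of the helper above, mirroring the call site in StorageS3Cluster::read shown earlier; the wrapper function name is an assumption, while the structure generation, the argument limit of 5 and the helper signature are taken from this diff:
/// Sketch: rewrite the query sent to other replicas so it carries an explicit column structure.
void addStructureForS3Cluster(ASTPtr & query_to_send, const StorageSnapshotPtr & storage_snapshot)
{
    /// Produces a 'name1 Type1, name2 Type2, ...' description from the table metadata.
    String structure = StorageDictionary::generateNamesAndTypesDescription(
        storage_snapshot->metadata->getColumns().getAll());
    /// s3Cluster takes at most 5 arguments here; a missing format argument is filled with 'auto' by the helper.
    addColumnsStructureToQueryWithClusterEngine(query_to_send, structure, /* max_arguments */ 5, "s3Cluster");
}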

View File

@ -0,0 +1,11 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
/// Adds a structure argument for queries with the s3Cluster/hdfsCluster table functions.
void addColumnsStructureToQueryWithClusterEngine(ASTPtr & query, const String & structure, size_t max_arguments, const String & function_name);
}

View File

@ -200,7 +200,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables(
auto type_name = type_col[i].get<const String &>();
auto storage_column = storage_columns.tryGetPhysical(name);
if (storage_column && isObject(storage_column->type))
if (storage_column && storage_column->type->hasDynamicSubcolumns())
res.add(ColumnDescription(std::move(name), DataTypeFactory::instance().get(type_name)));
}
}

Some files were not shown because too many files have changed in this diff.