More fixes

This commit is contained in:
Amos Bird 2021-05-04 21:24:15 +08:00
parent 718c284437
commit ddd4256a15
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
11 changed files with 12 additions and 247 deletions

View File

@ -39,8 +39,6 @@
#include <Storages/MergeTree/MergeTreeSequentialSource.h>
#include <Storages/MergeTree/MergedBlockOutputStream.h>
#include <Storages/MergeTree/MergedColumnOnlyOutputStream.h>
#include <Storages/MergeTree/ProjectionCondition.h>
#include <Storages/MergeTree/ProjectionKeyActions.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Storages/MergeTree/localBackup.h>
#include <Storages/StorageMergeTree.h>

View File

@ -48,7 +48,6 @@
#include <Processors/Transforms/ProjectionPartTransform.h>
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
#include <Storages/MergeTree/StorageFromBasePartsOfProjection.h>
#include <Storages/MergeTree/ProjectionCondition.h>
#include <IO/WriteBufferFromOStream.h>
namespace ProfileEvents
@ -443,15 +442,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
}
}
const Settings & settings = context->getSettingsRef();
NamesAndTypesList available_real_columns = metadata_snapshot->getColumns().getAllPhysical();
/// If there are only virtual columns in the query, you must request at least one non-virtual one.
if (real_column_names.empty())
real_column_names.push_back(ExpressionActions::getSmallestColumn(available_real_columns));
metadata_snapshot->check(real_column_names, data.getVirtuals(), data.getStorageID());
// Filter parts by virtual columns.
std::unordered_set<String> part_values;
if (!use_cache)
@ -474,6 +464,15 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
}
// At this point, empty `part_values` means all parts.
const Settings & settings = context->getSettingsRef();
NamesAndTypesList available_real_columns = metadata_snapshot->getColumns().getAllPhysical();
/// If there are only virtual columns in the query, you must request at least one non-virtual one.
if (real_column_names.empty())
real_column_names.push_back(ExpressionActions::getSmallestColumn(available_real_columns));
metadata_snapshot->check(real_column_names, data.getVirtuals(), data.getStorageID());
// Build and check if primary key is used when necessary
std::optional<KeyCondition> key_condition;
if (!use_cache)

View File

@ -1,107 +0,0 @@
#include <Storages/MergeTree/ProjectionCondition.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/misc.h>
namespace DB
{
/// Seeds the per-column usage counters.
/// Every key column starts with a counter of 0, every required column with a counter of 1.
/// Required columns are processed last, so a name present in both lists ends up with 1.
ProjectionCondition::ProjectionCondition(const Names & key_column_names, const Names & required_column_names)
{
    for (const auto & key_name : key_column_names)
        key_columns.insert_or_assign(key_name, 0);

    for (const auto & required_name : required_column_names)
        key_columns.insert_or_assign(required_name, 1);
}
bool ProjectionCondition::check(const ASTPtr & node)
{
if (node->as<ASTIdentifier>())
{
auto name = node->getColumnNameWithoutAlias();
auto it = key_columns.find(name);
if (key_columns.end() != it)
{
++it->second;
required_columns_in_predicate.insert(name);
return true;
}
else
return false;
}
if (node->as<ASTLiteral>())
return true;
if (auto * func = node->as<ASTFunction>())
{
// in function should be treated specially
if (functionIsInOrGlobalInOperator(func->name))
{
if (func->arguments && func->arguments->children.size() == 2)
{
if (!check(func->arguments->children[0]))
return false;
// If it's a dependent table or subquery, we can still use projection
if (func->arguments->children[1]->as<ASTIdentifier>())
return true;
if (check(func->arguments->children[1]))
return true;
}
return false;
}
// TODO Need to check other special functions such as joinGet/dictGet
auto name = node->getColumnNameWithoutAlias();
auto it = key_columns.find(name);
if (key_columns.end() != it)
{
++it->second;
return true;
}
}
for (auto & child : node->children)
{
if (!check(child))
return false;
}
return true;
}
/// Returns the names of all registered columns whose usage counter is non-zero,
/// i.e. columns registered as required or matched at least once by check().
/// The order follows the iteration order of the underlying unordered map.
Names ProjectionCondition::getRequiredColumns() const
{
    Names used_columns;
    for (const auto & entry : key_columns)
    {
        const bool is_used = entry.second != 0;
        if (is_used)
            used_columns.push_back(entry.first);
    }
    return used_columns;
}
/// Rewrites a predicate for projection parts so that expressions are treated as columns:
/// any function or identifier node whose column name is a registered column is replaced
/// by a plain identifier with that name. Nodes that are not replaced are traversed recursively.
void ProjectionCondition::rewrite(ASTPtr & node) const
{
    const bool is_function_or_identifier = node->as<ASTFunction>() || node->as<ASTIdentifier>();
    if (is_function_or_identifier)
    {
        auto column_name = node->getColumnNameWithoutAlias();
        if (key_columns.count(column_name) != 0)
        {
            /// The whole sub-tree collapses into a single column reference; do not descend further.
            node = std::make_shared<ASTIdentifier>(column_name);
            return;
        }
    }

    for (auto & child : node->children)
        rewrite(child);
}
}

View File

@ -1,28 +0,0 @@
#pragma once
#include <Core/Names.h>
#include <Parsers/IAST_fwd.h>
namespace DB
{
/// Decides whether a predicate can be evaluated from a fixed set of columns,
/// and tracks which of those columns are actually needed.
class ProjectionCondition
{
public:
    /// `key_column_names` are registered as not yet used, `required_column_names` as always used.
    ProjectionCondition(const Names & key_column_names, const Names & required_column_names);

    /// Check if given predicate can be evaluated by `key_columns`.
    /// Updates the usage counters of the columns it encounters.
    bool check(const ASTPtr & node);

    /// Names of the registered columns with a non-zero usage counter.
    Names getRequiredColumns() const;

    /// Names of the registered columns that check() met as plain identifiers (returned by copy).
    NameSet getRequiredColumnsInPredicate() const
    {
        return required_columns_in_predicate;
    }

    /// Replaces sub-expressions whose column name is a registered column with identifiers.
    void rewrite(ASTPtr & node) const;

private:
    /// Column name -> usage counter.
    std::unordered_map<std::string, size_t> key_columns;

    /// Identifier names matched by check().
    NameSet required_columns_in_predicate;
};
}

View File

@ -1,75 +0,0 @@
#include <Storages/MergeTree/ProjectionKeyActions.h>
#include <Core/Block.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
namespace
{
/// Checks whether `node` is a column of `key_block`, possibly wrapped by non-aggregate functions
/// whose remaining arguments are literals or key columns themselves.
///
/// When a (sub-)expression matches a column of `key_block` by name, that sub-tree is replaced
/// in place by an identifier with the column name, and the name and type are stored in
/// `out_key_column_name` / `out_key_column_type`. If several arguments match, the outputs hold
/// the last match visited. If only literals are met, the outputs are left untouched.
///
/// Returns false for aggregate functions, for any argument that fails the check, and for node
/// kinds other than functions and literals that do not match a column of `key_block`.
bool isKeyPossiblyWrappedByFunctionsImpl(
    ASTPtr & node, const Block & key_block, String & out_key_column_name, DataTypePtr & out_key_column_type)
{
    String name = node->getColumnNameWithoutAlias();
    const auto * it = key_block.findByName(name);
    if (it)
    {
        out_key_column_name = it->name;
        out_key_column_type = it->type;
        node = std::make_shared<ASTIdentifier>(it->name);
        return true;
    }

    if (const auto * func = node->as<ASTFunction>())
    {
        // Projection aggregating keys cannot be inside another aggregate function. It's possible but doesn't make sense.
        if (AggregateFunctionFactory::instance().isAggregateFunctionName(func->name))
            return false;

        /// Guard against an unset argument list instead of dereferencing a null pointer;
        /// a function without an argument list is handled like one with zero arguments.
        if (!func->arguments)
            return true;

        for (auto & arg : func->arguments->children)
        {
            if (!isKeyPossiblyWrappedByFunctionsImpl(arg, key_block, out_key_column_name, out_key_column_type))
                return false;
        }
        return true;
    }

    return node->as<ASTLiteral>() != nullptr;
}
/// Front-end of isKeyPossiblyWrappedByFunctionsImpl that only publishes the found key column
/// on success: the output arguments are written if and only if the function returns true.
/// `node` itself may still have been partially rewritten by the implementation on failure.
bool isKeyPossiblyWrappedByFunctions(
    ASTPtr & node, const Block & key_block, String & out_key_res_column_name, DataTypePtr & out_key_res_column_type)
{
    String found_name;
    DataTypePtr found_type;

    const bool matched = isKeyPossiblyWrappedByFunctionsImpl(node, key_block, found_name, found_type);
    if (matched)
    {
        out_key_res_column_name = found_name;
        out_key_res_column_type = found_type;
    }
    return matched;
}
}
/// Tries to register `node` as an expression over a single key column of `key_block`.
///
/// On success the matched column is erased from `key_block`, the (rewritten) expression is stored
/// in `func_map` under the column's name and type, `node_name` is mapped to the column name in
/// `name_map`, and true is returned. Otherwise nothing is recorded and false is returned.
///
/// NOTE(review): an expression made only of literals also succeeds, with an empty column name —
/// confirm that Block::erase tolerates that.
bool ProjectionKeyActions::add(ASTPtr & node, const std::string & node_name, Block & key_block)
{
    String key_name;
    DataTypePtr key_type;
    if (!isKeyPossiblyWrappedByFunctions(node, key_block, key_name, key_type))
        return false;

    key_block.erase(key_name);
    func_map[{key_name, key_type}] = node;
    name_map[node_name] = key_name;
    return true;
}
}

View File

@ -1,18 +0,0 @@
#pragma once
#include <Core/NamesAndTypes.h>
#include <Parsers/IAST_fwd.h>
namespace DB
{
class Block;
/// Collects expressions that are built on top of projection key columns.
class ProjectionKeyActions
{
public:
    /// Registers `node` if it is a key column of `key_block` possibly wrapped by functions.
    /// Returns whether the node was registered.
    bool add(ASTPtr & node, const std::string & node_name, Block & key_block);

    /// Key column (name and type) -> expression registered for it.
    std::map<NameAndTypePair, ASTPtr> func_map;

    /// Name passed to add() -> name of the key column it was resolved to.
    std::map<std::string, std::string> name_map;
};
}

View File

@ -5,7 +5,6 @@
#include <Core/SortDescription.h>
#include <Core/Names.h>
#include <Storages/ProjectionsDescription.h>
#include <Storages/MergeTree/ProjectionKeyActions.h>
#include <Interpreters/AggregateDescription.h>
#include <memory>

View File

@ -93,8 +93,6 @@ SRCS(
MergeTree/MergedBlockOutputStream.cpp
MergeTree/MergedColumnOnlyOutputStream.cpp
MergeTree/PartitionPruner.cpp
MergeTree/ProjectionCondition.cpp
MergeTree/ProjectionKeyActions.cpp
MergeTree/ReplicatedFetchList.cpp
MergeTree/ReplicatedMergeTreeAddress.cpp
MergeTree/ReplicatedMergeTreeAltersSequence.cpp

View File

@ -8,7 +8,7 @@ INSERT INTO xp SELECT '2020-01-01', number, '' FROM numbers(100000);
CREATE TABLE xp_d AS xp ENGINE = Distributed(test_shard_localhost, currentDatabase(), xp);
SELECT count(7 = (SELECT number FROM numbers(0) ORDER BY number ASC NULLS FIRST LIMIT 7)) FROM xp_d PREWHERE toYYYYMM(A) GLOBAL IN (SELECT NULL = (SELECT number FROM numbers(1) ORDER BY number DESC NULLS LAST LIMIT 1), toYYYYMM(min(A)) FROM xp_d) WHERE B > NULL; -- B > NULL is evaluated to 0 and this works
SELECT count(7 = (SELECT number FROM numbers(0) ORDER BY number ASC NULLS FIRST LIMIT 7)) FROM xp_d PREWHERE toYYYYMM(A) GLOBAL IN (SELECT NULL = (SELECT number FROM numbers(1) ORDER BY number DESC NULLS LAST LIMIT 1), toYYYYMM(min(A)) FROM xp_d) WHERE B > NULL; -- { serverError 20 }
SELECT count() FROM xp_d WHERE A GLOBAL IN (SELECT NULL); -- { serverError 53 }

View File

@ -393,8 +393,8 @@
"01674_clickhouse_client_query_param_cte",
"01666_merge_tree_max_query_limit",
"01710_projections.sql",
"01710_normal_projections.sql",
"01710_aggregate_projections.sql",
"01710_normal_projections.sh",
"01710_aggregate_projections.sh",
"01786_explain_merge_tree",
"01666_merge_tree_max_query_limit",
"01802_test_postgresql_protocol_with_row_policy", /// It cannot parse DROP ROW POLICY