ClickHouse/src/Interpreters/inplaceBlockConversions.cpp

323 lines
12 KiB
C++
Raw Normal View History

2020-02-17 15:44:13 +00:00
#include "inplaceBlockConversions.h"
#include <Core/Block.h>
2020-01-15 13:00:08 +00:00
#include <Parsers/queryToString.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTWithAlias.h>
2020-01-15 13:00:08 +00:00
#include <Parsers/ASTIdentifier.h>
2020-02-19 14:58:06 +00:00
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
2016-01-13 00:32:59 +00:00
#include <utility>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/ObjectUtils.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
2020-10-02 12:38:50 +00:00
#include <Common/checkStackSize.h>
#include <Storages/ColumnsDescription.h>
#include <DataTypes/NestedUtils.h>
#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeArray.h>
#include <Storages/StorageInMemoryMetadata.h>
2016-01-13 00:32:59 +00:00
namespace DB
{
2022-02-16 14:18:03 +00:00
/// Error codes referenced in this file (used as `ErrorCodes::LOGICAL_ERROR` below).
/// Only declared here; the `extern` constant is defined in another translation unit.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}
2020-02-17 12:47:34 +00:00
namespace
{
2020-10-02 12:54:51 +00:00
/// Add all required expressions for missing columns calculation
void addDefaultRequiredExpressionsRecursively(
const Block & block, const String & required_column_name, DataTypePtr required_column_type,
const ColumnsDescription & columns, ASTPtr default_expr_list_accum, NameSet & added_columns, bool null_as_default)
2016-01-13 00:32:59 +00:00
{
2020-10-02 12:38:50 +00:00
checkStackSize();
bool is_column_in_query = block.has(required_column_name);
bool convert_null_to_default = false;
if (is_column_in_query)
convert_null_to_default = null_as_default && block.findByName(required_column_name)->type->isNullable() && !required_column_type->isNullable();
if ((is_column_in_query && !convert_null_to_default) || added_columns.contains(required_column_name))
2020-10-02 12:38:50 +00:00
return;
2016-01-13 00:32:59 +00:00
auto column_default = columns.getDefault(required_column_name);
2020-10-02 12:38:50 +00:00
if (column_default)
{
2020-10-02 12:38:50 +00:00
/// expressions must be cloned to prevent modification by the ExpressionAnalyzer
auto column_default_expr = column_default->expression->clone();
2016-01-13 00:32:59 +00:00
2020-10-02 12:54:51 +00:00
/// Our default may depend on columns with default expr which not present in block
/// we have to add them to block too
2020-10-02 12:38:50 +00:00
RequiredSourceColumnsVisitor::Data columns_context;
RequiredSourceColumnsVisitor(columns_context).visit(column_default_expr);
NameSet required_columns_names = columns_context.requiredColumns();
2016-01-13 00:32:59 +00:00
2021-08-07 08:11:40 +00:00
auto expr = makeASTFunction("_CAST", column_default_expr, std::make_shared<ASTLiteral>(columns.get(required_column_name).type->getName()));
if (is_column_in_query && convert_null_to_default)
2021-04-23 12:36:40 +00:00
expr = makeASTFunction("ifNull", std::make_shared<ASTIdentifier>(required_column_name), std::move(expr));
2021-04-23 12:40:34 +00:00
default_expr_list_accum->children.emplace_back(setAlias(expr, required_column_name));
added_columns.emplace(required_column_name);
for (const auto & next_required_column_name : required_columns_names)
{
/// Required columns of the default expression should not be converted to NULL,
/// since this map value to default and MATERIALIZED values will not work.
///
/// Consider the following structure:
/// - A Nullable(Int64)
/// - X Int64 materialized coalesce(A, -1)
///
/// With recursive_null_as_default=true you will get:
///
/// _CAST(coalesce(A, -1), 'Int64') AS X, NULL AS A
///
/// And this will ignore default expression.
bool recursive_null_as_default = false;
addDefaultRequiredExpressionsRecursively(block,
next_required_column_name, required_column_type,
columns, default_expr_list_accum, added_columns,
recursive_null_as_default);
}
}
2021-09-10 14:41:09 +00:00
else if (columns.has(required_column_name))
{
2021-09-10 14:41:09 +00:00
/// In case of dictGet function we allow to use it with identifier dictGet(identifier, 'column_name', key_expression)
/// and this identifier will be in required columns. If such column is not in ColumnsDescription we ignore it.
/// This column is required, but doesn't have default expression, so lets use "default default"
auto column = columns.get(required_column_name);
auto default_value = column.type->getDefault();
auto default_ast = std::make_shared<ASTLiteral>(default_value);
default_expr_list_accum->children.emplace_back(setAlias(default_ast, required_column_name));
added_columns.emplace(required_column_name);
}
2020-10-02 12:38:50 +00:00
}
/// Collects, for all `required_columns`, the expressions produced by
/// addDefaultRequiredExpressionsRecursively into one expression list.
/// Returns nullptr if no expression was produced.
ASTPtr defaultRequiredExpressions(const Block & block, const NamesAndTypesList & required_columns, const ColumnsDescription & columns, bool null_as_default)
{
    ASTPtr accumulated_exprs = std::make_shared<ASTExpressionList>();
    NameSet emitted_names;

    for (const auto & required : required_columns)
    {
        addDefaultRequiredExpressionsRecursively(
            block, required.name, required.type, columns, accumulated_exprs, emitted_names, null_as_default);
    }

    if (!accumulated_exprs->children.empty())
        return accumulated_exprs;

    return nullptr;
}
2020-01-15 13:00:08 +00:00
/// Builds a list of `_CAST(<name>, '<required type>') AS <name>` expressions for every required column
/// that is present in `block` with a type different from the required one.
/// The returned list is never null, but it may have no children.
ASTPtr convertRequiredExpressions(Block & block, const NamesAndTypesList & required_columns)
{
    ASTPtr casts = std::make_shared<ASTExpressionList>();

    for (const auto & required : required_columns)
    {
        /// Columns missing from the block, or already of the required type, need no cast.
        const bool needs_cast = block.has(required.name)
            && !block.getByName(required.name).type->equals(*required.type);
        if (!needs_cast)
            continue;

        auto target_type_name = std::make_shared<ASTLiteral>(required.type->getName());
        auto cast_func = makeASTFunction("_CAST", std::make_shared<ASTIdentifier>(required.name), target_type_name);

        casts->children.emplace_back(setAlias(cast_func, required.name));
    }

    return casts;
}
2021-02-05 15:11:26 +00:00
/// Analyzes `expr_list` against the columns of `header` and returns a DAG made by merging
/// a DAG whose inputs are the header columns with the DAG built for the expressions.
/// Returns nullptr when `expr_list` is null.
ActionsDAGPtr createExpressions(
    const Block & header,
    ASTPtr expr_list,
    bool save_unneeded_columns,
    ContextPtr context)
{
    if (!expr_list)
        return nullptr;

    const auto header_columns = header.getNamesAndTypesList();

    auto syntax_result = TreeRewriter(context).analyze(expr_list, header_columns);
    ExpressionAnalyzer analyzer(expr_list, syntax_result, context);

    auto header_dag = std::make_shared<ActionsDAG>(header_columns);

    /// The second flag is the negation of `save_unneeded_columns`.
    auto expressions_dag = analyzer.getActionsDAG(true, !save_unneeded_columns);

    return ActionsDAG::merge(std::move(*header_dag), std::move(*expressions_dag));
}
}
2020-01-15 13:00:08 +00:00
/// Casts, in place, the columns of `block` whose types differ from the ones listed in `required_columns`.
/// Does nothing when no column needs a cast.
void performRequiredConversions(Block & block, const NamesAndTypesList & required_columns, ContextPtr context)
{
    ASTPtr casts = convertRequiredExpressions(block, required_columns);
    if (casts->children.empty())
        return;

    auto dag = createExpressions(block, casts, true, context);
    if (!dag)
        return;

    auto actions = std::make_shared<ExpressionActions>(std::move(dag), ExpressionActionsSettings::fromContext(context));
    actions->execute(block);
}
2021-02-05 15:11:26 +00:00
/// Returns a DAG computing the required columns that have to be filled from default expressions.
/// Returns nullptr when `columns` declares no defaults at all, or when no expression is needed.
ActionsDAGPtr evaluateMissingDefaults(
    const Block & header,
    const NamesAndTypesList & required_columns,
    const ColumnsDescription & columns,
    ContextPtr context,
    bool save_unneeded_columns,
    bool null_as_default)
{
    if (columns.hasDefaults())
    {
        /// May be null; createExpressions returns nullptr in that case.
        ASTPtr default_exprs = defaultRequiredExpressions(header, required_columns, columns, null_as_default);
        return createExpressions(header, default_exprs, save_unneeded_columns, context);
    }

    return nullptr;
}
static bool arrayHasNoElementsRead(const IColumn & column)
{
const auto * column_array = typeid_cast<const ColumnArray *>(&column);
if (!column_array)
return false;
size_t size = column_array->size();
if (!size)
return false;
if (const auto * nested_array = typeid_cast<const ColumnArray *>(&column_array->getData()))
return arrayHasNoElementsRead(*nested_array);
size_t data_size = column_array->getData().size();
if (data_size)
return false;
size_t last_offset = column_array->getOffsets()[size - 1];
return last_offset != 0;
}
/// Fills the null entries of `res_columns` (one entry per requested column) with default values.
///
/// Columns for which `metadata_snapshot` declares a default expression are left null.
/// A missing array column gets arrays of the lengths found in the offsets of the columns that
/// were read, when all the offsets it needs are available; otherwise it gets `num_rows` type defaults.
///
/// Throws LOGICAL_ERROR if `res_columns` and `requested_columns` differ in size.
void fillMissingColumns(
    Columns & res_columns,
    size_t num_rows,
    const NamesAndTypesList & requested_columns,
    const NamesAndTypesList & available_columns,
    StorageMetadataPtr metadata_snapshot)
{
    const size_t num_columns = requested_columns.size();
    if (res_columns.size() != num_columns)
        throw Exception(ErrorCodes::LOGICAL_ERROR,
            "Invalid number of columns passed to fillMissingColumns. Expected {}, got {}",
            num_columns, res_columns.size());

    /// A missing column of a nested data structure must not become a column of empty arrays:
    /// its arrays must have the correct lengths. So first gather the offsets of every array
    /// that is present, keyed by the nested table name joined with the subcolumn name.
    std::unordered_map<String, ColumnPtr> offsets_by_stream;

    size_t pos = 0;
    for (auto available_it = available_columns.begin(); pos < num_columns; ++pos, ++available_it)
    {
        if (!res_columns[pos])
            continue;

        const auto serialization = IDataType::getSerialization(*available_it);
        const auto table_name = Nested::extractTableName(available_it->name);

        auto collect_offsets = [&](const auto & subpath)
        {
            const bool is_array_sizes = !subpath.empty() && subpath.back().type == ISerialization::Substream::ArraySizes;
            if (!is_array_sizes)
                return;

            const auto subcolumn_name = ISerialization::getSubcolumnNameForStream(subpath);
            auto & slot = offsets_by_stream[Nested::concatenateName(table_name, subcolumn_name)];

            /// Several offsets columns may exist for the same nested data structure;
            /// keep the first one that is not empty.
            /// TODO: more optimal
            if (!slot || slot->empty())
                slot = arraySizesToOffsets(*subpath.back().data.column);
        };

        ISerialization::SubstreamPath path;
        serialization->enumerateStreams(path, collect_offsets, {serialization, available_it->type, res_columns[pos], nullptr});
    }

    /// Default values are inserted only for columns that have no default expression.
    size_t column_index = 0;
    for (const auto & requested : requested_columns)
    {
        auto & result_column = res_columns[column_index++];

        /// An array column whose elements were not read is treated as missing.
        if (result_column && arrayHasNoElementsRead(*result_column))
            result_column = nullptr;

        if (result_column)
            continue;

        if (metadata_snapshot && metadata_snapshot->getColumns().hasDefault(requested.name))
            continue;

        std::vector<ColumnPtr> offsets_per_level;
        bool every_level_found = true;

        const auto * array_type = typeid_cast<const DataTypeArray *>(requested.type.get());
        if (array_type)
        {
            const auto serialization = IDataType::getSerialization(requested);
            const auto table_name = Nested::extractTableName(requested.name);

            auto gather_offsets = [&](const auto & subpath)
            {
                if (!every_level_found)
                    return;

                if (subpath.empty() || subpath.back().type != ISerialization::Substream::ArraySizes)
                    return;

                const auto subcolumn_name = ISerialization::getSubcolumnNameForStream(subpath);
                const auto found = offsets_by_stream.find(Nested::concatenateName(table_name, subcolumn_name));
                if (found == offsets_by_stream.end())
                    every_level_found = false;
                else
                    offsets_per_level.emplace_back(found->second);
            };

            ISerialization::SubstreamPath path;
            serialization->enumerateStreams(path, gather_offsets, {serialization, requested.type, nullptr, nullptr});
        }

        if (!array_type || !every_level_found)
        {
            /// The constant column is converted to a full one: the interpreter could infer that the
            /// column is constant everywhere, while in some blocks (from other parts) it can be full.
            result_column = requested.type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
            continue;
        }

        assert(!offsets_per_level.empty());

        /// The last collected offsets column gives the number of scalar elements to create.
        auto scalar_type = getBaseTypeOfArray(requested.type);
        const size_t data_size = assert_cast<const ColumnUInt64 &>(*offsets_per_level.back()).getData().back();
        result_column = scalar_type->createColumnConstWithDefaultValue(data_size)->convertToFullColumnIfConst();

        /// Wrap the data into arrays, starting from the last collected offsets and ending with the first.
        for (size_t level = offsets_per_level.size(); level-- > 0;)
            result_column = ColumnArray::create(result_column, offsets_per_level[level]);
    }
}
2020-01-15 13:00:08 +00:00
}