forbid non-deterministic functions in primary and partition keys [#CLICKHOUSE-3455]

This commit is contained in:
Alexey Zatelepin 2018-02-21 20:05:21 +03:00
parent 6fdf82e452
commit 610f70fbeb
11 changed files with 104 additions and 41 deletions

View File

@ -1111,6 +1111,8 @@ public:
return std::make_shared<DataTypeUUID>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
{
auto col_res = ColumnVector<UInt128>::create();

View File

@ -1208,6 +1208,8 @@ public:
return std::make_shared<DataTypeDateTime>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
{
block.getByPosition(result).column = DataTypeUInt32().createColumnConst(
@ -1235,6 +1237,8 @@ public:
return std::make_shared<DataTypeDate>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
{
block.getByPosition(result).column = DataTypeUInt16().createColumnConst(
@ -1262,6 +1266,8 @@ public:
return std::make_shared<DataTypeDate>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
{
block.getByPosition(result).column = DataTypeUInt16().createColumnConst(

View File

@ -218,6 +218,8 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
{
/// The dictionary key that defines the "point of view".
@ -312,6 +314,8 @@ public:
return std::make_shared<DataTypeUInt8>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
{
/// The dictionary key that defines the "point of view".
@ -446,6 +450,8 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
{
/// The dictionary key that defines the "point of view".
@ -720,6 +726,8 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
{
RegionsNames::Language language = RegionsNames::Language::RU;

View File

@ -94,6 +94,8 @@ private:
return std::make_shared<DataTypeUInt8>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
{
const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@ -274,6 +276,8 @@ private:
return std::make_shared<DataTypeString>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
{
const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@ -535,6 +539,8 @@ private:
return std::make_shared<DataTypeString>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
{
const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@ -821,6 +827,8 @@ private:
return std::make_shared<DataType>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
{
const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@ -1134,6 +1142,8 @@ private:
return std::make_shared<DataType>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
{
const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@ -1379,6 +1389,8 @@ private:
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
{
const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@ -1549,6 +1561,8 @@ private:
return std::make_shared<DataTypeUInt8>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
{
const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());

View File

@ -23,6 +23,8 @@ public:
bool isVariadic() const override { return true; }
bool isDeterministic() override { return false; }
size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;

View File

@ -104,6 +104,8 @@ public:
return std::make_shared<DataTypeString>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, const size_t result) override
{
block.getByPosition(result).column = DataTypeString().createColumnConst(block.rows(), db_name);
@ -126,6 +128,8 @@ public:
return name;
}
bool isDeterministic() override { return false; }
bool isDeterministicInScopeOfQuery() override
{
return false;
@ -391,6 +395,8 @@ public:
return name;
}
bool isDeterministic() override { return false; }
bool isDeterministicInScopeOfQuery() override
{
return false;
@ -434,6 +440,8 @@ public:
return 0;
}
bool isDeterministic() override { return false; }
bool isDeterministicInScopeOfQuery() override
{
return false;
@ -482,6 +490,8 @@ public:
return 0;
}
bool isDeterministic() override { return false; }
bool isDeterministicInScopeOfQuery() override
{
return false;
@ -524,6 +534,8 @@ public:
return 0;
}
bool isDeterministic() override { return false; }
bool isDeterministicInScopeOfQuery() override
{
return false;
@ -889,6 +901,8 @@ public:
}
/** It could return many different values for single argument. */
bool isDeterministic() override { return false; }
bool isDeterministicInScopeOfQuery() override
{
return false;
@ -1288,6 +1302,8 @@ public:
return std::make_shared<DataTypeUInt32>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
{
block.getByPosition(result).column = DataTypeUInt32().createColumnConst(block.rows(), static_cast<UInt64>(uptime));
@ -1323,6 +1339,8 @@ public:
return std::make_shared<DataTypeString>();
}
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
{
block.getByPosition(result).column = DataTypeString().createColumnConst(block.rows(), DateLUT::instance().getTimeZone());
@ -1355,6 +1373,8 @@ public:
return 1;
}
bool isDeterministic() override { return false; }
bool isDeterministicInScopeOfQuery() override
{
return false;
@ -1632,6 +1652,8 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override;
bool isDeterministic() override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override;
private:

View File

@ -126,6 +126,9 @@ public:
* (even for distributed query), but not deterministic in general.
* Example: now(). Another example: functions that work with periodically updated dictionaries.
*/
/// Whether the function is deterministic in general. The default is true; a function that is not
/// deterministic overrides this to return false. checkKeyExpression() rejects MergeTree key
/// expressions that apply a function for which this returns false.
virtual bool isDeterministic() { return true; }
/// Whether the function is deterministic within a single query, as described in the comment block above.
/// The default is true.
virtual bool isDeterministicInScopeOfQuery() { return true; }
/** Lets you know if the function is monotonic in a range of values.
@ -320,6 +323,8 @@ public:
bool isInjective(const Block & sample_block) override { return function->isInjective(sample_block); }
/// Both determinism queries are forwarded unchanged to the wrapped `function`.
bool isDeterministic() override { return function->isDeterministic(); }
bool isDeterministicInScopeOfQuery() override { return function->isDeterministicInScopeOfQuery(); }
bool hasInformationAboutMonotonicity() const override { return function->hasInformationAboutMonotonicity(); }

View File

@ -178,14 +178,32 @@ MergeTreeData::MergeTreeData(
}
static void checkForAllowedKeyColumns(const ColumnWithTypeAndName & element, const std::string & key_name)
/// Validates a key expression of a MergeTree table.
///
/// `expr` is the expression whose actions are inspected, `sample_block` holds the key columns to check,
/// and `key_name` ("Primary", "Secondary" or "Partition" at the visible call sites) is used only as the
/// prefix of the error messages.
///
/// Throws Exception with
///  - ILLEGAL_COLUMN if the expression contains an ARRAY JOIN action;
///  - BAD_ARGUMENTS if it applies a function whose isDeterministic() returns false;
///  - ILLEGAL_COLUMN if a column of `sample_block` is a constant/dummy column or has a Nullable type.
static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name)
{
    /// First inspect the actions the expression is built from.
    for (const ExpressionAction & action : expr.getActions())
    {
        /// An explicit error code is passed here, as for every other check in this function.
        if (action.type == ExpressionAction::ARRAY_JOIN)
            throw Exception(key_name + " key cannot contain array joins", ErrorCodes::ILLEGAL_COLUMN);

        if (action.type == ExpressionAction::APPLY_FUNCTION)
        {
            IFunctionBase & func = *action.function;
            if (!func.isDeterministic())
                throw Exception(key_name + " key cannot contain non-deterministic functions, "
                    "but contains function " + func.getName(),
                    ErrorCodes::BAD_ARGUMENTS);
        }
    }

    /// Then check every key column of the sample block.
    for (const ColumnWithTypeAndName & element : sample_block)
    {
        /// `column` may be null in a sample block, hence the null check before inspecting it.
        const ColumnPtr & column = element.column;
        if (column && (column->isColumnConst() || column->isDummy()))
            throw Exception{key_name + " key cannot contain constants", ErrorCodes::ILLEGAL_COLUMN};

        if (element.type->isNullable())
            throw Exception{key_name + " key cannot contain nullable columns", ErrorCodes::ILLEGAL_COLUMN};
    }
}
@ -213,14 +231,9 @@ void MergeTreeData::initPrimaryKey()
primary_key_sample = projected_expr->getSampleBlock();
}
checkKeyExpression(*primary_expr, primary_key_sample, "Primary");
size_t primary_key_size = primary_key_sample.columns();
/// A primary key cannot contain constants. It is meaningless.
/// (And also couldn't work because primary key is serialized with method of IDataType that doesn't support constants).
/// Also a primary key must not contain any nullable column.
for (size_t i = 0; i < primary_key_size; ++i)
checkForAllowedKeyColumns(primary_key_sample.getByPosition(i), "Primary");
primary_key_data_types.resize(primary_key_size);
for (size_t i = 0; i < primary_key_size; ++i)
primary_key_data_types[i] = primary_key_sample.getByPosition(i).type;
@ -235,8 +248,7 @@ void MergeTreeData::initPrimaryKey()
ExpressionAnalyzer(secondary_sort_expr_ast, context, nullptr, getColumnsList()).getActions(true);
auto secondary_key_sample = projected_expr->getSampleBlock();
for (size_t i = 0; i < secondary_key_sample.columns(); ++i)
checkForAllowedKeyColumns(secondary_key_sample.getByPosition(i), "Secondary");
checkKeyExpression(*secondary_sort_expr, secondary_key_sample, "Secondary");
}
}
@ -250,14 +262,11 @@ void MergeTreeData::initPartitionKey()
for (const ASTPtr & ast : partition_expr_ast->children)
{
String col_name = ast->getColumnName();
partition_expr_columns.emplace_back(col_name);
const ColumnWithTypeAndName & element = partition_expr->getSampleBlock().getByName(col_name);
checkForAllowedKeyColumns(element, "Partition");
partition_expr_column_types.emplace_back(element.type);
partition_key_sample.insert(partition_expr->getSampleBlock().getByName(col_name));
}
checkKeyExpression(*partition_expr, partition_key_sample, "Partition");
/// Add all columns used in the partition key to the min-max index.
const NamesAndTypesList & minmax_idx_columns_with_types = partition_expr->getRequiredColumnsWithTypes();
minmax_idx_expr = std::make_shared<ExpressionActions>(minmax_idx_columns_with_types, context.getSettingsRef());
@ -2025,7 +2034,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context
/// Re-parse partition key fields using the information about expected field types.
size_t fields_count = partition_expr_column_types.size();
size_t fields_count = partition_key_sample.columns();
if (partition_ast.fields_count != fields_count)
throw Exception(
"Wrong number of fields in the partition expression: " + toString(partition_ast.fields_count) +
@ -2041,12 +2050,8 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context
ReadBufferFromMemory right_paren_buf(")", 1);
ConcatReadBuffer buf({&left_paren_buf, &fields_buf, &right_paren_buf});
Block header;
for (size_t i = 0; i < fields_count; ++i)
header.insert(ColumnWithTypeAndName(partition_expr_column_types[i], partition_expr_columns[i]));
ValuesRowInputStream input_stream(buf, header, context, /* interpret_expressions = */true);
MutableColumns columns = header.cloneEmptyColumns();
ValuesRowInputStream input_stream(buf, partition_key_sample, context, /* interpret_expressions = */true);
MutableColumns columns = partition_key_sample.cloneEmptyColumns();
if (!input_stream.read(columns))
throw Exception(

View File

@ -524,8 +524,7 @@ public:
ASTPtr partition_expr_ast;
ExpressionActionsPtr partition_expr;
Names partition_expr_columns;
DataTypes partition_expr_column_types;
Block partition_key_sample;
ExpressionActionsPtr minmax_idx_expr;
Names minmax_idx_columns;

View File

@ -81,9 +81,9 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block
data.partition_expr->execute(block_copy);
ColumnRawPtrs partition_columns;
partition_columns.reserve(data.partition_expr_columns.size());
for (const String & name : data.partition_expr_columns)
partition_columns.emplace_back(block_copy.getByName(name).column.get());
partition_columns.reserve(data.partition_key_sample.columns());
for (const ColumnWithTypeAndName & element : data.partition_key_sample)
partition_columns.emplace_back(block_copy.getByName(element.name).column.get());
PODArray<size_t> partition_num_to_first_row;
IColumn::Selector selector;

View File

@ -23,7 +23,7 @@ static ReadBufferFromFile openForReading(const String & path)
/// So if you want to change this method, be sure to guarantee compatibility with existing table data.
String MergeTreePartition::getID(const MergeTreeData & storage) const
{
if (value.size() != storage.partition_expr_columns.size())
if (value.size() != storage.partition_key_sample.columns())
throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR);
if (value.empty())
@ -51,7 +51,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const
if (i > 0)
result += '-';
if (typeid_cast<const DataTypeDate *>(storage.partition_expr_column_types[i].get()))
if (typeid_cast<const DataTypeDate *>(storage.partition_key_sample.getByPosition(i).type.get()))
result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(value[i].safeGet<UInt64>())));
else
result += applyVisitor(to_string_visitor, value[i]);
@ -79,7 +79,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const
void MergeTreePartition::serializeTextQuoted(const MergeTreeData & storage, WriteBuffer & out) const
{
size_t key_size = storage.partition_expr_column_types.size();
size_t key_size = storage.partition_key_sample.columns();
if (key_size == 0)
{
@ -95,7 +95,7 @@ void MergeTreePartition::serializeTextQuoted(const MergeTreeData & storage, Writ
if (i > 0)
writeCString(", ", out);
const DataTypePtr & type = storage.partition_expr_column_types[i];
const DataTypePtr & type = storage.partition_key_sample.getByPosition(i).type;
auto column = type->createColumn();
column->insert(value[i]);
type->serializeTextQuoted(*column, 0, out);
@ -111,9 +111,9 @@ void MergeTreePartition::load(const MergeTreeData & storage, const String & part
return;
ReadBufferFromFile file = openForReading(part_path + "partition.dat");
value.resize(storage.partition_expr_column_types.size());
for (size_t i = 0; i < storage.partition_expr_column_types.size(); ++i)
storage.partition_expr_column_types[i]->deserializeBinary(value[i], file);
value.resize(storage.partition_key_sample.columns());
for (size_t i = 0; i < storage.partition_key_sample.columns(); ++i)
storage.partition_key_sample.getByPosition(i).type->deserializeBinary(value[i], file);
}
void MergeTreePartition::store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const
@ -124,7 +124,7 @@ void MergeTreePartition::store(const MergeTreeData & storage, const String & par
WriteBufferFromFile out(part_path + "partition.dat");
HashingWriteBuffer out_hashing(out);
for (size_t i = 0; i < value.size(); ++i)
storage.partition_expr_column_types[i]->serializeBinary(value[i], out_hashing);
storage.partition_key_sample.getByPosition(i).type->serializeBinary(value[i], out_hashing);
out_hashing.next();
checksums.files["partition.dat"].file_size = out_hashing.count();
checksums.files["partition.dat"].file_hash = out_hashing.getHash();