Merge pull request #1941 from yandex/unsorted-mergetree

Fix MergeTree idiosyncrasies
Author: alexey-milovidov, 2018-02-22 00:36:59 +03:00 (committed by GitHub)
Commit: d4dcb9412c
16 changed files with 207 additions and 47 deletions


@@ -1111,6 +1111,8 @@ public:
         return std::make_shared<DataTypeUUID>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
     {
         auto col_res = ColumnVector<UInt128>::create();


@@ -1208,6 +1208,8 @@ public:
         return std::make_shared<DataTypeDateTime>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
     {
         block.getByPosition(result).column = DataTypeUInt32().createColumnConst(
@@ -1235,6 +1237,8 @@ public:
         return std::make_shared<DataTypeDate>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
     {
         block.getByPosition(result).column = DataTypeUInt16().createColumnConst(
@@ -1262,6 +1266,8 @@ public:
         return std::make_shared<DataTypeDate>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
     {
         block.getByPosition(result).column = DataTypeUInt16().createColumnConst(


@@ -218,6 +218,8 @@ public:
     bool useDefaultImplementationForConstants() const override { return true; }
     ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
     {
         /// The dictionary key that defines the "point of view".
@@ -312,6 +314,8 @@ public:
         return std::make_shared<DataTypeUInt8>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
     {
         /// The dictionary key that defines the "point of view".
@@ -446,6 +450,8 @@ public:
     bool useDefaultImplementationForConstants() const override { return true; }
     ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
     {
         /// The dictionary key that defines the "point of view".
@@ -720,6 +726,8 @@ public:
     bool useDefaultImplementationForConstants() const override { return true; }
     ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
     {
         RegionsNames::Language language = RegionsNames::Language::RU;


@@ -94,6 +94,8 @@ private:
         return std::make_shared<DataTypeUInt8>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
     {
         const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@@ -274,6 +276,8 @@ private:
         return std::make_shared<DataTypeString>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
     {
         const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@@ -535,6 +539,8 @@ private:
         return std::make_shared<DataTypeString>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
     {
         const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@@ -821,6 +827,8 @@ private:
         return std::make_shared<DataType>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
     {
         const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@@ -1134,6 +1142,8 @@ private:
         return std::make_shared<DataType>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
     {
         const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@@ -1379,6 +1389,8 @@ private:
         return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
     {
         const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());
@@ -1549,6 +1561,8 @@ private:
         return std::make_shared<DataTypeUInt8>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
     {
         const auto dict_name_col = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get());


@@ -23,6 +23,8 @@ public:
     bool isVariadic() const override { return true; }
+    bool isDeterministic() override { return false; }
     size_t getNumberOfArguments() const override { return 0; }
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;


@@ -104,6 +104,8 @@ public:
         return std::make_shared<DataTypeString>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, const size_t result) override
     {
         block.getByPosition(result).column = DataTypeString().createColumnConst(block.rows(), db_name);
@@ -126,6 +128,8 @@ public:
         return name;
     }
+    bool isDeterministic() override { return false; }
     bool isDeterministicInScopeOfQuery() override
     {
         return false;
@@ -391,6 +395,8 @@ public:
         return name;
     }
+    bool isDeterministic() override { return false; }
     bool isDeterministicInScopeOfQuery() override
     {
         return false;
@@ -434,6 +440,8 @@ public:
         return 0;
     }
+    bool isDeterministic() override { return false; }
     bool isDeterministicInScopeOfQuery() override
     {
         return false;
@@ -482,6 +490,8 @@ public:
         return 0;
     }
+    bool isDeterministic() override { return false; }
     bool isDeterministicInScopeOfQuery() override
     {
         return false;
@@ -524,6 +534,8 @@ public:
         return 0;
     }
+    bool isDeterministic() override { return false; }
     bool isDeterministicInScopeOfQuery() override
     {
         return false;
@@ -889,6 +901,8 @@ public:
     }
     /** It could return many different values for single argument. */
+    bool isDeterministic() override { return false; }
     bool isDeterministicInScopeOfQuery() override
     {
         return false;
@@ -1288,6 +1302,8 @@ public:
         return std::make_shared<DataTypeUInt32>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
     {
         block.getByPosition(result).column = DataTypeUInt32().createColumnConst(block.rows(), static_cast<UInt64>(uptime));
@@ -1323,6 +1339,8 @@ public:
         return std::make_shared<DataTypeString>();
     }
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override
     {
         block.getByPosition(result).column = DataTypeString().createColumnConst(block.rows(), DateLUT::instance().getTimeZone());
@@ -1355,6 +1373,8 @@ public:
         return 1;
     }
+    bool isDeterministic() override { return false; }
     bool isDeterministicInScopeOfQuery() override
     {
         return false;
@@ -1632,6 +1652,8 @@ public:
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override;
+    bool isDeterministic() override { return false; }
     void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override;
 private:


@@ -126,6 +126,9 @@ public:
       * (even for distributed query), but not deterministic it general.
       * Example: now(). Another example: functions that work with periodically updated dictionaries.
       */
+    virtual bool isDeterministic() { return true; }
     virtual bool isDeterministicInScopeOfQuery() { return true; }
     /** Lets you know if the function is monotonic in a range of values.
@@ -320,6 +323,8 @@ public:
     bool isInjective(const Block & sample_block) override { return function->isInjective(sample_block); }
+    bool isDeterministic() override { return function->isDeterministic(); }
     bool isDeterministicInScopeOfQuery() override { return function->isDeterministicInScopeOfQuery(); }
     bool hasInformationAboutMonotonicity() const override { return function->hasInformationAboutMonotonicity(); }
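The comment above distinguishes two notions of determinism that the rest of this change relies on. A hedged, table-free SQL illustration of the difference (not part of the patch itself):

-- now() is not deterministic in general, but it is constant within one query,
-- so both columns below should show the same timestamp; re-running the query
-- later yields a different value.
SELECT now() AS t1, now() AS t2;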


@@ -113,12 +113,12 @@ MergeTreeData::MergeTreeData(
 {
     merging_params.check(columns);
-    if (!primary_expr_ast) /// TODO Allow tables without primary key.
+    if (!primary_expr_ast)
         throw Exception("Primary key cannot be empty", ErrorCodes::BAD_ARGUMENTS);
     initPrimaryKey();
-    if (sampling_expression && (!primary_expr_ast || !primary_key_sample.has(sampling_expression->getColumnName()))
+    if (sampling_expression && (!primary_key_sample.has(sampling_expression->getColumnName()))
         && !attach && !settings.compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility.
         throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS);
@@ -178,22 +178,37 @@ MergeTreeData::MergeTreeData(
 }
-static void checkForAllowedKeyColumns(const ColumnWithTypeAndName & element, const std::string & key_name)
+static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name)
 {
-    const ColumnPtr & column = element.column;
-    if (column && (column->isColumnConst() || column->isDummy()))
-        throw Exception{key_name + " key cannot contain constants", ErrorCodes::ILLEGAL_COLUMN};
-    if (element.type->isNullable())
-        throw Exception{key_name + " key cannot contain nullable columns", ErrorCodes::ILLEGAL_COLUMN};
+    for (const ExpressionAction & action : expr.getActions())
+    {
+        if (action.type == ExpressionAction::ARRAY_JOIN)
+            throw Exception(key_name + " key cannot contain array joins");
+        if (action.type == ExpressionAction::APPLY_FUNCTION)
+        {
+            IFunctionBase & func = *action.function;
+            if (!func.isDeterministic())
+                throw Exception(key_name + " key cannot contain non-deterministic functions, "
+                    "but contains function " + func.getName(),
+                    ErrorCodes::BAD_ARGUMENTS);
+        }
+    }
+    for (const ColumnWithTypeAndName & element : sample_block)
+    {
+        const ColumnPtr & column = element.column;
+        if (column && (column->isColumnConst() || column->isDummy()))
+            throw Exception{key_name + " key cannot contain constants", ErrorCodes::ILLEGAL_COLUMN};
+        if (element.type->isNullable())
+            throw Exception{key_name + " key cannot contain nullable columns", ErrorCodes::ILLEGAL_COLUMN};
+    }
 }
 void MergeTreeData::initPrimaryKey()
 {
+    if (!primary_expr_ast)
+        return;
     auto addSortDescription = [](SortDescription & descr, const ASTPtr & expr_ast)
     {
         descr.reserve(descr.size() + expr_ast->children.size());
@@ -216,14 +231,9 @@ void MergeTreeData::initPrimaryKey()
         primary_key_sample = projected_expr->getSampleBlock();
     }
+    checkKeyExpression(*primary_expr, primary_key_sample, "Primary");
     size_t primary_key_size = primary_key_sample.columns();
-    /// A primary key cannot contain constants. It is meaningless.
-    /// (And also couldn't work because primary key is serialized with method of IDataType that doesn't support constants).
-    /// Also a primary key must not contain any nullable column.
-    for (size_t i = 0; i < primary_key_size; ++i)
-        checkForAllowedKeyColumns(primary_key_sample.getByPosition(i), "Primary");
     primary_key_data_types.resize(primary_key_size);
     for (size_t i = 0; i < primary_key_size; ++i)
         primary_key_data_types[i] = primary_key_sample.getByPosition(i).type;
@@ -238,8 +248,7 @@ void MergeTreeData::initPrimaryKey()
             ExpressionAnalyzer(secondary_sort_expr_ast, context, nullptr, getColumnsList()).getActions(true);
         auto secondary_key_sample = projected_expr->getSampleBlock();
-        for (size_t i = 0; i < secondary_key_sample.columns(); ++i)
-            checkForAllowedKeyColumns(secondary_key_sample.getByPosition(i), "Secondary");
+        checkKeyExpression(*secondary_sort_expr, secondary_key_sample, "Secondary");
     }
 }
@@ -253,14 +262,11 @@ void MergeTreeData::initPartitionKey()
     for (const ASTPtr & ast : partition_expr_ast->children)
     {
         String col_name = ast->getColumnName();
-        partition_expr_columns.emplace_back(col_name);
-        const ColumnWithTypeAndName & element = partition_expr->getSampleBlock().getByName(col_name);
-        checkForAllowedKeyColumns(element, "Partition");
-        partition_expr_column_types.emplace_back(element.type);
+        partition_key_sample.insert(partition_expr->getSampleBlock().getByName(col_name));
     }
+    checkKeyExpression(*partition_expr, partition_key_sample, "Partition");
     /// Add all columns used in the partition key to the min-max index.
     const NamesAndTypesList & minmax_idx_columns_with_types = partition_expr->getRequiredColumnsWithTypes();
     minmax_idx_expr = std::make_shared<ExpressionActions>(minmax_idx_columns_with_types, context.getSettingsRef());
@@ -2031,7 +2037,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context
     /// Re-parse partition key fields using the information about expected field types.
-    size_t fields_count = partition_expr_column_types.size();
+    size_t fields_count = partition_key_sample.columns();
     if (partition_ast.fields_count != fields_count)
         throw Exception(
             "Wrong number of fields in the partition expression: " + toString(partition_ast.fields_count) +
@@ -2047,12 +2053,8 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context
     ReadBufferFromMemory right_paren_buf(")", 1);
     ConcatReadBuffer buf({&left_paren_buf, &fields_buf, &right_paren_buf});
-    Block header;
-    for (size_t i = 0; i < fields_count; ++i)
-        header.insert(ColumnWithTypeAndName(partition_expr_column_types[i], partition_expr_columns[i]));
-    ValuesRowInputStream input_stream(buf, header, context, /* interpret_expressions = */true);
-    MutableColumns columns = header.cloneEmptyColumns();
+    ValuesRowInputStream input_stream(buf, partition_key_sample, context, /* interpret_expressions = */true);
+    MutableColumns columns = partition_key_sample.cloneEmptyColumns();
     if (!input_stream.read(columns))
         throw Exception(
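The practical effect of the new checkKeyExpression() is that primary, secondary sort and partition keys may no longer use functions marked non-deterministic. A minimal sketch of statements that should now be rejected (table names are hypothetical; the expected messages paraphrase the exception built above):

-- Should fail with BAD_ARGUMENTS, roughly:
--   Primary key cannot contain non-deterministic functions, but contains function now
CREATE TABLE test.bad_sort_key (d DateTime) ENGINE MergeTree ORDER BY (d, now());

-- The same check applies to the partition key:
--   Partition key cannot contain non-deterministic functions, but contains function today
CREATE TABLE test.bad_partition_key (d Date, x UInt32)
    ENGINE MergeTree PARTITION BY today() ORDER BY x;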


@@ -524,8 +524,7 @@ public:
     ASTPtr partition_expr_ast;
     ExpressionActionsPtr partition_expr;
-    Names partition_expr_columns;
-    DataTypes partition_expr_column_types;
+    Block partition_key_sample;
     ExpressionActionsPtr minmax_idx_expr;
     Names minmax_idx_columns;


@@ -81,9 +81,9 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block
     data.partition_expr->execute(block_copy);
     ColumnRawPtrs partition_columns;
-    partition_columns.reserve(data.partition_expr_columns.size());
-    for (const String & name : data.partition_expr_columns)
-        partition_columns.emplace_back(block_copy.getByName(name).column.get());
+    partition_columns.reserve(data.partition_key_sample.columns());
+    for (const ColumnWithTypeAndName & element : data.partition_key_sample)
+        partition_columns.emplace_back(block_copy.getByName(element.name).column.get());
     PODArray<size_t> partition_num_to_first_row;
     IColumn::Selector selector;
@@ -103,7 +103,9 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block
     if (partitions_count == 1)
     {
         /// A typical case is when there is one partition (you do not need to split anything).
-        result.emplace_back(std::move(block_copy), get_partition(0));
+        /// NOTE: returning a copy of the original block so that calculated partition key columns
+        /// do not interfere with possible calculated primary key columns of the same name.
+        result.emplace_back(Block(block), get_partition(0));
         return result;
     }
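The NOTE above matters when the partition key and the primary key are computed from the same source column, which is exactly what the new partition_and_primary_keys_using_same_expression test at the end of this change exercises. A condensed, hedged sketch of that situation (table name is hypothetical):

-- Both keys are derived from dt; the partition key columns are calculated on a
-- copy of the inserted block so they cannot clash with primary key columns of
-- the same name calculated afterwards.
CREATE TABLE test.same_expr (dt DateTime)
    ENGINE MergeTree PARTITION BY toDate(dt) ORDER BY toDayOfWeek(toDate(dt));
INSERT INTO test.same_expr VALUES ('2018-02-19 12:00:00');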


@@ -23,7 +23,7 @@ static ReadBufferFromFile openForReading(const String & path)
 /// So if you want to change this method, be sure to guarantee compatibility with existing table data.
 String MergeTreePartition::getID(const MergeTreeData & storage) const
 {
-    if (value.size() != storage.partition_expr_columns.size())
+    if (value.size() != storage.partition_key_sample.columns())
         throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR);
     if (value.empty())
@@ -51,7 +51,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const
         if (i > 0)
             result += '-';
-        if (typeid_cast<const DataTypeDate *>(storage.partition_expr_column_types[i].get()))
+        if (typeid_cast<const DataTypeDate *>(storage.partition_key_sample.getByPosition(i).type.get()))
             result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(value[i].safeGet<UInt64>())));
         else
             result += applyVisitor(to_string_visitor, value[i]);
@@ -79,7 +79,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const
 void MergeTreePartition::serializeTextQuoted(const MergeTreeData & storage, WriteBuffer & out) const
 {
-    size_t key_size = storage.partition_expr_column_types.size();
+    size_t key_size = storage.partition_key_sample.columns();
     if (key_size == 0)
     {
@@ -95,7 +95,7 @@ void MergeTreePartition::serializeTextQuoted(const MergeTreeData & storage, Writ
         if (i > 0)
             writeCString(", ", out);
-        const DataTypePtr & type = storage.partition_expr_column_types[i];
+        const DataTypePtr & type = storage.partition_key_sample.getByPosition(i).type;
         auto column = type->createColumn();
         column->insert(value[i]);
         type->serializeTextQuoted(*column, 0, out);
@@ -111,9 +111,9 @@ void MergeTreePartition::load(const MergeTreeData & storage, const String & part
         return;
     ReadBufferFromFile file = openForReading(part_path + "partition.dat");
-    value.resize(storage.partition_expr_column_types.size());
-    for (size_t i = 0; i < storage.partition_expr_column_types.size(); ++i)
-        storage.partition_expr_column_types[i]->deserializeBinary(value[i], file);
+    value.resize(storage.partition_key_sample.columns());
+    for (size_t i = 0; i < storage.partition_key_sample.columns(); ++i)
+        storage.partition_key_sample.getByPosition(i).type->deserializeBinary(value[i], file);
 }
 void MergeTreePartition::store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const
@@ -124,7 +124,7 @@ void MergeTreePartition::store(const MergeTreeData & storage, const String & par
     WriteBufferFromFile out(part_path + "partition.dat");
     HashingWriteBuffer out_hashing(out);
     for (size_t i = 0; i < value.size(); ++i)
-        storage.partition_expr_column_types[i]->serializeBinary(value[i], out_hashing);
+        storage.partition_key_sample.getByPosition(i).type->serializeBinary(value[i], out_hashing);
     out_hashing.next();
     checksums.files["partition.dat"].file_size = out_hashing.count();
     checksums.files["partition.dat"].file_hash = out_hashing.getHash();


@@ -586,6 +586,10 @@ static StoragePtr create(const StorageFactory::Arguments & args)
         if (args.storage_def->order_by)
             primary_expr_list = extractKeyExpressionList(*args.storage_def->order_by);
+        else
+            throw Exception("You must provide an ORDER BY expression in the table definition. "
+                "If you don't want this table to be sorted, use ORDER BY tuple()",
+                ErrorCodes::BAD_ARGUMENTS);
         if (args.storage_def->sample_by)
             sampling_expression = args.storage_def->sample_by->ptr();
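With this change an ORDER BY clause becomes mandatory for the extended MergeTree syntax. A hedged sketch of the two outcomes (table names are hypothetical; the error text comes from the exception added above):

-- Should now be rejected, roughly:
--   You must provide an ORDER BY expression in the table definition.
--   If you don't want this table to be sorted, use ORDER BY tuple()
CREATE TABLE test.no_order_by (x UInt32) ENGINE MergeTree PARTITION BY tuple();

-- The supported way to declare an unsorted table, as exercised by the new tests below:
CREATE TABLE test.unsorted_ok (x UInt32, y String) ENGINE MergeTree ORDER BY tuple();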


@@ -0,0 +1,14 @@
+*** MergeTree ***
+1 a
+5 b
+2 c
+4 d
+3 e
+*** ReplacingMergeTree ***
+1 a 5
+---
+1 a 5
+*** CollapsingMergeTree ***
+3 c 1
+---
+3 c 1


@@ -0,0 +1,56 @@
+SELECT '*** MergeTree ***';
+DROP TABLE IF EXISTS test.unsorted;
+CREATE TABLE test.unsorted (x UInt32, y String) ENGINE MergeTree ORDER BY tuple();
+INSERT INTO test.unsorted VALUES (1, 'a'), (5, 'b');
+INSERT INTO test.unsorted VALUES (2, 'c'), (4, 'd');
+INSERT INTO test.unsorted VALUES (3, 'e');
+OPTIMIZE TABLE test.unsorted PARTITION tuple() FINAL;
+SELECT * FROM test.unsorted;
+DROP TABLE test.unsorted;
+SELECT '*** ReplacingMergeTree ***';
+DROP TABLE IF EXISTS test.unsorted_replacing;
+CREATE TABLE test.unsorted_replacing (x UInt32, s String, v UInt32) ENGINE ReplacingMergeTree(v) ORDER BY tuple();
+INSERT INTO test.unsorted_replacing VALUES (1, 'a', 5), (5, 'b', 4);
+INSERT INTO test.unsorted_replacing VALUES (2, 'c', 3), (4, 'd', 2);
+INSERT INTO test.unsorted_replacing VALUES (3, 'e', 1);
+SELECT * FROM test.unsorted_replacing FINAL;
+SELECT '---';
+OPTIMIZE TABLE test.unsorted_replacing PARTITION tuple() FINAL;
+SELECT * FROM test.unsorted_replacing;
+DROP TABLE test.unsorted_replacing;
+SELECT '*** CollapsingMergeTree ***';
+DROP TABLE IF EXISTS test.unsorted_collapsing;
+CREATE TABLE test.unsorted_collapsing (x UInt32, s String, sign Int8) ENGINE CollapsingMergeTree(sign) ORDER BY tuple();
+INSERT INTO test.unsorted_collapsing VALUES (1, 'a', 1);
+INSERT INTO test.unsorted_collapsing VALUES (1, 'a', -1), (2, 'b', 1);
+INSERT INTO test.unsorted_collapsing VALUES (2, 'b', -1), (3, 'c', 1);
+SELECT * FROM test.unsorted_collapsing FINAL;
+SELECT '---';
+OPTIMIZE TABLE test.unsorted_collapsing PARTITION tuple() FINAL;
+SELECT * FROM test.unsorted_collapsing;
+DROP TABLE test.unsorted_collapsing;


@@ -0,0 +1,6 @@
+2018-02-19 12:00:00
+2018-02-20 12:00:00
+2018-02-21 12:00:00
+---
+2018-02-19 12:00:00
+2018-02-21 12:00:00


@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS test.partition_and_primary_keys_using_same_expression;
+CREATE TABLE test.partition_and_primary_keys_using_same_expression(dt DateTime)
+    ENGINE MergeTree PARTITION BY toDate(dt) ORDER BY toDayOfWeek(toDate(dt));
+INSERT INTO test.partition_and_primary_keys_using_same_expression
+    VALUES ('2018-02-19 12:00:00');
+INSERT INTO test.partition_and_primary_keys_using_same_expression
+    VALUES ('2018-02-20 12:00:00'), ('2018-02-21 12:00:00');
+SELECT * FROM test.partition_and_primary_keys_using_same_expression ORDER BY dt;
+SELECT '---';
+ALTER TABLE test.partition_and_primary_keys_using_same_expression DROP PARTITION '2018-02-20';
+SELECT * FROM test.partition_and_primary_keys_using_same_expression ORDER BY dt;
+DROP TABLE test.partition_and_primary_keys_using_same_expression;