From 5e230a6b2d8e69e06b3847b0f31c61d3e7bdf08e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 28 Apr 2015 06:38:41 +0300 Subject: [PATCH 001/116] dbms: function 'transform': development [#METR-15987]. --- .../DB/Functions/FunctionsMiscellaneous.h | 452 ++++++++++++++++++ dbms/src/Functions/FunctionsMiscellaneous.cpp | 2 + 2 files changed, 454 insertions(+) diff --git a/dbms/include/DB/Functions/FunctionsMiscellaneous.h b/dbms/include/DB/Functions/FunctionsMiscellaneous.h index 65cd2f19577..2789d19f776 100644 --- a/dbms/include/DB/Functions/FunctionsMiscellaneous.h +++ b/dbms/include/DB/Functions/FunctionsMiscellaneous.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -21,6 +22,7 @@ #include #include #include +#include #include #include @@ -54,6 +56,8 @@ namespace DB * sleep(n) - спит n секунд каждый блок. * * bar(x, min, max, width) - рисует полосу из количества символов, пропорционального (x - min) и равного width при x == max. + * + * transform(x, from_array, to_array[, default]) - преобразовать x согласно переданному явным образом соответствию. */ @@ -884,4 +888,452 @@ using FunctionIsFinite = FunctionNumericPredicate; using FunctionIsInfinite = FunctionNumericPredicate; using FunctionIsNaN = FunctionNumericPredicate; + +/** transform(x, [from...], [to...], default) + * - преобразует значения согласно явно указанному отображению. + * + * x - что преобразовывать. + * from - константный массив значений для преобразования. + * to - константный массив значений, в которые должны быть преобразованы значения из from. + * default - константа, какое значение использовать, если x не равен ни одному из значений во from. + * from и to - массивы одинаковых размеров. + * + * Типы: + * transform(T, Array(T), Array(U), U) -> U + * + * transform(x, [from...], [to...]) + * - eсли default не указан, то для значений x, для которых нет соответствующего элемента во from, возвращается не изменённое значение x. + * + * Типы: + * transform(T, Array(T), Array(T)) -> T + * + * Замечание: реализация довольно громоздкая. + */ +class FunctionTransform : public IFunction +{ +public: + static constexpr auto name = "transform"; + static IFunction * create(const Context &) { return new FunctionTransform; } + + String getName() const override { return name; } + + DataTypePtr getReturnType(const DataTypes & arguments) const override + { + const auto args_size = arguments.size(); + if (args_size != 3 && args_size != 4) + throw Exception{ + "Number of arguments for function " + getName() + " doesn't match: passed " + + toString(args_size) + ", should be 3 or 4", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + + const IDataType * type_x = arguments[0].get(); + + if (!type_x->isNumeric() && !typeid_cast(type_x)) + throw Exception("Unsupported type " + type_x->getName() + + " of first argument of function " + getName() + + ", must be numeric type or Date/DateTime or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const DataTypeArray * type_arr_from = typeid_cast(arguments[1].get()); + + if (!type_arr_from) + throw Exception("Second argument of function " + getName() + + ", must be array of source values to transform from.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto type_arr_from_nested = type_arr_from->getNestedType(); + + if ((type_x->isNumeric() != type_arr_from_nested->isNumeric()) + || (!!typeid_cast(type_x) != !!typeid_cast(type_arr_from_nested.get()))) + throw Exception("First argument and elements of array of second argument of function " + getName() + + " must have compatible types: both numeric or both strings.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const DataTypeArray * type_arr_to = typeid_cast(arguments[2].get()); + + if (!type_arr_to) + throw Exception("Third argument of function " + getName() + + ", must be array of destination values to transform to.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto type_arr_to_nested = type_arr_to->getNestedType(); + + if (args_size == 3) + { + if ((type_x->isNumeric() != type_arr_to_nested->isNumeric()) + || (!!typeid_cast(type_x) != !!typeid_cast(type_arr_to_nested.get()))) + throw Exception("Function " + getName() + + " have signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return type_x->clone(); + } + else + { + const IDataType * type_default = arguments[3].get(); + + if (!type_default->isNumeric() && !typeid_cast(type_default)) + throw Exception("Unsupported type " + type_default->getName() + + " of fourth argument (default value) of function " + getName() + + ", must be numeric type or Date/DateTime or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if ((type_default->isNumeric() != type_arr_to_nested->isNumeric()) + || (!!typeid_cast(type_default) != !!typeid_cast(type_arr_to_nested.get()))) + throw Exception("Function " + getName() + + " have signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return type_arr_to_nested->clone(); + } + } + + void execute(Block & block, const ColumnNumbers & arguments, const size_t result) override + { + const ColumnConstArray * array_from = typeid_cast(&*block.getByPosition(arguments[1]).column); + const ColumnConstArray * array_to = typeid_cast(&*block.getByPosition(arguments[2]).column); + + if (!array_from && !array_to) + throw Exception("Second and third arguments of function " + getName() + " must be constant arrays.", ErrorCodes::ILLEGAL_COLUMN); + + prepare(array_from->getData(), array_to->getData()); + + /// Задано ли значение по-умолчанию. + Field default_value; + if (arguments.size() == 4) + { + const IColumnConst * default_col = dynamic_cast(&*block.getByPosition(arguments[3]).column); + default_value = (*default_col)[0]; + } + + const auto in = block.getByPosition(arguments.front()).column.get(); + auto column_result = block.getByPosition(result).type->createColumn(); + auto out = column_result.get(); + + if (!executeNum(in, out, default_value) + && !executeNum(in, out, default_value) + && !executeNum(in, out, default_value) + && !executeNum(in, out, default_value) + && !executeNum(in, out, default_value) + && !executeNum(in, out, default_value) + && !executeNum(in, out, default_value) + && !executeNum(in, out, default_value) + && !executeNum(in, out, default_value) + && !executeNum(in, out, default_value) + && !executeString(in, out, default_value)) + throw Exception( + "Illegal column " + in->getName() + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + block.getByPosition(result).column = column_result; + } + + template + bool executeNum(const IColumn * in_untyped, IColumn * out_untyped, const Field & default_value) + { + if (const auto in = typeid_cast *>(in_untyped)) + { + if (default_value.isNull()) + { + auto out = typeid_cast *>(out_untyped); + if (!out) + throw Exception( + "Illegal column " + out_untyped->getName() + " of elements of array of third argument of function " + getName() + + ", must be " + in->getName(), + ErrorCodes::ILLEGAL_COLUMN); + + executeImplNumToNum(in->getData(), out->getData()); + } + else + { + if (!executeNumToNumWithDefault(in, out_untyped, default_value) + && !executeNumToNumWithDefault(in, out_untyped, default_value) + && !executeNumToNumWithDefault(in, out_untyped, default_value) + && !executeNumToNumWithDefault(in, out_untyped, default_value) + && !executeNumToNumWithDefault(in, out_untyped, default_value) + && !executeNumToNumWithDefault(in, out_untyped, default_value) + && !executeNumToNumWithDefault(in, out_untyped, default_value) + && !executeNumToNumWithDefault(in, out_untyped, default_value) + && !executeNumToNumWithDefault(in, out_untyped, default_value) + && !executeNumToNumWithDefault(in, out_untyped, default_value) + && !executeNumToString(in, out_untyped, default_value)) + throw Exception( + "Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + + return true; + } + else if (const auto in = typeid_cast *>(in_untyped)) + { + /* TODO */ + + return true; + } + + return false; + } + + bool executeString(const IColumn * in_untyped, IColumn * out_untyped, const Field & default_value) + { + if (const auto in = typeid_cast(in_untyped)) + { + if (!executeStringToNum(in, out_untyped, default_value) + && !executeStringToNum(in, out_untyped, default_value) + && !executeStringToNum(in, out_untyped, default_value) + && !executeStringToNum(in, out_untyped, default_value) + && !executeStringToNum(in, out_untyped, default_value) + && !executeStringToNum(in, out_untyped, default_value) + && !executeStringToNum(in, out_untyped, default_value) + && !executeStringToNum(in, out_untyped, default_value) + && !executeStringToNum(in, out_untyped, default_value) + && !executeStringToNum(in, out_untyped, default_value) + && !executeStringToString(in, out_untyped, default_value)) + throw Exception( + "Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + return true; + } + else if (const auto in = typeid_cast(in_untyped)) + { + /* TODO */ + + return true; + } + + return false; + } + + template + bool executeNumToNumWithDefault(const ColumnVector * in, IColumn * out_untyped, const Field & default_value) + { + auto out = typeid_cast *>(out_untyped); + if (!out) + return false; + + executeImplNumToNumWithDefault(in->getData(), out->getData(), default_value.get()); + return true; + } + + template + bool executeNumToString(const ColumnVector * in, IColumn * out_untyped, const Field & default_value) + { + auto out = typeid_cast(out_untyped); + if (!out) + return false; + + const String & default_str = default_value.get(); + StringRef default_string_ref{default_str.data(), default_str.size() + 1}; + executeImplNumToStringWithDefault(in->getData(), out->getChars(), out->getOffsets(), default_string_ref); + return true; + } + + template + bool executeStringToNum(const ColumnString * in, IColumn * out_untyped, const Field & default_value) + { + auto out = typeid_cast *>(out_untyped); + if (!out) + return false; + + executeImplStringToNumWithDefault(in->getChars(), in->getOffsets(), out->getData(), default_value.get()); + return true; + } + + bool executeStringToString(const ColumnString * in, IColumn * out_untyped, const Field & default_value) + { + auto out = typeid_cast(out_untyped); + if (!out) + return false; + + if (default_value.isNull()) + executeImplStringToString(in->getChars(), in->getOffsets(), out->getChars(), out->getOffsets(), {}); + else + { + const String & default_str = default_value.get(); + StringRef default_string_ref{default_str.data(), default_str.size() + 1}; + executeImplStringToString(in->getChars(), in->getOffsets(), out->getChars(), out->getOffsets(), default_string_ref); + } + + return true; + } + + + template + void executeImplNumToNumWithDefault(const PODArray & src, PODArray & dst, U dst_default) + { + const auto & table = *table_num_to_num; + size_t size = src.size(); + dst.resize(size); + for (size_t i = 0; i < size; ++i) + { + auto it = table.find(src[i]); + if (it != table.end()) + dst[i] = it->second; + else + dst[i] = dst_default; + } + } + + template + void executeImplNumToNum(const PODArray & src, PODArray & dst) + { + const auto & table = *table_num_to_num; + size_t size = src.size(); + dst.resize(size); + for (size_t i = 0; i < size; ++i) + { + auto it = table.find(src[i]); + if (it != table.end()) + dst[i] = it->second; + else + dst[i] = src[i]; + } + } + + template + void executeImplNumToStringWithDefault(const PODArray & src, + ColumnString::Chars_t & dst_data, ColumnString::Offsets_t & dst_offsets, StringRef dst_default) + { + const auto & table = *table_num_to_string; + size_t size = src.size(); + dst_offsets.resize(size); + ColumnString::Offset_t current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + auto it = table.find(src[i]); + StringRef ref = it != table.end() ? it->second : dst_default; + dst_data.resize(current_offset + ref.size); + memcpy(&dst_data[current_offset], ref.data, ref.size); + current_offset += ref.size; + dst_offsets[i] = current_offset; + } + } + + template + void executeImplStringToNumWithDefault( + const ColumnString::Chars_t & src_data, const ColumnString::Offsets_t & src_offsets, + PODArray & dst, U dst_default) + { + const auto & table = *table_string_to_num; + size_t size = src_offsets.size(); + dst.resize(size); + ColumnString::Offset_t current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + StringRef ref{&src_data[current_offset], src_offsets[i] - current_offset}; + current_offset = src_offsets[i]; + auto it = table.find(ref); + if (it != table.end()) + dst[i] = it->second; + else + dst[i] = dst_default; + } + } + + template + void executeImplStringToString( + const ColumnString::Chars_t & src_data, const ColumnString::Offsets_t & src_offsets, + ColumnString::Chars_t & dst_data, ColumnString::Offsets_t & dst_offsets, StringRef dst_default) + { + const auto & table = *table_string_to_string; + size_t size = src_offsets.size(); + dst_offsets.resize(size); + ColumnString::Offset_t current_src_offset = 0; + ColumnString::Offset_t current_dst_offset = 0; + for (size_t i = 0; i < size; ++i) + { + StringRef src_ref{&src_data[current_src_offset], src_offsets[i] - current_src_offset}; + current_src_offset = src_offsets[i]; + + auto it = table.find(src_ref); + + StringRef dst_ref = it != table.end() ? it->second : (with_default ? dst_default : src_ref); + dst_data.resize(current_dst_offset + dst_ref.size); + memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size); + current_dst_offset += dst_ref.size; + dst_offsets[i] = current_dst_offset; + } + } + +private: + /// Разные варианты хэш-таблиц для реализации отображения. + + using NumToNum = HashMap>; + using NumToString = HashMap>; + using StringToNum = HashMap; + using StringToString = HashMap; + + std::unique_ptr table_num_to_num; + std::unique_ptr table_num_to_string; + std::unique_ptr table_string_to_num; + std::unique_ptr table_string_to_string; + + Arena string_pool; + + bool prepared = false; + std::mutex mutex; + + /// Может вызываться из разных потоков. Срабатывает только при первом вызове. + void prepare(const Array & from, const Array & to) + { + if (prepared) + return; + + const size_t size = from.size(); + if (0 == size) + return; + + std::lock_guard lock(mutex); + + if (prepared) + return; + + if (from.size() != to.size()) + throw Exception("Second and third arguments of function " + getName() + " must be arrays of same size.", ErrorCodes::BAD_ARGUMENTS); + + /// Замечание: не делается проверка дубликатов в массиве from. + + if (from[0].getType() != Field::Types::String && to[0].getType() != Field::Types::String) + { + table_num_to_num.reset(new NumToNum); + auto & table = *table_num_to_num; + for (size_t i = 0; i < size; ++i) + table[from[i].get()] = to[i].get(); + } + else if (from[0].getType() != Field::Types::String && to[0].getType() == Field::Types::String) + { + table_num_to_string.reset(new NumToString); + auto & table = *table_num_to_string; + for (size_t i = 0; i < size; ++i) + { + const String & str_to = to[i].get(); + StringRef ref{string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1}; /// С завершающим нулём. + table[from[i].get()] = ref; + } + } + else if (from[0].getType() == Field::Types::String && to[0].getType() != Field::Types::String) + { + table_string_to_num.reset(new StringToNum); + auto & table = *table_string_to_num; + for (size_t i = 0; i < size; ++i) + { + const String & str_from = from[i].get(); + StringRef ref{string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1}; + table[ref] = to[i].get(); + } + } + else if (from[0].getType() == Field::Types::String && to[0].getType() == Field::Types::String) + { + table_string_to_string.reset(new StringToString); + auto & table = *table_string_to_string; + for (size_t i = 0; i < size; ++i) + { + const String & str_from = from[i].get(); + const String & str_to = to[i].get(); + StringRef ref_from{string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1}; + StringRef ref_to{string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1}; + table[ref_from] = ref_to; + } + } + + prepared = true; + } +}; + } diff --git a/dbms/src/Functions/FunctionsMiscellaneous.cpp b/dbms/src/Functions/FunctionsMiscellaneous.cpp index 7eaf498d8fb..8eea8a07b17 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/FunctionsMiscellaneous.cpp @@ -337,6 +337,8 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + + factory.registerFunction(); } } From 123cd3340dbf2db3914d800095071c4620455533 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 28 Apr 2015 06:54:57 +0300 Subject: [PATCH 002/116] dbms: function 'transform': development [#METR-15987]. --- dbms/include/DB/Functions/FunctionsMiscellaneous.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsMiscellaneous.h b/dbms/include/DB/Functions/FunctionsMiscellaneous.h index 2789d19f776..87d7eeb67b1 100644 --- a/dbms/include/DB/Functions/FunctionsMiscellaneous.h +++ b/dbms/include/DB/Functions/FunctionsMiscellaneous.h @@ -1277,7 +1277,7 @@ private: const size_t size = from.size(); if (0 == size) - return; + throw Exception("Empty arrays are illegal in function " + getName(), ErrorCodes::BAD_ARGUMENTS); std::lock_guard lock(mutex); @@ -1285,7 +1285,7 @@ private: return; if (from.size() != to.size()) - throw Exception("Second and third arguments of function " + getName() + " must be arrays of same size.", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Second and third arguments of function " + getName() + " must be arrays of same size", ErrorCodes::BAD_ARGUMENTS); /// Замечание: не делается проверка дубликатов в массиве from. From 3f4f96ad7dcabf4a30df10af22f709dd1c3c4411 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 28 Apr 2015 06:56:23 +0300 Subject: [PATCH 003/116] dbms: added test for function 'transform' [#METR-15987]. --- .../0_stateless/00153_transform.reference | 70 +++++++++++++++++++ .../queries/0_stateless/00153_transform.sql | 7 ++ 2 files changed, 77 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00153_transform.reference create mode 100644 dbms/tests/queries/0_stateless/00153_transform.sql diff --git a/dbms/tests/queries/0_stateless/00153_transform.reference b/dbms/tests/queries/0_stateless/00153_transform.reference new file mode 100644 index 00000000000..d7e7df164f7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00153_transform.reference @@ -0,0 +1,70 @@ +0 +1 +2 +111 +4 +222 +6 +333 +8 +9 +9999 +9999 +9999 +111 +9999 +222 +9999 +333 +9999 +9999 + + + +hello + +world + +abc + + +0 +1 +2 +hello +4 +world +6 +abc +8 +9 + + + +hello + +world + +abc + + +- +- +- +hello +- +world +- +abc +- +- +0 +0 +0 +111 +0 +222 +0 +333 +0 +0 diff --git a/dbms/tests/queries/0_stateless/00153_transform.sql b/dbms/tests/queries/0_stateless/00153_transform.sql new file mode 100644 index 00000000000..36499d8001f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00153_transform.sql @@ -0,0 +1,7 @@ +SELECT transform(number, [3, 5, 7], [111, 222, 333]) FROM system.numbers LIMIT 10; +SELECT transform(number, [3, 5, 7], [111, 222, 333], 9999) FROM system.numbers LIMIT 10; +SELECT transform(number, [3, 5, 7], ['hello', 'world', 'abc'], '') FROM system.numbers LIMIT 10; +SELECT transform(toString(number), ['3', '5', '7'], ['hello', 'world', 'abc']) FROM system.numbers LIMIT 10; +SELECT transform(toString(number), ['3', '5', '7'], ['hello', 'world', 'abc'], '') FROM system.numbers LIMIT 10; +SELECT transform(toString(number), ['3', '5', '7'], ['hello', 'world', 'abc'], '-') FROM system.numbers LIMIT 10; +SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], 0) FROM system.numbers LIMIT 10; From c89970cc01125a494d219cf397d59873336c7ed6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 28 Apr 2015 07:35:54 +0300 Subject: [PATCH 004/116] dbms: fixed comment [#METR-2944]. --- dbms/include/DB/DataStreams/ExpressionBlockInputStream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/DataStreams/ExpressionBlockInputStream.h b/dbms/include/DB/DataStreams/ExpressionBlockInputStream.h index 585ac6faad3..66e9d2be998 100644 --- a/dbms/include/DB/DataStreams/ExpressionBlockInputStream.h +++ b/dbms/include/DB/DataStreams/ExpressionBlockInputStream.h @@ -15,7 +15,7 @@ using Poco::SharedPtr; /** Выполняет над блоком вычисление некоторого выражения. * Выражение состоит из идентификаторов столбцов из блока, констант, обычных функций. * Например: hits * 2 + 3, url LIKE '%yandex%' - * Выражение не меняет количество строк в потоке, и обрабатывает каждую строку независимо от других. + * Выражение обрабатывает каждую строку независимо от других. */ class ExpressionBlockInputStream : public IProfilingBlockInputStream { From 1e55d7aa60cb5aa84b456cf9e9e01be0efb543bf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 28 Apr 2015 07:37:35 +0300 Subject: [PATCH 005/116] dbms: removed debug output [#METR-2944]. --- dbms/src/Interpreters/ExpressionActions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index d781749eb1f..b710bb03dc1 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -215,7 +215,7 @@ void ExpressionAction::prepare(Block & sample_block) void ExpressionAction::execute(Block & block) const { - std::cerr << "executing: " << toString() << std::endl; +// std::cerr << "executing: " << toString() << std::endl; if (type == REMOVE_COLUMN || type == COPY_COLUMN) if (!block.has(source_name)) From d371b7c84372ccbfdf6d34e8969ac9ef47069482 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 28 Apr 2015 07:47:27 +0300 Subject: [PATCH 006/116] dbms: function 'transform': development [#METR-15987]. --- .../DB/Functions/FunctionsMiscellaneous.h | 19 ++++-- dbms/src/Functions/FunctionsMiscellaneous.cpp | 66 +++++++++++++++++++ 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsMiscellaneous.h b/dbms/include/DB/Functions/FunctionsMiscellaneous.h index 87d7eeb67b1..8c670b83254 100644 --- a/dbms/include/DB/Functions/FunctionsMiscellaneous.h +++ b/dbms/include/DB/Functions/FunctionsMiscellaneous.h @@ -889,6 +889,8 @@ using FunctionIsInfinite = FunctionNumericPredicate; using FunctionIsNaN = FunctionNumericPredicate; +DataTypePtr getSmallestCommonNumericType(const IDataType & t1, const IDataType & t2); + /** transform(x, [from...], [to...], default) * - преобразует значения согласно явно указанному отображению. * @@ -979,6 +981,13 @@ public: + " have signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (type_arr_to_nested->behavesAsNumber() && type_default->behavesAsNumber()) + { + /// Берём наименьший общий тип для элементов массива значений to и для default-а. + return getSmallestCommonNumericType(*type_arr_to_nested, *type_default); + } + + /// TODO Больше проверок. return type_arr_to_nested->clone(); } } @@ -1164,7 +1173,7 @@ public: { auto it = table.find(src[i]); if (it != table.end()) - dst[i] = it->second; + memcpy(&dst[i], &it->second, sizeof(dst[i])); /// little endian. else dst[i] = dst_default; } @@ -1180,7 +1189,7 @@ public: { auto it = table.find(src[i]); if (it != table.end()) - dst[i] = it->second; + memcpy(&dst[i], &it->second, sizeof(dst[i])); else dst[i] = src[i]; } @@ -1220,7 +1229,7 @@ public: current_offset = src_offsets[i]; auto it = table.find(ref); if (it != table.end()) - dst[i] = it->second; + memcpy(&dst[i], &it->second, sizeof(dst[i])); else dst[i] = dst_default; } @@ -1255,7 +1264,7 @@ private: /// Разные варианты хэш-таблиц для реализации отображения. using NumToNum = HashMap>; - using NumToString = HashMap>; + using NumToString = HashMap>; /// Везде StringRef-ы с завершающим нулём. using StringToNum = HashMap; using StringToString = HashMap; @@ -1303,7 +1312,7 @@ private: for (size_t i = 0; i < size; ++i) { const String & str_to = to[i].get(); - StringRef ref{string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1}; /// С завершающим нулём. + StringRef ref{string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1}; table[from[i].get()] = ref; } } diff --git a/dbms/src/Functions/FunctionsMiscellaneous.cpp b/dbms/src/Functions/FunctionsMiscellaneous.cpp index 8eea8a07b17..a5a3901da9c 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/FunctionsMiscellaneous.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -308,6 +309,71 @@ void FunctionVisibleWidth::execute(Block & block, const ColumnNumbers & argument ErrorCodes::ILLEGAL_COLUMN); } + +/// TODO: Убрать copy-paste из FunctionsConditional.h +template +struct DataTypeFromFieldTypeOrError +{ + static DataTypePtr getDataType() + { + return new typename DataTypeFromFieldType::Type; + } +}; + +template <> +struct DataTypeFromFieldTypeOrError +{ + static DataTypePtr getDataType() + { + return nullptr; + } +}; + +template +DataTypePtr getSmallestCommonNumericTypeImpl() +{ + using ResultType = typename NumberTraits::ResultOfIf::Type; + auto type_res = DataTypeFromFieldTypeOrError::getDataType(); + if (!type_res) + throw Exception("Types " + TypeName::get() + " and " + TypeName::get() + + " are not upscalable to a common type without loss of precision", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return type_res; +} + +template +DataTypePtr getSmallestCommonNumericTypeLeft(const IDataType & t2) +{ + if (typeid_cast(&t2)) return getSmallestCommonNumericTypeImpl(); + if (typeid_cast(&t2)) return getSmallestCommonNumericTypeImpl(); + if (typeid_cast(&t2)) return getSmallestCommonNumericTypeImpl(); + if (typeid_cast(&t2)) return getSmallestCommonNumericTypeImpl(); + if (typeid_cast(&t2)) return getSmallestCommonNumericTypeImpl(); + if (typeid_cast(&t2)) return getSmallestCommonNumericTypeImpl(); + if (typeid_cast(&t2)) return getSmallestCommonNumericTypeImpl(); + if (typeid_cast(&t2)) return getSmallestCommonNumericTypeImpl(); + if (typeid_cast(&t2)) return getSmallestCommonNumericTypeImpl(); + if (typeid_cast(&t2)) return getSmallestCommonNumericTypeImpl(); + + throw Exception("Logical error: not a numeric type passed to function getSmallestCommonNumericType", ErrorCodes::LOGICAL_ERROR); +} + +DataTypePtr getSmallestCommonNumericType(const IDataType & t1, const IDataType & t2) +{ + if (typeid_cast(&t1)) return getSmallestCommonNumericTypeLeft(t2); + if (typeid_cast(&t1)) return getSmallestCommonNumericTypeLeft(t2); + if (typeid_cast(&t1)) return getSmallestCommonNumericTypeLeft(t2); + if (typeid_cast(&t1)) return getSmallestCommonNumericTypeLeft(t2); + if (typeid_cast(&t1)) return getSmallestCommonNumericTypeLeft(t2); + if (typeid_cast(&t1)) return getSmallestCommonNumericTypeLeft(t2); + if (typeid_cast(&t1)) return getSmallestCommonNumericTypeLeft(t2); + if (typeid_cast(&t1)) return getSmallestCommonNumericTypeLeft(t2); + if (typeid_cast(&t1)) return getSmallestCommonNumericTypeLeft(t2); + if (typeid_cast(&t1)) return getSmallestCommonNumericTypeLeft(t2); + + throw Exception("Logical error: not a numeric type passed to function getSmallestCommonNumericType", ErrorCodes::LOGICAL_ERROR); +} + } From 738e73650084a983e7e68baa6e775f59e423a80c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 28 Apr 2015 08:21:41 +0300 Subject: [PATCH 007/116] dbms: function 'transform': development [#METR-15987]. --- .../DB/Functions/FunctionsMiscellaneous.h | 120 ++++++++++-------- .../0_stateless/00153_transform.reference | 30 +++++ .../queries/0_stateless/00153_transform.sql | 3 + 3 files changed, 102 insertions(+), 51 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsMiscellaneous.h b/dbms/include/DB/Functions/FunctionsMiscellaneous.h index 8c670b83254..35b10e52e8e 100644 --- a/dbms/include/DB/Functions/FunctionsMiscellaneous.h +++ b/dbms/include/DB/Functions/FunctionsMiscellaneous.h @@ -1000,31 +1000,23 @@ public: if (!array_from && !array_to) throw Exception("Second and third arguments of function " + getName() + " must be constant arrays.", ErrorCodes::ILLEGAL_COLUMN); - prepare(array_from->getData(), array_to->getData()); - - /// Задано ли значение по-умолчанию. - Field default_value; - if (arguments.size() == 4) - { - const IColumnConst * default_col = dynamic_cast(&*block.getByPosition(arguments[3]).column); - default_value = (*default_col)[0]; - } + prepare(array_from->getData(), array_to->getData(), block, arguments); const auto in = block.getByPosition(arguments.front()).column.get(); auto column_result = block.getByPosition(result).type->createColumn(); auto out = column_result.get(); - if (!executeNum(in, out, default_value) - && !executeNum(in, out, default_value) - && !executeNum(in, out, default_value) - && !executeNum(in, out, default_value) - && !executeNum(in, out, default_value) - && !executeNum(in, out, default_value) - && !executeNum(in, out, default_value) - && !executeNum(in, out, default_value) - && !executeNum(in, out, default_value) - && !executeNum(in, out, default_value) - && !executeString(in, out, default_value)) + if (!executeNum(in, out) + && !executeNum(in, out) + && !executeNum(in, out) + && !executeNum(in, out) + && !executeNum(in, out) + && !executeNum(in, out) + && !executeNum(in, out) + && !executeNum(in, out) + && !executeNum(in, out) + && !executeNum(in, out) + && !executeString(in, out)) throw Exception( "Illegal column " + in->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); @@ -1033,7 +1025,7 @@ public: } template - bool executeNum(const IColumn * in_untyped, IColumn * out_untyped, const Field & default_value) + bool executeNum(const IColumn * in_untyped, IColumn * out_untyped) { if (const auto in = typeid_cast *>(in_untyped)) { @@ -1050,17 +1042,17 @@ public: } else { - if (!executeNumToNumWithDefault(in, out_untyped, default_value) - && !executeNumToNumWithDefault(in, out_untyped, default_value) - && !executeNumToNumWithDefault(in, out_untyped, default_value) - && !executeNumToNumWithDefault(in, out_untyped, default_value) - && !executeNumToNumWithDefault(in, out_untyped, default_value) - && !executeNumToNumWithDefault(in, out_untyped, default_value) - && !executeNumToNumWithDefault(in, out_untyped, default_value) - && !executeNumToNumWithDefault(in, out_untyped, default_value) - && !executeNumToNumWithDefault(in, out_untyped, default_value) - && !executeNumToNumWithDefault(in, out_untyped, default_value) - && !executeNumToString(in, out_untyped, default_value)) + if (!executeNumToNumWithDefault(in, out_untyped) + && !executeNumToNumWithDefault(in, out_untyped) + && !executeNumToNumWithDefault(in, out_untyped) + && !executeNumToNumWithDefault(in, out_untyped) + && !executeNumToNumWithDefault(in, out_untyped) + && !executeNumToNumWithDefault(in, out_untyped) + && !executeNumToNumWithDefault(in, out_untyped) + && !executeNumToNumWithDefault(in, out_untyped) + && !executeNumToNumWithDefault(in, out_untyped) + && !executeNumToNumWithDefault(in, out_untyped) + && !executeNumToString(in, out_untyped)) throw Exception( "Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); @@ -1078,21 +1070,21 @@ public: return false; } - bool executeString(const IColumn * in_untyped, IColumn * out_untyped, const Field & default_value) + bool executeString(const IColumn * in_untyped, IColumn * out_untyped) { if (const auto in = typeid_cast(in_untyped)) { - if (!executeStringToNum(in, out_untyped, default_value) - && !executeStringToNum(in, out_untyped, default_value) - && !executeStringToNum(in, out_untyped, default_value) - && !executeStringToNum(in, out_untyped, default_value) - && !executeStringToNum(in, out_untyped, default_value) - && !executeStringToNum(in, out_untyped, default_value) - && !executeStringToNum(in, out_untyped, default_value) - && !executeStringToNum(in, out_untyped, default_value) - && !executeStringToNum(in, out_untyped, default_value) - && !executeStringToNum(in, out_untyped, default_value) - && !executeStringToString(in, out_untyped, default_value)) + if (!executeStringToNum(in, out_untyped) + && !executeStringToNum(in, out_untyped) + && !executeStringToNum(in, out_untyped) + && !executeStringToNum(in, out_untyped) + && !executeStringToNum(in, out_untyped) + && !executeStringToNum(in, out_untyped) + && !executeStringToNum(in, out_untyped) + && !executeStringToNum(in, out_untyped) + && !executeStringToNum(in, out_untyped) + && !executeStringToNum(in, out_untyped) + && !executeStringToString(in, out_untyped)) throw Exception( "Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); @@ -1110,7 +1102,7 @@ public: } template - bool executeNumToNumWithDefault(const ColumnVector * in, IColumn * out_untyped, const Field & default_value) + bool executeNumToNumWithDefault(const ColumnVector * in, IColumn * out_untyped) { auto out = typeid_cast *>(out_untyped); if (!out) @@ -1121,7 +1113,7 @@ public: } template - bool executeNumToString(const ColumnVector * in, IColumn * out_untyped, const Field & default_value) + bool executeNumToString(const ColumnVector * in, IColumn * out_untyped) { auto out = typeid_cast(out_untyped); if (!out) @@ -1134,7 +1126,7 @@ public: } template - bool executeStringToNum(const ColumnString * in, IColumn * out_untyped, const Field & default_value) + bool executeStringToNum(const ColumnString * in, IColumn * out_untyped) { auto out = typeid_cast *>(out_untyped); if (!out) @@ -1144,7 +1136,7 @@ public: return true; } - bool executeStringToString(const ColumnString * in, IColumn * out_untyped, const Field & default_value) + bool executeStringToString(const ColumnString * in, IColumn * out_untyped) { auto out = typeid_cast(out_untyped); if (!out) @@ -1275,11 +1267,13 @@ private: Arena string_pool; + Field default_value; /// Null, если не задано. + bool prepared = false; std::mutex mutex; /// Может вызываться из разных потоков. Срабатывает только при первом вызове. - void prepare(const Array & from, const Array & to) + void prepare(const Array & from, const Array & to, Block & block, const ColumnNumbers & arguments) { if (prepared) return; @@ -1296,6 +1290,30 @@ private: if (from.size() != to.size()) throw Exception("Second and third arguments of function " + getName() + " must be arrays of same size", ErrorCodes::BAD_ARGUMENTS); + Array converted_to; + const Array * used_to = &to; + + /// Задано ли значение по-умолчанию. + + if (arguments.size() == 4) + { + const IColumnConst * default_col = dynamic_cast(&*block.getByPosition(arguments[3]).column); + default_value = (*default_col)[0]; + + /// Нужно ли преобразовать элементы to и default_value к наименьшему общему типу, который является Float64? + if (default_value.getType() == Field::Types::Float64 && to[0].getType() != Field::Types::Float64) + { + converted_to.resize(to.size()); + for (size_t i = 0, size = to.size(); i < size; ++i) + converted_to[i] = apply_visitor(FieldVisitorConvertToNumber(), to[i]); + used_to = &converted_to; + } + else if (default_value.getType() != Field::Types::Float64 && to[0].getType() == Field::Types::Float64) + { + default_value = apply_visitor(FieldVisitorConvertToNumber(), default_value); + } + } + /// Замечание: не делается проверка дубликатов в массиве from. if (from[0].getType() != Field::Types::String && to[0].getType() != Field::Types::String) @@ -1303,7 +1321,7 @@ private: table_num_to_num.reset(new NumToNum); auto & table = *table_num_to_num; for (size_t i = 0; i < size; ++i) - table[from[i].get()] = to[i].get(); + table[from[i].get()] = (*used_to)[i].get(); } else if (from[0].getType() != Field::Types::String && to[0].getType() == Field::Types::String) { @@ -1324,7 +1342,7 @@ private: { const String & str_from = from[i].get(); StringRef ref{string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1}; - table[ref] = to[i].get(); + table[ref] = (*used_to)[i].get(); } } else if (from[0].getType() == Field::Types::String && to[0].getType() == Field::Types::String) diff --git a/dbms/tests/queries/0_stateless/00153_transform.reference b/dbms/tests/queries/0_stateless/00153_transform.reference index d7e7df164f7..5265d492b9d 100644 --- a/dbms/tests/queries/0_stateless/00153_transform.reference +++ b/dbms/tests/queries/0_stateless/00153_transform.reference @@ -68,3 +68,33 @@ abc 333 0 0 +-1 +-1 +-1 +111 +-1 +222 +-1 +333 +-1 +-1 +-1.1 +-1.1 +-1.1 +111 +-1.1 +222 +-1.1 +333 +-1.1 +-1.1 +1 +1 +1 +111 +1 +222.2 +1 +333 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00153_transform.sql b/dbms/tests/queries/0_stateless/00153_transform.sql index 36499d8001f..cfa64fa4a5f 100644 --- a/dbms/tests/queries/0_stateless/00153_transform.sql +++ b/dbms/tests/queries/0_stateless/00153_transform.sql @@ -5,3 +5,6 @@ SELECT transform(toString(number), ['3', '5', '7'], ['hello', 'world', 'abc']) F SELECT transform(toString(number), ['3', '5', '7'], ['hello', 'world', 'abc'], '') FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], ['hello', 'world', 'abc'], '-') FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], 0) FROM system.numbers LIMIT 10; +SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1) FROM system.numbers LIMIT 10; +SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1.1) FROM system.numbers LIMIT 10; +SELECT transform(toString(number), ['3', '5', '7'], [111, 222.2, 333], 1) FROM system.numbers LIMIT 10; From 685aa695bb69a9bbba069f06d6fc1a19b1c87ffb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 29 Apr 2015 04:30:44 +0300 Subject: [PATCH 008/116] dbms: function 'transform': added support for constant argument [#METR-15987]. --- .../DB/Functions/FunctionsMiscellaneous.h | 53 ++++++++++++++----- .../0_stateless/00153_transform.reference | 4 ++ .../queries/0_stateless/00153_transform.sql | 4 ++ 3 files changed, 48 insertions(+), 13 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsMiscellaneous.h b/dbms/include/DB/Functions/FunctionsMiscellaneous.h index 35b10e52e8e..559dc0f8353 100644 --- a/dbms/include/DB/Functions/FunctionsMiscellaneous.h +++ b/dbms/include/DB/Functions/FunctionsMiscellaneous.h @@ -1003,6 +1003,13 @@ public: prepare(array_from->getData(), array_to->getData(), block, arguments); const auto in = block.getByPosition(arguments.front()).column.get(); + + if (in->isConst()) + { + executeConst(block, arguments, result); + return; + } + auto column_result = block.getByPosition(result).type->createColumn(); auto out = column_result.get(); @@ -1024,6 +1031,34 @@ public: block.getByPosition(result).column = column_result; } +private: + void executeConst(Block & block, const ColumnNumbers & arguments, const size_t result) + { + /// Составим блок из полноценных столбцов размера 1 и вычислим функцию как обычно. + + Block tmp_block; + ColumnNumbers tmp_arguments; + + tmp_block.insert(block.getByPosition(arguments[0])); + tmp_block.getByPosition(0).column = static_cast(tmp_block.getByPosition(0).column->cloneResized(1).get())->convertToFullColumn(); + tmp_arguments.push_back(0); + + for (size_t i = 1; i < arguments.size(); ++i) + { + tmp_block.insert(block.getByPosition(arguments[i])); + tmp_arguments.push_back(i); + } + + tmp_block.insert(block.getByPosition(result)); + size_t tmp_result = arguments.size(); + + execute(tmp_block, tmp_arguments, tmp_result); + + block.getByPosition(result).column = block.getByPosition(result).type->createConstColumn( + block.rowsInFirstColumn(), + (*tmp_block.getByPosition(tmp_result).column)[0]); + } + template bool executeNum(const IColumn * in_untyped, IColumn * out_untyped) { @@ -1060,12 +1095,6 @@ public: return true; } - else if (const auto in = typeid_cast *>(in_untyped)) - { - /* TODO */ - - return true; - } return false; } @@ -1091,12 +1120,6 @@ public: return true; } - else if (const auto in = typeid_cast(in_untyped)) - { - /* TODO */ - - return true; - } return false; } @@ -1252,7 +1275,7 @@ public: } } -private: + /// Разные варианты хэш-таблиц для реализации отображения. using NumToNum = HashMap>; @@ -1298,6 +1321,10 @@ private: if (arguments.size() == 4) { const IColumnConst * default_col = dynamic_cast(&*block.getByPosition(arguments[3]).column); + + if (!default_col) + throw Exception("Fourth argument of function " + getName() + " (default value) must be constant", ErrorCodes::ILLEGAL_COLUMN); + default_value = (*default_col)[0]; /// Нужно ли преобразовать элементы to и default_value к наименьшему общему типу, который является Float64? diff --git a/dbms/tests/queries/0_stateless/00153_transform.reference b/dbms/tests/queries/0_stateless/00153_transform.reference index 5265d492b9d..eea4fa0e1a8 100644 --- a/dbms/tests/queries/0_stateless/00153_transform.reference +++ b/dbms/tests/queries/0_stateless/00153_transform.reference @@ -98,3 +98,7 @@ abc 333 1 1 +Остальные +Яндекс +Google +Остальные diff --git a/dbms/tests/queries/0_stateless/00153_transform.sql b/dbms/tests/queries/0_stateless/00153_transform.sql index cfa64fa4a5f..a5b79eeecec 100644 --- a/dbms/tests/queries/0_stateless/00153_transform.sql +++ b/dbms/tests/queries/0_stateless/00153_transform.sql @@ -8,3 +8,7 @@ SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], 0) FROM sys SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1.1) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222.2, 333], 1) FROM system.numbers LIMIT 10; +SELECT transform(1, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; +SELECT transform(2, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; +SELECT transform(3, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; +SELECT transform(4, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; From cb1301d3cb46a3b87ce8ce05b3965fe4615691c2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 30 Apr 2015 00:30:40 +0300 Subject: [PATCH 009/116] dbms: improvement [#METR-16164]. --- dbms/include/DB/Parsers/ASTFunction.h | 34 ++++++++++++++------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTFunction.h b/dbms/include/DB/Parsers/ASTFunction.h index 70380b67e7e..8f981eec593 100644 --- a/dbms/include/DB/Parsers/ASTFunction.h +++ b/dbms/include/DB/Parsers/ASTFunction.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -39,32 +40,33 @@ public: String getColumnName() const override { - String res; - WriteBufferFromString wb(res); - writeString(name, wb); + SipHash hash; + + hash.update(name.data(), name.size()); if (parameters) { - writeChar('(', wb); - for (ASTs::const_iterator it = parameters->children.begin(); it != parameters->children.end(); ++it) + hash.update("(", 1); + for (const auto & param : parameters->children) { - if (it != parameters->children.begin()) - writeCString(", ", wb); - writeString((*it)->getColumnName(), wb); + String param_name = param->getColumnName(); /// TODO Сделать метод updateHashWith. + hash.update(param_name.data(), param_name.size() + 1); } - writeChar(')', wb); + hash.update(")", 1); } - writeChar('(', wb); - for (ASTs::const_iterator it = arguments->children.begin(); it != arguments->children.end(); ++it) + hash.update("(", 1); + for (const auto & arg : arguments->children) { - if (it != arguments->children.begin()) - writeCString(", ", wb); - writeString((*it)->getColumnName(), wb); + String arg_name = arg->getColumnName(); + hash.update(arg_name.data(), arg_name.size() + 1); } - writeChar(')', wb); + hash.update(")", 1); - return res; + UInt64 low, high; + hash.get128(low, high); + + return toString(high) + "_" + toString(low); /// TODO hex. } /** Получить текст, который идентифицирует этот элемент. */ From b5f1084132da5d8fd16e627624920de6c5c29544 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 30 Apr 2015 08:45:34 +0300 Subject: [PATCH 010/116] dbms: improvement (incomplete) [#METR-16164]. --- dbms/include/DB/Common/UInt128.h | 5 + dbms/include/DB/Core/Field.h | 59 ++++++++++ .../LogicalExpressionsOptimizer.h | 4 +- dbms/include/DB/Parsers/ASTAlterQuery.h | 7 ++ dbms/include/DB/Parsers/ASTAsterisk.h | 5 + dbms/include/DB/Parsers/ASTCheckQuery.h | 7 ++ .../include/DB/Parsers/ASTColumnDeclaration.h | 6 + dbms/include/DB/Parsers/ASTCreateQuery.h | 10 +- dbms/include/DB/Parsers/ASTDropQuery.h | 10 +- dbms/include/DB/Parsers/ASTExpressionList.h | 9 +- dbms/include/DB/Parsers/ASTFunction.h | 38 +------ dbms/include/DB/Parsers/ASTIdentifier.h | 6 + dbms/include/DB/Parsers/ASTInsertQuery.h | 9 +- dbms/include/DB/Parsers/ASTJoin.h | 8 ++ dbms/include/DB/Parsers/ASTLiteral.h | 6 + dbms/include/DB/Parsers/ASTNameTypePair.h | 6 + dbms/include/DB/Parsers/ASTOptimizeQuery.h | 9 +- dbms/include/DB/Parsers/ASTOrderByElement.h | 11 +- dbms/include/DB/Parsers/ASTQueryWithOutput.h | 9 +- .../DB/Parsers/ASTQueryWithTableAndOutput.h | 19 +++- dbms/include/DB/Parsers/ASTRenameQuery.h | 7 +- dbms/include/DB/Parsers/ASTSelectQuery.h | 5 + dbms/include/DB/Parsers/ASTSet.h | 5 + dbms/include/DB/Parsers/ASTSetQuery.h | 9 +- dbms/include/DB/Parsers/ASTShowTablesQuery.h | 13 ++- dbms/include/DB/Parsers/ASTSubquery.h | 13 ++- dbms/include/DB/Parsers/ASTUseQuery.h | 8 +- dbms/include/DB/Parsers/IAST.h | 105 +++++++++++------- dbms/src/DataStreams/tests/filter_stream.cpp | 1 - .../tests/filter_stream_hitlog.cpp | 1 - dbms/src/Interpreters/ExpressionAnalyzer.cpp | 39 ++++++- .../LogicalExpressionsOptimizer.cpp | 2 +- dbms/src/Parsers/tests/select_parser.cpp | 1 - 33 files changed, 343 insertions(+), 109 deletions(-) diff --git a/dbms/include/DB/Common/UInt128.h b/dbms/include/DB/Common/UInt128.h index f780a627b76..a975a004302 100644 --- a/dbms/include/DB/Common/UInt128.h +++ b/dbms/include/DB/Common/UInt128.h @@ -21,6 +21,11 @@ struct UInt128 bool operator!= (const UInt64 rhs) const { return first != rhs || second != 0; } UInt128 & operator= (const UInt64 rhs) { first = rhs; second = 0; return *this; } + + bool operator< (const UInt128 rhs) const + { + return std::tie(first, second) < std::tie(rhs.first, rhs.second); + } }; struct UInt128Hash diff --git a/dbms/include/DB/Core/Field.h b/dbms/include/DB/Core/Field.h index 33c0835f7fa..3070758d571 100644 --- a/dbms/include/DB/Core/Field.h +++ b/dbms/include/DB/Core/Field.h @@ -18,6 +18,7 @@ #include #include +#include namespace DB @@ -572,6 +573,64 @@ public: } }; +/** Обновляет состояние хэш-функции значением. */ +class FieldVisitorUpdateHash : public StaticVisitor<> +{ +private: + SipHash & hash; + +public: + FieldVisitorUpdateHash(SipHash & hash_) : hash(hash_) {} + + void operator() (const Null & x) const + { + auto type = Field::Types::Null; + hash.update(reinterpret_cast(&type), 1); + } + + void operator() (const UInt64 & x) const + { + auto type = Field::Types::UInt64; + hash.update(reinterpret_cast(&type), 1); + hash.update(reinterpret_cast(&x), sizeof(x)); + } + + void operator() (const Int64 & x) const + { + auto type = Field::Types::Int64; + hash.update(reinterpret_cast(&type), 1); + hash.update(reinterpret_cast(&x), sizeof(x)); + } + + void operator() (const Float64 & x) const + { + auto type = Field::Types::Float64; + hash.update(reinterpret_cast(&type), 1); + hash.update(reinterpret_cast(&x), sizeof(x)); + } + + void operator() (const String & x) const + { + auto type = Field::Types::String; + hash.update(reinterpret_cast(&type), 1); + size_t size = x.size(); + hash.update(reinterpret_cast(&size), sizeof(size)); + hash.update(x.data(), x.size()); + } + + void operator() (const Array & x) const + { + auto type = Field::Types::Array; + hash.update(reinterpret_cast(&type), 1); + size_t size = x.size(); + hash.update(reinterpret_cast(&size), sizeof(size)); + + for (const auto & elem : x) + apply_visitor(*this, elem); + } +}; + + /** Выводит текстовое представление типа, как литерала в SQL запросе */ class FieldVisitorToString : public StaticVisitor { diff --git a/dbms/include/DB/Interpreters/LogicalExpressionsOptimizer.h b/dbms/include/DB/Interpreters/LogicalExpressionsOptimizer.h index 653dc80d5d7..e3491322558 100644 --- a/dbms/include/DB/Interpreters/LogicalExpressionsOptimizer.h +++ b/dbms/include/DB/Interpreters/LogicalExpressionsOptimizer.h @@ -39,11 +39,11 @@ private: */ struct OrWithExpression { - OrWithExpression(ASTFunction * or_function_, const std::string & expression_); + OrWithExpression(ASTFunction * or_function_, UInt128 expression_); bool operator<(const OrWithExpression & rhs) const; ASTFunction * or_function; - const std::string expression; + const UInt128 expression; }; struct Equalities diff --git a/dbms/include/DB/Parsers/ASTAlterQuery.h b/dbms/include/DB/Parsers/ASTAlterQuery.h index 1d22d25e186..914f8ad6d72 100644 --- a/dbms/include/DB/Parsers/ASTAlterQuery.h +++ b/dbms/include/DB/Parsers/ASTAlterQuery.h @@ -89,6 +89,13 @@ public: /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return ("AlterQuery_" + database + "_" + table); }; + void updateHashWith(SipHash & hash) const override + { + hash.update("AlterQuery", strlen("AlterQuery") + 1); + hash.update(database.data(), database.size() + 1); + hash.update(table.data(), table.size() + 1); + } + ASTPtr clone() const override { ASTAlterQuery * res = new ASTAlterQuery(*this); diff --git a/dbms/include/DB/Parsers/ASTAsterisk.h b/dbms/include/DB/Parsers/ASTAsterisk.h index aa90d676c71..e881f6b3845 100644 --- a/dbms/include/DB/Parsers/ASTAsterisk.h +++ b/dbms/include/DB/Parsers/ASTAsterisk.h @@ -16,6 +16,11 @@ public: String getID() const override { return "Asterisk"; } ASTPtr clone() const override { return new ASTAsterisk(*this); } String getColumnName() const override { return "*"; } + + void updateHashWith(SipHash & hash) const override + { + hash.update("Asterisk", strlen("Asterisk") + 1); + } }; } diff --git a/dbms/include/DB/Parsers/ASTCheckQuery.h b/dbms/include/DB/Parsers/ASTCheckQuery.h index 901ad7ef567..18019751e99 100644 --- a/dbms/include/DB/Parsers/ASTCheckQuery.h +++ b/dbms/include/DB/Parsers/ASTCheckQuery.h @@ -12,6 +12,13 @@ struct ASTCheckQuery : public IAST /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return ("CheckQuery_" + database + "_" + table); }; + void updateHashWith(SipHash & hash) const override + { + hash.update("CheckQuery", strlen("CheckQuery") + 1); + hash.update(database.data(), database.size() + 1); + hash.update(table.data(), table.size() + 1); + } + ASTPtr clone() const override { return new ASTCheckQuery(*this); diff --git a/dbms/include/DB/Parsers/ASTColumnDeclaration.h b/dbms/include/DB/Parsers/ASTColumnDeclaration.h index 9862c5a81b6..2f343549ad8 100644 --- a/dbms/include/DB/Parsers/ASTColumnDeclaration.h +++ b/dbms/include/DB/Parsers/ASTColumnDeclaration.h @@ -21,6 +21,12 @@ public: String getID() const override { return "ColumnDeclaration_" + name; } + void updateHashWith(SipHash & hash) const override + { + hash.update("ColumnDeclaration", strlen("ColumnDeclaration") + 1); + hash.update(name.data(), name.size() + 1); + } + ASTPtr clone() const override { const auto res = new ASTColumnDeclaration{*this}; diff --git a/dbms/include/DB/Parsers/ASTCreateQuery.h b/dbms/include/DB/Parsers/ASTCreateQuery.h index 33d69a8e8e3..d5f75f000ce 100644 --- a/dbms/include/DB/Parsers/ASTCreateQuery.h +++ b/dbms/include/DB/Parsers/ASTCreateQuery.h @@ -30,10 +30,18 @@ public: ASTCreateQuery() = default; ASTCreateQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return (attach ? "AttachQuery_" : "CreateQuery_") + database + "_" + table; }; + void updateHashWith(SipHash & hash) const override + { + hash.update(reinterpret_cast(&attach), sizeof(attach)); + hash.update("CreateQuery", strlen("CreateQuery") + 1); + hash.update(database.data(), database.size() + 1); + hash.update(table.data(), table.size() + 1); + } + ASTPtr clone() const override { ASTCreateQuery * res = new ASTCreateQuery(*this); diff --git a/dbms/include/DB/Parsers/ASTDropQuery.h b/dbms/include/DB/Parsers/ASTDropQuery.h index c0ac24017d0..137f9c3e071 100644 --- a/dbms/include/DB/Parsers/ASTDropQuery.h +++ b/dbms/include/DB/Parsers/ASTDropQuery.h @@ -19,10 +19,18 @@ public: ASTDropQuery() = default; ASTDropQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return (detach ? "DetachQuery_" : "DropQuery_") + database + "_" + table; }; + void updateHashWith(SipHash & hash) const override + { + hash.update(reinterpret_cast(&detach), sizeof(detach)); + hash.update("DropQuery", strlen("DropQuery") + 1); + hash.update(database.data(), database.size() + 1); + hash.update(table.data(), table.size() + 1); + } + ASTPtr clone() const override { return new ASTDropQuery(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTExpressionList.h b/dbms/include/DB/Parsers/ASTExpressionList.h index 1ec814a8d1b..63935dfb42a 100644 --- a/dbms/include/DB/Parsers/ASTExpressionList.h +++ b/dbms/include/DB/Parsers/ASTExpressionList.h @@ -16,16 +16,21 @@ class ASTExpressionList : public IAST public: ASTExpressionList() = default; ASTExpressionList(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "ExpressionList"; } + void updateHashWith(SipHash & hash) const override + { + hash.update("ExpressionList", strlen("ExpressionList") + 1); + } + ASTPtr clone() const override { const auto res = new ASTExpressionList(*this); ASTPtr ptr{res}; res->children.clear(); - + for (const auto & child : children) res->children.emplace_back(child->clone()); diff --git a/dbms/include/DB/Parsers/ASTFunction.h b/dbms/include/DB/Parsers/ASTFunction.h index 8f981eec593..3d4e0819e91 100644 --- a/dbms/include/DB/Parsers/ASTFunction.h +++ b/dbms/include/DB/Parsers/ASTFunction.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB @@ -38,40 +37,15 @@ public: ASTFunction() = default; ASTFunction(const StringRange range_) : ASTWithAlias(range_) {} - String getColumnName() const override - { - SipHash hash; - - hash.update(name.data(), name.size()); - - if (parameters) - { - hash.update("(", 1); - for (const auto & param : parameters->children) - { - String param_name = param->getColumnName(); /// TODO Сделать метод updateHashWith. - hash.update(param_name.data(), param_name.size() + 1); - } - hash.update(")", 1); - } - - hash.update("(", 1); - for (const auto & arg : arguments->children) - { - String arg_name = arg->getColumnName(); - hash.update(arg_name.data(), arg_name.size() + 1); - } - hash.update(")", 1); - - UInt64 low, high; - hash.get128(low, high); - - return toString(high) + "_" + toString(low); /// TODO hex. - } - /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Function_" + name; } + void updateHashWith(SipHash & hash) const override + { + hash.update("Function", strlen("Function") + 1); + hash.update(name.data(), name.size() + 1); + } + ASTPtr clone() const override { ASTFunction * res = new ASTFunction(*this); diff --git a/dbms/include/DB/Parsers/ASTIdentifier.h b/dbms/include/DB/Parsers/ASTIdentifier.h index 58ae38ca434..25ba2a19a33 100644 --- a/dbms/include/DB/Parsers/ASTIdentifier.h +++ b/dbms/include/DB/Parsers/ASTIdentifier.h @@ -35,6 +35,12 @@ public: /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Identifier_" + name; } + void updateHashWith(SipHash & hash) const override + { + hash.update("Identifier", strlen("Identifier") + 1); + hash.update(name.data(), name.size() + 1); + } + ASTPtr clone() const override { return new ASTIdentifier(*this); } void collectIdentifierNames(IdentifierNameSet & set) const override diff --git a/dbms/include/DB/Parsers/ASTInsertQuery.h b/dbms/include/DB/Parsers/ASTInsertQuery.h index 5e6988bcfc6..51728ecb5d1 100644 --- a/dbms/include/DB/Parsers/ASTInsertQuery.h +++ b/dbms/include/DB/Parsers/ASTInsertQuery.h @@ -26,10 +26,17 @@ public: ASTInsertQuery() = default; ASTInsertQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "InsertQuery_" + database + "_" + table; }; + void updateHashWith(SipHash & hash) const override + { + hash.update("InsertQuery", strlen("InsertQuery") + 1); + hash.update(database.data(), database.size() + 1); + hash.update(table.data(), table.size() + 1); + } + ASTPtr clone() const override { ASTInsertQuery * res = new ASTInsertQuery(*this); diff --git a/dbms/include/DB/Parsers/ASTJoin.h b/dbms/include/DB/Parsers/ASTJoin.h index 5809c8b4eb5..5aaee2528b4 100644 --- a/dbms/include/DB/Parsers/ASTJoin.h +++ b/dbms/include/DB/Parsers/ASTJoin.h @@ -69,6 +69,14 @@ public: return res; }; + void updateHashWith(SipHash & hash) const override + { + hash.update("Join", strlen("Join") + 1); + hash.update(reinterpret_cast(&locality), sizeof(locality)); + hash.update(reinterpret_cast(&strictness), sizeof(strictness)); + hash.update(reinterpret_cast(&kind), sizeof(kind)); + } + ASTPtr clone() const override { ASTJoin * res = new ASTJoin(*this); diff --git a/dbms/include/DB/Parsers/ASTLiteral.h b/dbms/include/DB/Parsers/ASTLiteral.h index b7b24ef56db..73004c8dfa3 100644 --- a/dbms/include/DB/Parsers/ASTLiteral.h +++ b/dbms/include/DB/Parsers/ASTLiteral.h @@ -25,6 +25,12 @@ public: /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Literal_" + apply_visitor(FieldVisitorDump(), value); } + void updateHashWith(SipHash & hash) const override + { + hash.update("Literal", strlen("Literal") + 1); + apply_visitor(FieldVisitorUpdateHash(hash), value); + } + ASTPtr clone() const override { return new ASTLiteral(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTNameTypePair.h b/dbms/include/DB/Parsers/ASTNameTypePair.h index 176c431cafa..87232c23e4c 100644 --- a/dbms/include/DB/Parsers/ASTNameTypePair.h +++ b/dbms/include/DB/Parsers/ASTNameTypePair.h @@ -23,6 +23,12 @@ public: /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "NameTypePair_" + name; } + void updateHashWith(SipHash & hash) const override + { + hash.update("NameTypePair", strlen("NameTypePair") + 1); + hash.update(name.data(), name.size() + 1); + } + ASTPtr clone() const override { ASTNameTypePair * res = new ASTNameTypePair(*this); diff --git a/dbms/include/DB/Parsers/ASTOptimizeQuery.h b/dbms/include/DB/Parsers/ASTOptimizeQuery.h index 906b3d1edb5..305ef4788e7 100644 --- a/dbms/include/DB/Parsers/ASTOptimizeQuery.h +++ b/dbms/include/DB/Parsers/ASTOptimizeQuery.h @@ -17,10 +17,17 @@ public: ASTOptimizeQuery() = default; ASTOptimizeQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "OptimizeQuery_" + database + "_" + table; }; + void updateHashWith(SipHash & hash) const override + { + hash.update("OptimizeQuery", strlen("OptimizeQuery") + 1); + hash.update(database.data(), database.size() + 1); + hash.update(table.data(), table.size() + 1); + } + ASTPtr clone() const override { return new ASTOptimizeQuery(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTOrderByElement.h b/dbms/include/DB/Parsers/ASTOrderByElement.h index f341265d93b..ba87eef3666 100644 --- a/dbms/include/DB/Parsers/ASTOrderByElement.h +++ b/dbms/include/DB/Parsers/ASTOrderByElement.h @@ -15,19 +15,24 @@ class ASTOrderByElement : public IAST { public: int direction; /// 1, если ASC, -1, если DESC - + /** Collator для locale-specific сортировки строк. * Если nullptr, то производится сортировка по байтам. */ Poco::SharedPtr collator; - + ASTOrderByElement() = default; ASTOrderByElement(const StringRange range_, const int direction_, const Poco::SharedPtr & collator_ = nullptr) : IAST(range_), direction(direction_), collator(collator_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "OrderByElement"; } + void updateHashWith(SipHash & hash) const override + { + hash.update("OrderByElement", strlen("OrderByElement") + 1); + } + ASTPtr clone() const override { return new ASTOrderByElement(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTQueryWithOutput.h b/dbms/include/DB/Parsers/ASTQueryWithOutput.h index 2b5723249f2..1ab8aecfa69 100644 --- a/dbms/include/DB/Parsers/ASTQueryWithOutput.h +++ b/dbms/include/DB/Parsers/ASTQueryWithOutput.h @@ -5,8 +5,8 @@ namespace DB { - - + + /** Запрос с секцией FORMAT. */ class ASTQueryWithOutput : public IAST @@ -28,6 +28,11 @@ public: \ Name(StringRange range_) : ASTQueryWithOutput(range_) {} \ String getID() const override { return ID; }; \ \ + void updateHashWith(SipHash & hash) const override \ + { \ + hash.update(ID, strlen(ID) + 1); \ + } \ + \ ASTPtr clone() const override \ { \ Name * res = new Name(*this); \ diff --git a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h index 32ebb1e528d..7dfd61ec1fc 100644 --- a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h @@ -6,8 +6,8 @@ namespace DB { - - + + /** Запрос с указанием названия таблицы и, возможно, БД и секцией FORMAT. */ class ASTQueryWithTableAndOutput : public ASTQueryWithOutput @@ -15,12 +15,12 @@ namespace DB public: String database; String table; - + ASTQueryWithTableAndOutput() = default; ASTQueryWithTableAndOutput(const StringRange range_) : ASTQueryWithOutput(range_) {} }; - - + + /// Объявляет класс-наследник ASTQueryWithTableAndOutput с реализованными методами getID и clone. #define DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(Name, ID) \ class Name : public ASTQueryWithTableAndOutput \ @@ -29,7 +29,14 @@ public: \ Name() = default; \ Name(const StringRange range_) : ASTQueryWithTableAndOutput(range_) {} \ String getID() const override { return ID"_" + database + "_" + table; }; \ - \ + \ + void updateHashWith(SipHash & hash) const override \ + { \ + hash.update(ID, strlen(ID) + 1); \ + hash.update(database.data(), database.size() + 1); \ + hash.update(table.data(), table.size() + 1); \ + } \ + \ ASTPtr clone() const override \ { \ Name * res = new Name(*this); \ diff --git a/dbms/include/DB/Parsers/ASTRenameQuery.h b/dbms/include/DB/Parsers/ASTRenameQuery.h index 4eb6624e4c3..4074a0a50e4 100644 --- a/dbms/include/DB/Parsers/ASTRenameQuery.h +++ b/dbms/include/DB/Parsers/ASTRenameQuery.h @@ -29,10 +29,15 @@ public: ASTRenameQuery() = default; ASTRenameQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Rename"; }; + void updateHashWith(SipHash & hash) const override + { + hash.update("Rename", strlen("Rename") + 1); + } + ASTPtr clone() const override { return new ASTRenameQuery(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTSelectQuery.h b/dbms/include/DB/Parsers/ASTSelectQuery.h index 20b2065310a..43aeb219a99 100644 --- a/dbms/include/DB/Parsers/ASTSelectQuery.h +++ b/dbms/include/DB/Parsers/ASTSelectQuery.h @@ -39,6 +39,11 @@ public: /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "SelectQuery"; }; + void updateHashWith(SipHash & hash) const override + { + hash.update("SelectQuery", strlen("SelectQuery") + 1); + } + /// Проверить наличие функции arrayJoin. (Не большого ARRAY JOIN.) static bool hasArrayJoin(const ASTPtr & ast) { diff --git a/dbms/include/DB/Parsers/ASTSet.h b/dbms/include/DB/Parsers/ASTSet.h index 18edddc999f..3761f0775f3 100644 --- a/dbms/include/DB/Parsers/ASTSet.h +++ b/dbms/include/DB/Parsers/ASTSet.h @@ -22,6 +22,11 @@ public: String getID() const override { return "Set_" + getColumnName(); } ASTPtr clone() const override { return new ASTSet(*this); } String getColumnName() const override { return column_name; } + + void updateHashWith(SipHash & hash) const override + { + hash.update("Set", strlen("Set") + 1); + } }; } diff --git a/dbms/include/DB/Parsers/ASTSetQuery.h b/dbms/include/DB/Parsers/ASTSetQuery.h index 0334d1167bb..12c0e9c85c1 100644 --- a/dbms/include/DB/Parsers/ASTSetQuery.h +++ b/dbms/include/DB/Parsers/ASTSetQuery.h @@ -26,9 +26,14 @@ public: ASTSetQuery() = default; ASTSetQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ - String getID() const override { return "Set"; }; + String getID() const override { return "SetQuery"; }; + + void updateHashWith(SipHash & hash) const override + { + hash.update("SetQuery", strlen("SetQuery") + 1); + } ASTPtr clone() const override { return new ASTSetQuery(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTShowTablesQuery.h b/dbms/include/DB/Parsers/ASTShowTablesQuery.h index 4b51b8f1aba..671716d7202 100644 --- a/dbms/include/DB/Parsers/ASTShowTablesQuery.h +++ b/dbms/include/DB/Parsers/ASTShowTablesQuery.h @@ -20,9 +20,14 @@ public: ASTShowTablesQuery() = default; ASTShowTablesQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ - String getID() const override { return "ShowTables"; }; + String getID() const override { return "ShowTablesQuery"; }; + + void updateHashWith(SipHash & hash) const override + { + hash.update("ShowTablesQuery", strlen("ShowTablesQuery") + 1); + } ASTPtr clone() const override { @@ -30,13 +35,13 @@ public: ASTPtr ptr{res}; res->children.clear(); - + if (format) { res->format = format->clone(); res->children.push_back(res->format); } - + return ptr; } }; diff --git a/dbms/include/DB/Parsers/ASTSubquery.h b/dbms/include/DB/Parsers/ASTSubquery.h index f94cbfe8346..f3cefd87ea0 100644 --- a/dbms/include/DB/Parsers/ASTSubquery.h +++ b/dbms/include/DB/Parsers/ASTSubquery.h @@ -16,10 +16,15 @@ class ASTSubquery : public IAST public: ASTSubquery() = default; ASTSubquery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Subquery"; } + void updateHashWith(SipHash & hash) const override + { + hash.update("Subquery", strlen("Subquery") + 1); + } + ASTPtr clone() const override { const auto res = new ASTSubquery{*this}; @@ -33,7 +38,11 @@ public: return ptr; } - String getColumnName() const override { return getTreeID(); } + String getColumnName() const override + { + auto id = getTreeID(); + return toString(id.first) + "_" + toString(id.second); + } }; } diff --git a/dbms/include/DB/Parsers/ASTUseQuery.h b/dbms/include/DB/Parsers/ASTUseQuery.h index eafe3496293..6a2d60353e5 100644 --- a/dbms/include/DB/Parsers/ASTUseQuery.h +++ b/dbms/include/DB/Parsers/ASTUseQuery.h @@ -16,10 +16,16 @@ public: ASTUseQuery() = default; ASTUseQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "UseQuery_" + database; }; + void updateHashWith(SipHash & hash) const override + { + hash.update("UseQuery", strlen("UseQuery") + 1); + hash.update(database.data(), database.size() + 1); + } + ASTPtr clone() const override { return new ASTUseQuery(*this); } }; diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index 803cc9eb31f..d81b8156a4b 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -9,8 +9,10 @@ #include #include +#include #include #include +#include #include #include @@ -43,15 +45,67 @@ public: IAST(const StringRange range_) : range(range_) {} virtual ~IAST() = default; - /** Получить каноническое имя столбца, если элемент является столбцом */ - virtual String getColumnName() const { throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::NOT_A_COLUMN); } + /** Получить имя, однозначно идентифицирующее выражение, если элемент является столбцом. У одинаковых выражений будет одинаковое имя. */ + virtual String getColumnName() const + { + /// По-умолчанию - подчёркивание, а затем getTreeID в hex-е. - /** Получить алиас, если он есть, или каноническое имя столбца, если его нет. */ + union + { + UInt128 id; + UInt8 id_bytes[16]; + }; + + id = getTreeID(); + String res(1 + 2 * sizeof(id), '_'); + + for (size_t i = 0; i < sizeof(id); ++i) + { + res[i * 2 + 1] = (id_bytes[i] / 16) < 10 ? ('0' + (id_bytes[i] / 16)) : ('A' + (id_bytes[i] / 16 - 10)); + res[i * 2 + 2] = (id_bytes[i] % 16) < 10 ? ('0' + (id_bytes[i] % 16)) : ('A' + (id_bytes[i] % 16 - 10)); + } + + return res; + } + + /** Получить алиас, если он есть, или имя столбца, если его нет. */ virtual String getAliasOrColumnName() const { return getColumnName(); } /** Получить алиас, если он есть, или пустую строку, если его нет, или если элемент не поддерживает алиасы. */ virtual String tryGetAlias() const { return String(); } + /** Обновить состояние хэш-функции элементом дерева. */ + virtual void updateHashWith(SipHash & hash) const = 0; + + /** Обновить состояние хэш-функции целым поддеревом. Используется для склейки одинаковых выражений. */ + void updateHashWithTree(SipHash & hash) const + { + updateHashWith(hash); + + if (!children.empty()) + { + size_t size = children.size(); + hash.update(reinterpret_cast(&size), sizeof(size)); + + for (size_t i = 0; i < size; ++i) + { + hash.update(reinterpret_cast(&i), sizeof(i)); + children[i]->updateHashWithTree(hash); + } + } + } + + /** Получить идентификатор поддерева. Используется для склейки одинаковых выражений. + */ + UInt128 getTreeID() const + { + SipHash hash; + updateHashWithTree(hash); + UInt128 res; + hash.get128(reinterpret_cast(&res)); + return res; + } + /** Установить алиас. */ virtual void setAlias(const String & to) { @@ -71,37 +125,6 @@ public: (*it)->is_visited = false; } - /** Получить текст, который идентифицирует этот элемент и всё поддерево. - * Обычно он содержит идентификатор элемента и getTreeID от всех детей. - */ - String getTreeID() const - { - std::stringstream s; - s << getID(); - - if (!children.empty()) - { - s << "("; - for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) - { - if (it != children.begin()) - s << ", "; - s << (*it)->getTreeID(); - } - s << ")"; - } - - return s.str(); - } - - void dumpTree(std::ostream & ostr, size_t indent = 0) const - { - String indent_str(indent, '-'); - ostr << indent_str << getID() << ", " << this << std::endl; - for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) - (*it)->dumpTree(ostr, indent + 1); - } - /** Проверить глубину дерева. * Если задано max_depth и глубина больше - кинуть исключение. * Возвращает глубину дерева. @@ -116,8 +139,8 @@ public: size_t checkSize(size_t max_size) const { size_t res = 1; - for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) - res += (*it)->checkSize(max_size); + for (const auto & ast : children) + res += ast->checkSize(max_size); if (res > max_size) throw Exception("AST is too big. Maximum: " + toString(max_size), ErrorCodes::TOO_BIG_AST); @@ -126,22 +149,22 @@ public: } /** Получить set из имен индентификаторов - */ + */ virtual void collectIdentifierNames(IdentifierNameSet & set) const { - for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) - (*it)->collectIdentifierNames(set); + for (const auto & ast : children) + ast->collectIdentifierNames(set); } private: size_t checkDepthImpl(size_t max_depth, size_t level) const { size_t res = level + 1; - for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) + for (const auto & ast : children) { if (level >= max_depth) throw Exception("AST is too deep. Maximum: " + toString(max_depth), ErrorCodes::TOO_DEEP_AST); - res = std::max(res, (*it)->checkDepthImpl(max_depth, level + 1)); + res = std::max(res, ast->checkDepthImpl(max_depth, level + 1)); } return res; diff --git a/dbms/src/DataStreams/tests/filter_stream.cpp b/dbms/src/DataStreams/tests/filter_stream.cpp index 2893d16b5ec..639a434880a 100644 --- a/dbms/src/DataStreams/tests/filter_stream.cpp +++ b/dbms/src/DataStreams/tests/filter_stream.cpp @@ -41,7 +41,6 @@ int main(int argc, char ** argv) formatAST(*ast, std::cerr); std::cerr << std::endl; - std::cerr << ast->getTreeID() << std::endl; Context context; context.getColumns().push_back(NameAndTypePair("number", new DataTypeUInt64)); diff --git a/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp b/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp index b383715a0a1..4f2b58c3843 100644 --- a/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp +++ b/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp @@ -106,7 +106,6 @@ int main(int argc, char ** argv) formatAST(*ast, std::cerr); std::cerr << std::endl; - std::cerr << ast->getTreeID() << std::endl; /// создаём объект существующей таблицы хит лога diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index cead1d2ac8d..33ec5ce5797 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1653,10 +1653,43 @@ void ExpressionAnalyzer::appendProjectResult(DB::ExpressionActionsChain & chain, NamesWithAliases result_columns; ASTs asts = select_query->select_expression_list->children; - for (size_t i = 0; i < asts.size(); ++i) + + /// Выбор имён для столбцов результата. + size_t i = 1; + for (const auto & ast : asts) { - result_columns.emplace_back(asts[i]->getColumnName(), asts[i]->getAliasOrColumnName()); - step.required_output.push_back(result_columns.back().second); + String source_column_name = ast->getColumnName(); + String result_column_name = ast->tryGetAlias(); + + /// Если не задан алиас - нужно сгенерировать какое-нибудь имя автоматически. + if (result_column_name.empty()) + { + if (typeid_cast(ast.get()) || typeid_cast(ast.get())) + { + /// Если выражение простое, то будем использовать его имя. + result_column_name = source_column_name; + } + else if (auto func = typeid_cast(ast.get())) + { + /// Для функций используем имя вида _1_func, где func - имя функции. + WriteBufferFromString wb(result_column_name); + writeChar('_', wb); + writeIntText(i, wb); + writeChar('_', wb); + writeString(func->name, wb); + } + else + { + /// Если выражение сложное и для него не задан алиас, будем использовать имя вида _1, _2, ... + WriteBufferFromString wb(result_column_name); + writeChar('_', wb); + writeIntText(i, wb); + } + } + + result_columns.emplace_back(source_column_name, result_column_name); + step.required_output.emplace_back(result_columns.back().second); + ++i; } step.actions->add(ExpressionAction::project(result_columns)); diff --git a/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp b/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp index bf3fb1a274e..637ec65ddf0 100644 --- a/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -12,7 +12,7 @@ namespace DB { -LogicalExpressionsOptimizer::OrWithExpression::OrWithExpression(ASTFunction * or_function_, const std::string & expression_) +LogicalExpressionsOptimizer::OrWithExpression::OrWithExpression(ASTFunction * or_function_, UInt128 expression_) : or_function(or_function_), expression(expression_) { } diff --git a/dbms/src/Parsers/tests/select_parser.cpp b/dbms/src/Parsers/tests/select_parser.cpp index 690cf27a391..22ff2d4770e 100644 --- a/dbms/src/Parsers/tests/select_parser.cpp +++ b/dbms/src/Parsers/tests/select_parser.cpp @@ -27,7 +27,6 @@ int main(int argc, char ** argv) std::cout << "Success." << std::endl; formatAST(*ast, std::cerr); std::cout << std::endl; - std::cout << std::endl << ast->getTreeID() << std::endl; return 0; } From fa78aa633fa13b8b7dbadd8273a522214a9506b2 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 30 Apr 2015 15:43:16 +0300 Subject: [PATCH 011/116] Merge --- dbms/include/DB/Interpreters/Cluster.h | 15 +- dbms/include/DB/Interpreters/Context.h | 2 + .../DB/Storages/StorageSystemClusters.h | 41 ++++++ dbms/src/Interpreters/Cluster.cpp | 14 +- dbms/src/Interpreters/Context.cpp | 6 + dbms/src/Server/Server.cpp | 2 + dbms/src/Storages/StorageSystemClusters.cpp | 131 ++++++++++++++++++ 7 files changed, 205 insertions(+), 6 deletions(-) create mode 100644 dbms/include/DB/Storages/StorageSystemClusters.h create mode 100644 dbms/src/Storages/StorageSystemClusters.cpp diff --git a/dbms/include/DB/Interpreters/Cluster.h b/dbms/include/DB/Interpreters/Cluster.h index d3b28e07a2c..ccf3b223d41 100644 --- a/dbms/include/DB/Interpreters/Cluster.h +++ b/dbms/include/DB/Interpreters/Cluster.h @@ -26,6 +26,10 @@ public: /// к локальным узлам обращаемся напрямую size_t getLocalNodesNum() const { return local_nodes_num; } + /// используеться для выставления ограничения на размер таймаута + static Poco::Timespan saturate(const Poco::Timespan & v, const Poco::Timespan & limit); + +public: /// Соединения с удалёнными серверами. ConnectionPools pools; @@ -33,15 +37,13 @@ public: { /// contains names of directories for asynchronous write to StorageDistributed std::vector dir_names; + UInt32 shard_num; int weight; size_t num_local_nodes; }; std::vector shard_info_vec; std::vector slot_to_shard; - /// используеться для выставления ограничения на размер таймаута - static Poco::Timespan saturate(const Poco::Timespan & v, const Poco::Timespan & limit); - struct Address { /** В конфиге адреса либо находятся в узлах : @@ -63,6 +65,7 @@ public: Poco::Net::SocketAddress host_port; String user; String password; + UInt32 replica_num; Address(const String & config_prefix); Address(const String & host_port_, const String & user_, const String & password_); @@ -71,12 +74,18 @@ public: private: static bool isLocal(const Address & address); +public: /// Массив шардов. Каждый шард - адреса одного сервера. typedef std::vector
Addresses; /// Массив шардов. Для каждого шарда - массив адресов реплик (серверов, считающихся идентичными). typedef std::vector AddressesWithFailover; +public: + const Addresses & getShardsInfo() const { return addresses; } + const AddressesWithFailover & getShardsWithFailoverInfo() const { return addresses_with_failover; } + +private: Addresses addresses; AddressesWithFailover addresses_with_failover; diff --git a/dbms/include/DB/Interpreters/Context.h b/dbms/include/DB/Interpreters/Context.h index 8fb950f29a8..2fb66570878 100644 --- a/dbms/include/DB/Interpreters/Context.h +++ b/dbms/include/DB/Interpreters/Context.h @@ -39,6 +39,7 @@ class ProcessList; class ProcessListElement; class Macros; class Progress; +class Clusters; /// имя таблицы -> таблица @@ -238,6 +239,7 @@ public: void initClusters(); Cluster & getCluster(const std::string & cluster_name); + Poco::SharedPtr getClusters() const; Compiler & getCompiler(); diff --git a/dbms/include/DB/Storages/StorageSystemClusters.h b/dbms/include/DB/Storages/StorageSystemClusters.h new file mode 100644 index 00000000000..b024e7b7441 --- /dev/null +++ b/dbms/include/DB/Storages/StorageSystemClusters.h @@ -0,0 +1,41 @@ +#pragma once + +#include + +namespace DB +{ + +class Context; + +/** Реализует системную таблицу columns, которая позволяет получить информацию + * о столбцах каждой таблицы для всех баз данных. + */ +class StorageSystemClusters : public IStorage +{ +public: + StorageSystemClusters(const std::string & name_, Context & context_); + static StoragePtr create(const std::string & name_, Context & context_); + + std::string getName() const override { return "SystemColumns"; } + std::string getTableName() const override { return name; } + const NamesAndTypesList & getColumnsListImpl() const override { return columns; } + + BlockInputStreams read( + const Names & column_names, + ASTPtr query, + const Context & context, + const Settings & settings, + QueryProcessingStage::Enum & processed_stage, + size_t max_block_size = DEFAULT_BLOCK_SIZE, + unsigned threads = 1) override; + +private: + StorageSystemClusters(const std::string & name_); + +private: + const std::string name; + NamesAndTypesList columns; + Context & context; +}; + +} diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp index 7da53e13f85..c2341791a22 100644 --- a/dbms/src/Interpreters/Cluster.cpp +++ b/dbms/src/Interpreters/Cluster.cpp @@ -68,6 +68,8 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa const auto & config_prefix = cluster_name + "."; + UInt32 current_shard_num = 0; + for (auto it = config_keys.begin(); it != config_keys.end(); ++it) { if (0 == strncmp(it->c_str(), "node", strlen("node"))) @@ -78,12 +80,13 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa continue; addresses.emplace_back(prefix); + addresses.back().replica_num = 0; slot_to_shard.insert(std::end(slot_to_shard), weight, shard_info_vec.size()); if (const auto is_local = isLocal(addresses.back())) - shard_info_vec.push_back({{}, weight, is_local}); + shard_info_vec.push_back({{}, current_shard_num, weight, is_local}); else - shard_info_vec.push_back({{addressToDirName(addresses.back())}, weight, is_local}); + shard_info_vec.push_back({{addressToDirName(addresses.back())}, current_shard_num, weight, is_local}); } else if (0 == strncmp(it->c_str(), "shard", strlen("shard"))) { @@ -92,6 +95,7 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa addresses_with_failover.emplace_back(); Addresses & replica_addresses = addresses_with_failover.back(); + UInt32 current_replica_num = 0; const auto & partial_prefix = config_prefix + *it + "."; const auto weight = config.getInt(partial_prefix + ".weight", 1); @@ -116,6 +120,8 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa if (0 == strncmp(jt->c_str(), "replica", strlen("replica"))) { replica_addresses.emplace_back(partial_prefix + *jt); + replica_addresses.back().replica_num = current_replica_num; + ++current_replica_num; if (isLocal(replica_addresses.back())) { @@ -142,10 +148,12 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa } slot_to_shard.insert(std::end(slot_to_shard), weight, shard_info_vec.size()); - shard_info_vec.push_back({std::move(dir_names), weight, num_local_nodes}); + shard_info_vec.push_back({std::move(dir_names), current_shard_num, weight, num_local_nodes}); } else throw Exception("Unknown element in config: " + *it, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + + ++current_shard_num; } if (!addresses_with_failover.empty() && !addresses.empty()) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index ae247af2730..5910ab3be40 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -815,6 +815,12 @@ Cluster & Context::getCluster(const std::string & cluster_name) throw Poco::Exception("Failed to find cluster with name = " + cluster_name); } +Poco::SharedPtr Context::getClusters() const +{ + if (!shared->clusters) + throw Poco::Exception("Clusters have not been initialized yet."); + return shared->clusters; +} Compiler & Context::getCompiler() { diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 71be3db58d5..dd86f4d5116 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -541,6 +542,7 @@ int Server::main(const std::vector & args) global_context->addTable("system", "dictionaries", StorageSystemDictionaries::create("dictionaries")); global_context->addTable("system", "columns", StorageSystemColumns::create("columns")); global_context->addTable("system", "functions", StorageSystemFunctions::create("functions")); + global_context->addTable("system", "clusters", StorageSystemClusters::create("clusters", *global_context)); if (has_zookeeper) global_context->addTable("system", "zookeeper", StorageSystemZooKeeper::create("zookeeper")); diff --git a/dbms/src/Storages/StorageSystemClusters.cpp b/dbms/src/Storages/StorageSystemClusters.cpp new file mode 100644 index 00000000000..52e665d4062 --- /dev/null +++ b/dbms/src/Storages/StorageSystemClusters.cpp @@ -0,0 +1,131 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +StorageSystemClusters::StorageSystemClusters(const std::string & name_, Context & context_) + : name(name_) + , columns{ + { "cluster", new DataTypeString }, + { "shard_num", new DataTypeUInt32 }, + { "shard_weight", new DataTypeUInt32 }, + { "replica_num", new DataTypeUInt32 }, + { "host_name", new DataTypeString }, + { "host_address", new DataTypeString }, + { "port", new DataTypeUInt16 }, + { "user", new DataTypeString } + } + , context(context_) +{ +} + +StoragePtr StorageSystemClusters::create(const std::string & name_, Context & context_) +{ + context_.initClusters(); + return (new StorageSystemClusters{name_, context_})->thisPtr(); +} + +BlockInputStreams StorageSystemClusters::read( + const Names & column_names, + ASTPtr query, + const Context & context_, + const Settings & settings, + QueryProcessingStage::Enum & processed_stage, + const size_t max_block_size, + const unsigned threads) +{ + check(column_names); + processed_stage = QueryProcessingStage::FetchColumns; + + ColumnPtr cluster_column = new ColumnString; + ColumnPtr shard_num_column = new ColumnUInt32; + ColumnPtr shard_weight_column = new ColumnUInt32; + ColumnPtr replica_num_column = new ColumnUInt32; + ColumnPtr host_name_column = new ColumnString; + ColumnPtr host_address_column = new ColumnString; + ColumnPtr port_column = new ColumnUInt16; + ColumnPtr user_column = new ColumnString; + + auto updateColumns = [&](const std::string & cluster_name, const Cluster::ShardInfo & shard_info, + const Cluster::Address & address) + { + cluster_column->insert(cluster_name); + shard_num_column->insert(static_cast(shard_info.shard_num)); + shard_weight_column->insert(static_cast(shard_info.weight)); + replica_num_column->insert(static_cast(address.replica_num)); + + const std::string & source = address.host_port.host().toString(); + const auto host_entry = Poco::Net::DNS::resolve(source); + host_name_column->insert(host_entry.name()); + host_address_column->insert(host_entry.addresses()[0].toString()); + + port_column->insert(static_cast(address.host_port.port())); + user_column->insert(address.user); + }; + + const auto & clusters = context.getClusters(); + for (const auto & entry : clusters->impl) + { + const std::string cluster_name = entry.first; + const Cluster & cluster = entry.second; + const auto & addresses = cluster.getShardsInfo(); + const auto & addresses_with_failover = cluster.getShardsWithFailoverInfo(); + const auto & shards_info = cluster.shard_info_vec; + + if (!addresses.empty()) + { + auto it1 = addresses.cbegin(); + auto it2 = shards_info.cbegin(); + + while (it1 != addresses.cend()) + { + const auto & address = *it1; + const auto & shard_info = *it2; + + updateColumns(cluster_name, shard_info, address); + + ++it1; + ++it2; + } + } + else if (!addresses_with_failover.empty()) + { + auto it1 = addresses_with_failover.cbegin(); + auto it2 = shards_info.cbegin(); + + while (it1 != addresses_with_failover.cend()) + { + const auto & addresses = *it1; + const auto & shard_info = *it2; + + for (const auto & address : addresses) + updateColumns(cluster_name, shard_info, address); + + ++it1; + ++it2; + } + } + } + + Block block; + + block.insert(ColumnWithNameAndType(cluster_column, new DataTypeString, "cluster")); + block.insert(ColumnWithNameAndType(shard_num_column, new DataTypeUInt32, "shard_num")); + block.insert(ColumnWithNameAndType(shard_weight_column, new DataTypeUInt32, "shard_weight")); + block.insert(ColumnWithNameAndType(replica_num_column, new DataTypeUInt32, "replica_num")); + block.insert(ColumnWithNameAndType(host_name_column, new DataTypeString, "host_name")); + block.insert(ColumnWithNameAndType(host_address_column, new DataTypeString, "host_address")); + block.insert(ColumnWithNameAndType(port_column, new DataTypeUInt16, "port")); + block.insert(ColumnWithNameAndType(user_column, new DataTypeString, "user")); + + return BlockInputStreams{ 1, new OneBlockInputStream(block) }; +} + +} From 14823c82a59d01e4c51e8c72c8deb0768b074f37 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 30 Apr 2015 16:00:56 +0300 Subject: [PATCH 012/116] dbms: Server: Added comment. [#METR-15354] --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 4fd289d3332..9b47fc9b101 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -610,6 +610,8 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(BlockInpu interpreter_subquery = new InterpreterSelectQuery( query.table, subquery_context, required_columns, QueryProcessingStage::Complete, subquery_depth + 1); + + /// Если во внешнем запросе есть аггрегация, то WITH TOTALS игнорируется в подзапросе. if (query_analyzer->hasAggregation()) interpreter_subquery->ignoreWithTotals(); } From fe683b8c78319cc83eafb9c6443fe1fe0e406826 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 30 Apr 2015 16:25:34 +0300 Subject: [PATCH 013/116] dbms: Server: Remove unneeded event statistics. [#METR-15090] --- dbms/src/IO/WriteBufferAIO.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/IO/WriteBufferAIO.cpp b/dbms/src/IO/WriteBufferAIO.cpp index a21577c8bea..7465ca37948 100644 --- a/dbms/src/IO/WriteBufferAIO.cpp +++ b/dbms/src/IO/WriteBufferAIO.cpp @@ -35,8 +35,6 @@ WriteBufferAIO::WriteBufferAIO(const std::string & filename_, size_t buffer_size throwFromErrno("Cannot open file " + filename, error_code); } - ProfileEvents::increment(ProfileEvents::FileOpen); - ::memset(&request, 0, sizeof(request)); } From 47b9cd0dd67c168424302a99a7f101510ae153de Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 30 Apr 2015 18:04:42 +0300 Subject: [PATCH 014/116] dbms: Server: Small cosmetic cleanup. [#METR-15090] --- dbms/include/DB/IO/WriteBufferAIO.h | 3 ++- dbms/include/DB/IO/WriteBufferFromFileBase.h | 4 +++- .../DB/IO/WriteBufferFromFileDescriptor.h | 14 +++++++------- dbms/src/IO/WriteBufferAIO.cpp | 18 +++++++++--------- dbms/src/IO/WriteBufferFromFileBase.cpp | 5 +++++ 5 files changed, 26 insertions(+), 18 deletions(-) diff --git a/dbms/include/DB/IO/WriteBufferAIO.h b/dbms/include/DB/IO/WriteBufferAIO.h index fc238f5d6ce..08a6ff67cb8 100644 --- a/dbms/include/DB/IO/WriteBufferAIO.h +++ b/dbms/include/DB/IO/WriteBufferAIO.h @@ -26,7 +26,6 @@ public: WriteBufferAIO & operator=(const WriteBufferAIO &) = delete; off_t getPositionInFile() override; - void truncate(off_t length = 0) override; void sync() override; std::string getFileName() const override { return filename; } int getFD() const override { return fd; } @@ -36,6 +35,8 @@ private: void nextImpl() override; /// off_t doSeek(off_t off, int whence) override; + /// + void doTruncate(off_t length) override; /// Если в буфере ещё остались данные - запишем их. void flush(); /// Ждать окончания текущей асинхронной задачи. diff --git a/dbms/include/DB/IO/WriteBufferFromFileBase.h b/dbms/include/DB/IO/WriteBufferFromFileBase.h index badfef29739..e054e5e8e16 100644 --- a/dbms/include/DB/IO/WriteBufferFromFileBase.h +++ b/dbms/include/DB/IO/WriteBufferFromFileBase.h @@ -14,15 +14,17 @@ class WriteBufferFromFileBase : public BufferWithOwnMemory public: WriteBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment); virtual ~WriteBufferFromFileBase(); + off_t seek(off_t off, int whence = SEEK_SET); + void truncate(off_t length = 0); virtual off_t getPositionInFile() = 0; - virtual void truncate(off_t length) = 0; virtual void sync() = 0; virtual std::string getFileName() const = 0; virtual int getFD() const = 0; protected: virtual off_t doSeek(off_t off, int whence) = 0; + virtual void doTruncate(off_t length) = 0; }; } diff --git a/dbms/include/DB/IO/WriteBufferFromFileDescriptor.h b/dbms/include/DB/IO/WriteBufferFromFileDescriptor.h index 357ec9ad504..9b3e5abc3b0 100644 --- a/dbms/include/DB/IO/WriteBufferFromFileDescriptor.h +++ b/dbms/include/DB/IO/WriteBufferFromFileDescriptor.h @@ -85,13 +85,6 @@ public: return seek(0, SEEK_CUR); } - void truncate(off_t length = 0) override - { - int res = ftruncate(fd, length); - if (-1 == res) - throwFromErrno("Cannot truncate file " + getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE); - } - void sync() override { /// Если в буфере ещё остались данные - запишем их. @@ -111,6 +104,13 @@ private: throwFromErrno("Cannot seek through file " + getFileName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); return res; } + + void doTruncate(off_t length) override + { + int res = ftruncate(fd, length); + if (-1 == res) + throwFromErrno("Cannot truncate file " + getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE); + } }; } diff --git a/dbms/src/IO/WriteBufferAIO.cpp b/dbms/src/IO/WriteBufferAIO.cpp index 7465ca37948..78f892ecad1 100644 --- a/dbms/src/IO/WriteBufferAIO.cpp +++ b/dbms/src/IO/WriteBufferAIO.cpp @@ -61,15 +61,6 @@ off_t WriteBufferAIO::getPositionInFile() return seek(0, SEEK_CUR); } -void WriteBufferAIO::truncate(off_t length) -{ - flush(); - - int res = ::ftruncate(fd, length); - if (res == -1) - throwFromErrno("Cannot truncate file " + filename, ErrorCodes::CANNOT_TRUNCATE_FILE); -} - void WriteBufferAIO::sync() { flush(); @@ -140,6 +131,15 @@ off_t WriteBufferAIO::doSeek(off_t off, int whence) return pos_in_file; } +void WriteBufferAIO::doTruncate(off_t length) +{ + flush(); + + int res = ::ftruncate(fd, length); + if (res == -1) + throwFromErrno("Cannot truncate file " + filename, ErrorCodes::CANNOT_TRUNCATE_FILE); +} + void WriteBufferAIO::flush() { next(); diff --git a/dbms/src/IO/WriteBufferFromFileBase.cpp b/dbms/src/IO/WriteBufferFromFileBase.cpp index fec26327c32..e85530ebbea 100644 --- a/dbms/src/IO/WriteBufferFromFileBase.cpp +++ b/dbms/src/IO/WriteBufferFromFileBase.cpp @@ -17,4 +17,9 @@ off_t WriteBufferFromFileBase::seek(off_t off, int whence) return doSeek(off, whence); } +void WriteBufferFromFileBase::truncate(off_t length) +{ + return doTruncate(length); +} + } From 2f2c7869f643d2651f0cdd3c8ff22228c7e8364f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 May 2015 11:06:15 +0300 Subject: [PATCH 015/116] dbms: fixed error [#METR-16164]. --- dbms/include/DB/Columns/ColumnConst.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/include/DB/Columns/ColumnConst.h b/dbms/include/DB/Columns/ColumnConst.h index 3b30d53f0f1..3286f707e7e 100644 --- a/dbms/include/DB/Columns/ColumnConst.h +++ b/dbms/include/DB/Columns/ColumnConst.h @@ -41,14 +41,14 @@ public: bool isNumeric() const override { return IsNumber::value; } bool isFixed() const override { return IsNumber::value; } size_t sizeOfField() const override { return sizeof(T); } - ColumnPtr cloneResized(size_t s_) const override { return new ColumnConst(s_, data); } + ColumnPtr cloneResized(size_t s_) const override { return new ColumnConst(s_, data, data_type); } size_t size() const override { return s; } Field operator[](size_t n) const override { return FieldType(data); } void get(size_t n, Field & res) const override { res = FieldType(data); } ColumnPtr cut(size_t start, size_t length) const override { - return new ColumnConst(length, data, data_type); + return new ColumnConst(length, data, data_type); } void insert(const Field & x) override @@ -79,7 +79,7 @@ public: if (s != filt.size()) throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - return new ColumnConst(countBytesInFilter(filt), data, data_type); + return new ColumnConst(countBytesInFilter(filt), data, data_type); } ColumnPtr replicate(const Offsets_t & offsets) const override @@ -88,7 +88,7 @@ public: throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); size_t replicated_size = 0 == s ? 0 : offsets.back(); - return new ColumnConst(replicated_size, data, data_type); + return new ColumnConst(replicated_size, data, data_type); } size_t byteSize() const override { return sizeof(data) + sizeof(s); } @@ -103,7 +103,7 @@ public: if (perm.size() < limit) throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - return new ColumnConst(limit, data, data_type); + return new ColumnConst(limit, data, data_type); } int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override From cb562a1d97e6540e68b1cc88b7f70901fb7f4d8c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 May 2015 11:07:31 +0300 Subject: [PATCH 016/116] dbms: don't calculate constant expressions for every block [#METR-16164]. --- dbms/src/Interpreters/ExpressionActions.cpp | 23 +++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index b710bb03dc1..fbfdd349381 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -327,7 +327,7 @@ void ExpressionAction::execute(Block & block) const break; case ADD_COLUMN: - block.insert(ColumnWithNameAndType(added_column->cloneResized(block.rows()), result_type, result_name)); + block.insert(ColumnWithNameAndType(added_column->cloneResized(block.rowsInFirstColumn()), result_type, result_name)); break; case COPY_COLUMN: @@ -608,6 +608,8 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum void ExpressionActions::finalize(const Names & output_columns) { +// std::cerr << "finalize\n"; + NameSet final_columns; for (size_t i = 0; i < output_columns.size(); ++i) { @@ -629,7 +631,7 @@ void ExpressionActions::finalize(const Names & output_columns) unmodified_columns.insert(it->name); } - /// Будем идти с конца и поодерживать множество нужных на данном этапе столбцов. + /// Будем идти с конца и поддерживать множество нужных на данном этапе столбцов. /// Будем выбрасывать ненужные действия, хотя обычно их нет по построению. for (int i = static_cast(actions.size()) - 1; i >= 0; --i) { @@ -691,6 +693,23 @@ void ExpressionActions::finalize(const Names & output_columns) unmodified_columns.erase(out); needed_columns.erase(out); + + /** Если функция - константное выражение, то заменим действие на добавление столбца-константы - результата. + * То есть, осуществляем constant folding. + */ + if (action.type == ExpressionAction::APPLY_FUNCTION && sample_block.has(out)) + { + auto & result = sample_block.getByName(out); + if (!result.column.isNull()) + { + action.type = ExpressionAction::ADD_COLUMN; + action.result_type = result.type; + action.added_column = result.column; + action.function = nullptr; + action.argument_names.clear(); + in.clear(); + } + } } needed_columns.insert(in.begin(), in.end()); From 3b5fe94b82a93c594d69e66335175649cc63ee33 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 May 2015 11:35:50 +0300 Subject: [PATCH 017/116] dbms: fixed indent [#METR-16164]. --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 112 +++++++++---------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 33ec5ce5797..ff2c3e27afc 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -809,76 +809,76 @@ void ExpressionAnalyzer::makeSet(ASTFunction * node, const Block & sample_block) /// Случай явного перечисления значений. void ExpressionAnalyzer::makeExplicitSet(ASTFunction * node, const Block & sample_block, bool create_ordered_set) { - IAST & args = *node->arguments; - ASTPtr & arg = args.children.at(1); + IAST & args = *node->arguments; + ASTPtr & arg = args.children.at(1); - DataTypes set_element_types; - ASTPtr & left_arg = args.children.at(0); + DataTypes set_element_types; + ASTPtr & left_arg = args.children.at(0); - ASTFunction * left_arg_tuple = typeid_cast(&*left_arg); + ASTFunction * left_arg_tuple = typeid_cast(&*left_arg); - if (left_arg_tuple && left_arg_tuple->name == "tuple") + if (left_arg_tuple && left_arg_tuple->name == "tuple") + { + for (const auto & arg : left_arg_tuple->arguments->children) { - for (const auto & arg : left_arg_tuple->arguments->children) - { - const auto & data_type = sample_block.getByName(arg->getColumnName()).type; + const auto & data_type = sample_block.getByName(arg->getColumnName()).type; - /// @note prevent crash in query: SELECT (1, [1]) in (1, 1) - if (const auto array = typeid_cast(data_type.get())) - throw Exception("Incorrect element of tuple: " + array->getName(), ErrorCodes::INCORRECT_ELEMENT_OF_SET); + /// @note prevent crash in query: SELECT (1, [1]) in (1, 1) + if (const auto array = typeid_cast(data_type.get())) + throw Exception("Incorrect element of tuple: " + array->getName(), ErrorCodes::INCORRECT_ELEMENT_OF_SET); - set_element_types.push_back(data_type); - } + set_element_types.push_back(data_type); } + } + else + { + DataTypePtr left_type = sample_block.getByName(left_arg->getColumnName()).type; + if (DataTypeArray * array_type = typeid_cast(&*left_type)) + set_element_types.push_back(array_type->getNestedType()); else - { - DataTypePtr left_type = sample_block.getByName(left_arg->getColumnName()).type; - if (DataTypeArray * array_type = typeid_cast(&*left_type)) - set_element_types.push_back(array_type->getNestedType()); - else - set_element_types.push_back(left_type); - } + set_element_types.push_back(left_type); + } - /// Отличим случай x in (1, 2) от случая x in 1 (он же x in (1)). - bool single_value = false; - ASTPtr elements_ast = arg; + /// Отличим случай x in (1, 2) от случая x in 1 (он же x in (1)). + bool single_value = false; + ASTPtr elements_ast = arg; - if (ASTFunction * set_func = typeid_cast(&*arg)) - { - if (set_func->name != "tuple") - throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - /// Отличм случай (x, y) in ((1, 2), (3, 4)) от случая (x, y) in (1, 2). - ASTFunction * any_element = typeid_cast(&*set_func->arguments->children.at(0)); - if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple")) - single_value = true; - else - elements_ast = set_func->arguments; - } - else if (typeid_cast(&*arg)) - { - single_value = true; - } - else - { + if (ASTFunction * set_func = typeid_cast(&*arg)) + { + if (set_func->name != "tuple") throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - if (single_value) - { - ASTPtr exp_list = new ASTExpressionList; - exp_list->children.push_back(elements_ast); - elements_ast = exp_list; - } + /// Отличм случай (x, y) in ((1, 2), (3, 4)) от случая (x, y) in (1, 2). + ASTFunction * any_element = typeid_cast(&*set_func->arguments->children.at(0)); + if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple")) + single_value = true; + else + elements_ast = set_func->arguments; + } + else if (typeid_cast(&*arg)) + { + single_value = true; + } + else + { + throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } - ASTSet * ast_set = new ASTSet(arg->getColumnName()); - ASTPtr ast_set_ptr = ast_set; - ast_set->set = new Set(settings.limits); - ast_set->is_explicit = true; - ast_set->set->createFromAST(set_element_types, elements_ast, create_ordered_set); - arg = ast_set_ptr; + if (single_value) + { + ASTPtr exp_list = new ASTExpressionList; + exp_list->children.push_back(elements_ast); + elements_ast = exp_list; + } + + ASTSet * ast_set = new ASTSet(arg->getColumnName()); + ASTPtr ast_set_ptr = ast_set; + ast_set->set = new Set(settings.limits); + ast_set->is_explicit = true; + ast_set->set->createFromAST(set_element_types, elements_ast, create_ordered_set); + arg = ast_set_ptr; } From 310ed66b00307f29f97639123ec543bb64000d06 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 May 2015 12:13:08 +0300 Subject: [PATCH 018/116] Revert "dbms: improvement (incomplete) [#METR-16164]." This reverts commit 6f4f44ce7980cace32edd0913b8d1d53cd51682b. --- dbms/include/DB/Common/UInt128.h | 5 - dbms/include/DB/Core/Field.h | 59 ---------- .../LogicalExpressionsOptimizer.h | 4 +- dbms/include/DB/Parsers/ASTAlterQuery.h | 7 -- dbms/include/DB/Parsers/ASTAsterisk.h | 5 - dbms/include/DB/Parsers/ASTCheckQuery.h | 7 -- .../include/DB/Parsers/ASTColumnDeclaration.h | 6 - dbms/include/DB/Parsers/ASTCreateQuery.h | 10 +- dbms/include/DB/Parsers/ASTDropQuery.h | 10 +- dbms/include/DB/Parsers/ASTExpressionList.h | 9 +- dbms/include/DB/Parsers/ASTFunction.h | 38 ++++++- dbms/include/DB/Parsers/ASTIdentifier.h | 6 - dbms/include/DB/Parsers/ASTInsertQuery.h | 9 +- dbms/include/DB/Parsers/ASTJoin.h | 8 -- dbms/include/DB/Parsers/ASTLiteral.h | 6 - dbms/include/DB/Parsers/ASTNameTypePair.h | 6 - dbms/include/DB/Parsers/ASTOptimizeQuery.h | 9 +- dbms/include/DB/Parsers/ASTOrderByElement.h | 11 +- dbms/include/DB/Parsers/ASTQueryWithOutput.h | 9 +- .../DB/Parsers/ASTQueryWithTableAndOutput.h | 19 +--- dbms/include/DB/Parsers/ASTRenameQuery.h | 7 +- dbms/include/DB/Parsers/ASTSelectQuery.h | 5 - dbms/include/DB/Parsers/ASTSet.h | 5 - dbms/include/DB/Parsers/ASTSetQuery.h | 9 +- dbms/include/DB/Parsers/ASTShowTablesQuery.h | 13 +-- dbms/include/DB/Parsers/ASTSubquery.h | 13 +-- dbms/include/DB/Parsers/ASTUseQuery.h | 8 +- dbms/include/DB/Parsers/IAST.h | 105 +++++++----------- dbms/src/DataStreams/tests/filter_stream.cpp | 1 + .../tests/filter_stream_hitlog.cpp | 1 + dbms/src/Interpreters/ExpressionAnalyzer.cpp | 39 +------ .../LogicalExpressionsOptimizer.cpp | 2 +- dbms/src/Parsers/tests/select_parser.cpp | 1 + 33 files changed, 109 insertions(+), 343 deletions(-) diff --git a/dbms/include/DB/Common/UInt128.h b/dbms/include/DB/Common/UInt128.h index a975a004302..f780a627b76 100644 --- a/dbms/include/DB/Common/UInt128.h +++ b/dbms/include/DB/Common/UInt128.h @@ -21,11 +21,6 @@ struct UInt128 bool operator!= (const UInt64 rhs) const { return first != rhs || second != 0; } UInt128 & operator= (const UInt64 rhs) { first = rhs; second = 0; return *this; } - - bool operator< (const UInt128 rhs) const - { - return std::tie(first, second) < std::tie(rhs.first, rhs.second); - } }; struct UInt128Hash diff --git a/dbms/include/DB/Core/Field.h b/dbms/include/DB/Core/Field.h index 3070758d571..33c0835f7fa 100644 --- a/dbms/include/DB/Core/Field.h +++ b/dbms/include/DB/Core/Field.h @@ -18,7 +18,6 @@ #include #include -#include namespace DB @@ -573,64 +572,6 @@ public: } }; -/** Обновляет состояние хэш-функции значением. */ -class FieldVisitorUpdateHash : public StaticVisitor<> -{ -private: - SipHash & hash; - -public: - FieldVisitorUpdateHash(SipHash & hash_) : hash(hash_) {} - - void operator() (const Null & x) const - { - auto type = Field::Types::Null; - hash.update(reinterpret_cast(&type), 1); - } - - void operator() (const UInt64 & x) const - { - auto type = Field::Types::UInt64; - hash.update(reinterpret_cast(&type), 1); - hash.update(reinterpret_cast(&x), sizeof(x)); - } - - void operator() (const Int64 & x) const - { - auto type = Field::Types::Int64; - hash.update(reinterpret_cast(&type), 1); - hash.update(reinterpret_cast(&x), sizeof(x)); - } - - void operator() (const Float64 & x) const - { - auto type = Field::Types::Float64; - hash.update(reinterpret_cast(&type), 1); - hash.update(reinterpret_cast(&x), sizeof(x)); - } - - void operator() (const String & x) const - { - auto type = Field::Types::String; - hash.update(reinterpret_cast(&type), 1); - size_t size = x.size(); - hash.update(reinterpret_cast(&size), sizeof(size)); - hash.update(x.data(), x.size()); - } - - void operator() (const Array & x) const - { - auto type = Field::Types::Array; - hash.update(reinterpret_cast(&type), 1); - size_t size = x.size(); - hash.update(reinterpret_cast(&size), sizeof(size)); - - for (const auto & elem : x) - apply_visitor(*this, elem); - } -}; - - /** Выводит текстовое представление типа, как литерала в SQL запросе */ class FieldVisitorToString : public StaticVisitor { diff --git a/dbms/include/DB/Interpreters/LogicalExpressionsOptimizer.h b/dbms/include/DB/Interpreters/LogicalExpressionsOptimizer.h index e3491322558..653dc80d5d7 100644 --- a/dbms/include/DB/Interpreters/LogicalExpressionsOptimizer.h +++ b/dbms/include/DB/Interpreters/LogicalExpressionsOptimizer.h @@ -39,11 +39,11 @@ private: */ struct OrWithExpression { - OrWithExpression(ASTFunction * or_function_, UInt128 expression_); + OrWithExpression(ASTFunction * or_function_, const std::string & expression_); bool operator<(const OrWithExpression & rhs) const; ASTFunction * or_function; - const UInt128 expression; + const std::string expression; }; struct Equalities diff --git a/dbms/include/DB/Parsers/ASTAlterQuery.h b/dbms/include/DB/Parsers/ASTAlterQuery.h index 914f8ad6d72..1d22d25e186 100644 --- a/dbms/include/DB/Parsers/ASTAlterQuery.h +++ b/dbms/include/DB/Parsers/ASTAlterQuery.h @@ -89,13 +89,6 @@ public: /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return ("AlterQuery_" + database + "_" + table); }; - void updateHashWith(SipHash & hash) const override - { - hash.update("AlterQuery", strlen("AlterQuery") + 1); - hash.update(database.data(), database.size() + 1); - hash.update(table.data(), table.size() + 1); - } - ASTPtr clone() const override { ASTAlterQuery * res = new ASTAlterQuery(*this); diff --git a/dbms/include/DB/Parsers/ASTAsterisk.h b/dbms/include/DB/Parsers/ASTAsterisk.h index e881f6b3845..aa90d676c71 100644 --- a/dbms/include/DB/Parsers/ASTAsterisk.h +++ b/dbms/include/DB/Parsers/ASTAsterisk.h @@ -16,11 +16,6 @@ public: String getID() const override { return "Asterisk"; } ASTPtr clone() const override { return new ASTAsterisk(*this); } String getColumnName() const override { return "*"; } - - void updateHashWith(SipHash & hash) const override - { - hash.update("Asterisk", strlen("Asterisk") + 1); - } }; } diff --git a/dbms/include/DB/Parsers/ASTCheckQuery.h b/dbms/include/DB/Parsers/ASTCheckQuery.h index 18019751e99..901ad7ef567 100644 --- a/dbms/include/DB/Parsers/ASTCheckQuery.h +++ b/dbms/include/DB/Parsers/ASTCheckQuery.h @@ -12,13 +12,6 @@ struct ASTCheckQuery : public IAST /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return ("CheckQuery_" + database + "_" + table); }; - void updateHashWith(SipHash & hash) const override - { - hash.update("CheckQuery", strlen("CheckQuery") + 1); - hash.update(database.data(), database.size() + 1); - hash.update(table.data(), table.size() + 1); - } - ASTPtr clone() const override { return new ASTCheckQuery(*this); diff --git a/dbms/include/DB/Parsers/ASTColumnDeclaration.h b/dbms/include/DB/Parsers/ASTColumnDeclaration.h index 2f343549ad8..9862c5a81b6 100644 --- a/dbms/include/DB/Parsers/ASTColumnDeclaration.h +++ b/dbms/include/DB/Parsers/ASTColumnDeclaration.h @@ -21,12 +21,6 @@ public: String getID() const override { return "ColumnDeclaration_" + name; } - void updateHashWith(SipHash & hash) const override - { - hash.update("ColumnDeclaration", strlen("ColumnDeclaration") + 1); - hash.update(name.data(), name.size() + 1); - } - ASTPtr clone() const override { const auto res = new ASTColumnDeclaration{*this}; diff --git a/dbms/include/DB/Parsers/ASTCreateQuery.h b/dbms/include/DB/Parsers/ASTCreateQuery.h index d5f75f000ce..33d69a8e8e3 100644 --- a/dbms/include/DB/Parsers/ASTCreateQuery.h +++ b/dbms/include/DB/Parsers/ASTCreateQuery.h @@ -30,18 +30,10 @@ public: ASTCreateQuery() = default; ASTCreateQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return (attach ? "AttachQuery_" : "CreateQuery_") + database + "_" + table; }; - void updateHashWith(SipHash & hash) const override - { - hash.update(reinterpret_cast(&attach), sizeof(attach)); - hash.update("CreateQuery", strlen("CreateQuery") + 1); - hash.update(database.data(), database.size() + 1); - hash.update(table.data(), table.size() + 1); - } - ASTPtr clone() const override { ASTCreateQuery * res = new ASTCreateQuery(*this); diff --git a/dbms/include/DB/Parsers/ASTDropQuery.h b/dbms/include/DB/Parsers/ASTDropQuery.h index 137f9c3e071..c0ac24017d0 100644 --- a/dbms/include/DB/Parsers/ASTDropQuery.h +++ b/dbms/include/DB/Parsers/ASTDropQuery.h @@ -19,18 +19,10 @@ public: ASTDropQuery() = default; ASTDropQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return (detach ? "DetachQuery_" : "DropQuery_") + database + "_" + table; }; - void updateHashWith(SipHash & hash) const override - { - hash.update(reinterpret_cast(&detach), sizeof(detach)); - hash.update("DropQuery", strlen("DropQuery") + 1); - hash.update(database.data(), database.size() + 1); - hash.update(table.data(), table.size() + 1); - } - ASTPtr clone() const override { return new ASTDropQuery(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTExpressionList.h b/dbms/include/DB/Parsers/ASTExpressionList.h index 63935dfb42a..1ec814a8d1b 100644 --- a/dbms/include/DB/Parsers/ASTExpressionList.h +++ b/dbms/include/DB/Parsers/ASTExpressionList.h @@ -16,21 +16,16 @@ class ASTExpressionList : public IAST public: ASTExpressionList() = default; ASTExpressionList(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "ExpressionList"; } - void updateHashWith(SipHash & hash) const override - { - hash.update("ExpressionList", strlen("ExpressionList") + 1); - } - ASTPtr clone() const override { const auto res = new ASTExpressionList(*this); ASTPtr ptr{res}; res->children.clear(); - + for (const auto & child : children) res->children.emplace_back(child->clone()); diff --git a/dbms/include/DB/Parsers/ASTFunction.h b/dbms/include/DB/Parsers/ASTFunction.h index 3d4e0819e91..8f981eec593 100644 --- a/dbms/include/DB/Parsers/ASTFunction.h +++ b/dbms/include/DB/Parsers/ASTFunction.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -37,15 +38,40 @@ public: ASTFunction() = default; ASTFunction(const StringRange range_) : ASTWithAlias(range_) {} + String getColumnName() const override + { + SipHash hash; + + hash.update(name.data(), name.size()); + + if (parameters) + { + hash.update("(", 1); + for (const auto & param : parameters->children) + { + String param_name = param->getColumnName(); /// TODO Сделать метод updateHashWith. + hash.update(param_name.data(), param_name.size() + 1); + } + hash.update(")", 1); + } + + hash.update("(", 1); + for (const auto & arg : arguments->children) + { + String arg_name = arg->getColumnName(); + hash.update(arg_name.data(), arg_name.size() + 1); + } + hash.update(")", 1); + + UInt64 low, high; + hash.get128(low, high); + + return toString(high) + "_" + toString(low); /// TODO hex. + } + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Function_" + name; } - void updateHashWith(SipHash & hash) const override - { - hash.update("Function", strlen("Function") + 1); - hash.update(name.data(), name.size() + 1); - } - ASTPtr clone() const override { ASTFunction * res = new ASTFunction(*this); diff --git a/dbms/include/DB/Parsers/ASTIdentifier.h b/dbms/include/DB/Parsers/ASTIdentifier.h index 25ba2a19a33..58ae38ca434 100644 --- a/dbms/include/DB/Parsers/ASTIdentifier.h +++ b/dbms/include/DB/Parsers/ASTIdentifier.h @@ -35,12 +35,6 @@ public: /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Identifier_" + name; } - void updateHashWith(SipHash & hash) const override - { - hash.update("Identifier", strlen("Identifier") + 1); - hash.update(name.data(), name.size() + 1); - } - ASTPtr clone() const override { return new ASTIdentifier(*this); } void collectIdentifierNames(IdentifierNameSet & set) const override diff --git a/dbms/include/DB/Parsers/ASTInsertQuery.h b/dbms/include/DB/Parsers/ASTInsertQuery.h index 51728ecb5d1..5e6988bcfc6 100644 --- a/dbms/include/DB/Parsers/ASTInsertQuery.h +++ b/dbms/include/DB/Parsers/ASTInsertQuery.h @@ -26,17 +26,10 @@ public: ASTInsertQuery() = default; ASTInsertQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "InsertQuery_" + database + "_" + table; }; - void updateHashWith(SipHash & hash) const override - { - hash.update("InsertQuery", strlen("InsertQuery") + 1); - hash.update(database.data(), database.size() + 1); - hash.update(table.data(), table.size() + 1); - } - ASTPtr clone() const override { ASTInsertQuery * res = new ASTInsertQuery(*this); diff --git a/dbms/include/DB/Parsers/ASTJoin.h b/dbms/include/DB/Parsers/ASTJoin.h index 5aaee2528b4..5809c8b4eb5 100644 --- a/dbms/include/DB/Parsers/ASTJoin.h +++ b/dbms/include/DB/Parsers/ASTJoin.h @@ -69,14 +69,6 @@ public: return res; }; - void updateHashWith(SipHash & hash) const override - { - hash.update("Join", strlen("Join") + 1); - hash.update(reinterpret_cast(&locality), sizeof(locality)); - hash.update(reinterpret_cast(&strictness), sizeof(strictness)); - hash.update(reinterpret_cast(&kind), sizeof(kind)); - } - ASTPtr clone() const override { ASTJoin * res = new ASTJoin(*this); diff --git a/dbms/include/DB/Parsers/ASTLiteral.h b/dbms/include/DB/Parsers/ASTLiteral.h index 73004c8dfa3..b7b24ef56db 100644 --- a/dbms/include/DB/Parsers/ASTLiteral.h +++ b/dbms/include/DB/Parsers/ASTLiteral.h @@ -25,12 +25,6 @@ public: /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Literal_" + apply_visitor(FieldVisitorDump(), value); } - void updateHashWith(SipHash & hash) const override - { - hash.update("Literal", strlen("Literal") + 1); - apply_visitor(FieldVisitorUpdateHash(hash), value); - } - ASTPtr clone() const override { return new ASTLiteral(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTNameTypePair.h b/dbms/include/DB/Parsers/ASTNameTypePair.h index 87232c23e4c..176c431cafa 100644 --- a/dbms/include/DB/Parsers/ASTNameTypePair.h +++ b/dbms/include/DB/Parsers/ASTNameTypePair.h @@ -23,12 +23,6 @@ public: /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "NameTypePair_" + name; } - void updateHashWith(SipHash & hash) const override - { - hash.update("NameTypePair", strlen("NameTypePair") + 1); - hash.update(name.data(), name.size() + 1); - } - ASTPtr clone() const override { ASTNameTypePair * res = new ASTNameTypePair(*this); diff --git a/dbms/include/DB/Parsers/ASTOptimizeQuery.h b/dbms/include/DB/Parsers/ASTOptimizeQuery.h index 305ef4788e7..906b3d1edb5 100644 --- a/dbms/include/DB/Parsers/ASTOptimizeQuery.h +++ b/dbms/include/DB/Parsers/ASTOptimizeQuery.h @@ -17,17 +17,10 @@ public: ASTOptimizeQuery() = default; ASTOptimizeQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "OptimizeQuery_" + database + "_" + table; }; - void updateHashWith(SipHash & hash) const override - { - hash.update("OptimizeQuery", strlen("OptimizeQuery") + 1); - hash.update(database.data(), database.size() + 1); - hash.update(table.data(), table.size() + 1); - } - ASTPtr clone() const override { return new ASTOptimizeQuery(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTOrderByElement.h b/dbms/include/DB/Parsers/ASTOrderByElement.h index ba87eef3666..f341265d93b 100644 --- a/dbms/include/DB/Parsers/ASTOrderByElement.h +++ b/dbms/include/DB/Parsers/ASTOrderByElement.h @@ -15,24 +15,19 @@ class ASTOrderByElement : public IAST { public: int direction; /// 1, если ASC, -1, если DESC - + /** Collator для locale-specific сортировки строк. * Если nullptr, то производится сортировка по байтам. */ Poco::SharedPtr collator; - + ASTOrderByElement() = default; ASTOrderByElement(const StringRange range_, const int direction_, const Poco::SharedPtr & collator_ = nullptr) : IAST(range_), direction(direction_), collator(collator_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "OrderByElement"; } - void updateHashWith(SipHash & hash) const override - { - hash.update("OrderByElement", strlen("OrderByElement") + 1); - } - ASTPtr clone() const override { return new ASTOrderByElement(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTQueryWithOutput.h b/dbms/include/DB/Parsers/ASTQueryWithOutput.h index 1ab8aecfa69..2b5723249f2 100644 --- a/dbms/include/DB/Parsers/ASTQueryWithOutput.h +++ b/dbms/include/DB/Parsers/ASTQueryWithOutput.h @@ -5,8 +5,8 @@ namespace DB { - - + + /** Запрос с секцией FORMAT. */ class ASTQueryWithOutput : public IAST @@ -28,11 +28,6 @@ public: \ Name(StringRange range_) : ASTQueryWithOutput(range_) {} \ String getID() const override { return ID; }; \ \ - void updateHashWith(SipHash & hash) const override \ - { \ - hash.update(ID, strlen(ID) + 1); \ - } \ - \ ASTPtr clone() const override \ { \ Name * res = new Name(*this); \ diff --git a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h index 7dfd61ec1fc..32ebb1e528d 100644 --- a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h @@ -6,8 +6,8 @@ namespace DB { - - + + /** Запрос с указанием названия таблицы и, возможно, БД и секцией FORMAT. */ class ASTQueryWithTableAndOutput : public ASTQueryWithOutput @@ -15,12 +15,12 @@ namespace DB public: String database; String table; - + ASTQueryWithTableAndOutput() = default; ASTQueryWithTableAndOutput(const StringRange range_) : ASTQueryWithOutput(range_) {} }; - - + + /// Объявляет класс-наследник ASTQueryWithTableAndOutput с реализованными методами getID и clone. #define DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(Name, ID) \ class Name : public ASTQueryWithTableAndOutput \ @@ -29,14 +29,7 @@ public: \ Name() = default; \ Name(const StringRange range_) : ASTQueryWithTableAndOutput(range_) {} \ String getID() const override { return ID"_" + database + "_" + table; }; \ - \ - void updateHashWith(SipHash & hash) const override \ - { \ - hash.update(ID, strlen(ID) + 1); \ - hash.update(database.data(), database.size() + 1); \ - hash.update(table.data(), table.size() + 1); \ - } \ - \ + \ ASTPtr clone() const override \ { \ Name * res = new Name(*this); \ diff --git a/dbms/include/DB/Parsers/ASTRenameQuery.h b/dbms/include/DB/Parsers/ASTRenameQuery.h index 4074a0a50e4..4eb6624e4c3 100644 --- a/dbms/include/DB/Parsers/ASTRenameQuery.h +++ b/dbms/include/DB/Parsers/ASTRenameQuery.h @@ -29,15 +29,10 @@ public: ASTRenameQuery() = default; ASTRenameQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Rename"; }; - void updateHashWith(SipHash & hash) const override - { - hash.update("Rename", strlen("Rename") + 1); - } - ASTPtr clone() const override { return new ASTRenameQuery(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTSelectQuery.h b/dbms/include/DB/Parsers/ASTSelectQuery.h index 43aeb219a99..20b2065310a 100644 --- a/dbms/include/DB/Parsers/ASTSelectQuery.h +++ b/dbms/include/DB/Parsers/ASTSelectQuery.h @@ -39,11 +39,6 @@ public: /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "SelectQuery"; }; - void updateHashWith(SipHash & hash) const override - { - hash.update("SelectQuery", strlen("SelectQuery") + 1); - } - /// Проверить наличие функции arrayJoin. (Не большого ARRAY JOIN.) static bool hasArrayJoin(const ASTPtr & ast) { diff --git a/dbms/include/DB/Parsers/ASTSet.h b/dbms/include/DB/Parsers/ASTSet.h index 3761f0775f3..18edddc999f 100644 --- a/dbms/include/DB/Parsers/ASTSet.h +++ b/dbms/include/DB/Parsers/ASTSet.h @@ -22,11 +22,6 @@ public: String getID() const override { return "Set_" + getColumnName(); } ASTPtr clone() const override { return new ASTSet(*this); } String getColumnName() const override { return column_name; } - - void updateHashWith(SipHash & hash) const override - { - hash.update("Set", strlen("Set") + 1); - } }; } diff --git a/dbms/include/DB/Parsers/ASTSetQuery.h b/dbms/include/DB/Parsers/ASTSetQuery.h index 12c0e9c85c1..0334d1167bb 100644 --- a/dbms/include/DB/Parsers/ASTSetQuery.h +++ b/dbms/include/DB/Parsers/ASTSetQuery.h @@ -26,14 +26,9 @@ public: ASTSetQuery() = default; ASTSetQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ - String getID() const override { return "SetQuery"; }; - - void updateHashWith(SipHash & hash) const override - { - hash.update("SetQuery", strlen("SetQuery") + 1); - } + String getID() const override { return "Set"; }; ASTPtr clone() const override { return new ASTSetQuery(*this); } }; diff --git a/dbms/include/DB/Parsers/ASTShowTablesQuery.h b/dbms/include/DB/Parsers/ASTShowTablesQuery.h index 671716d7202..4b51b8f1aba 100644 --- a/dbms/include/DB/Parsers/ASTShowTablesQuery.h +++ b/dbms/include/DB/Parsers/ASTShowTablesQuery.h @@ -20,14 +20,9 @@ public: ASTShowTablesQuery() = default; ASTShowTablesQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ - String getID() const override { return "ShowTablesQuery"; }; - - void updateHashWith(SipHash & hash) const override - { - hash.update("ShowTablesQuery", strlen("ShowTablesQuery") + 1); - } + String getID() const override { return "ShowTables"; }; ASTPtr clone() const override { @@ -35,13 +30,13 @@ public: ASTPtr ptr{res}; res->children.clear(); - + if (format) { res->format = format->clone(); res->children.push_back(res->format); } - + return ptr; } }; diff --git a/dbms/include/DB/Parsers/ASTSubquery.h b/dbms/include/DB/Parsers/ASTSubquery.h index f3cefd87ea0..f94cbfe8346 100644 --- a/dbms/include/DB/Parsers/ASTSubquery.h +++ b/dbms/include/DB/Parsers/ASTSubquery.h @@ -16,15 +16,10 @@ class ASTSubquery : public IAST public: ASTSubquery() = default; ASTSubquery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Subquery"; } - void updateHashWith(SipHash & hash) const override - { - hash.update("Subquery", strlen("Subquery") + 1); - } - ASTPtr clone() const override { const auto res = new ASTSubquery{*this}; @@ -38,11 +33,7 @@ public: return ptr; } - String getColumnName() const override - { - auto id = getTreeID(); - return toString(id.first) + "_" + toString(id.second); - } + String getColumnName() const override { return getTreeID(); } }; } diff --git a/dbms/include/DB/Parsers/ASTUseQuery.h b/dbms/include/DB/Parsers/ASTUseQuery.h index 6a2d60353e5..eafe3496293 100644 --- a/dbms/include/DB/Parsers/ASTUseQuery.h +++ b/dbms/include/DB/Parsers/ASTUseQuery.h @@ -16,16 +16,10 @@ public: ASTUseQuery() = default; ASTUseQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "UseQuery_" + database; }; - void updateHashWith(SipHash & hash) const override - { - hash.update("UseQuery", strlen("UseQuery") + 1); - hash.update(database.data(), database.size() + 1); - } - ASTPtr clone() const override { return new ASTUseQuery(*this); } }; diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index d81b8156a4b..803cc9eb31f 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -9,10 +9,8 @@ #include #include -#include #include #include -#include #include #include @@ -45,67 +43,15 @@ public: IAST(const StringRange range_) : range(range_) {} virtual ~IAST() = default; - /** Получить имя, однозначно идентифицирующее выражение, если элемент является столбцом. У одинаковых выражений будет одинаковое имя. */ - virtual String getColumnName() const - { - /// По-умолчанию - подчёркивание, а затем getTreeID в hex-е. + /** Получить каноническое имя столбца, если элемент является столбцом */ + virtual String getColumnName() const { throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::NOT_A_COLUMN); } - union - { - UInt128 id; - UInt8 id_bytes[16]; - }; - - id = getTreeID(); - String res(1 + 2 * sizeof(id), '_'); - - for (size_t i = 0; i < sizeof(id); ++i) - { - res[i * 2 + 1] = (id_bytes[i] / 16) < 10 ? ('0' + (id_bytes[i] / 16)) : ('A' + (id_bytes[i] / 16 - 10)); - res[i * 2 + 2] = (id_bytes[i] % 16) < 10 ? ('0' + (id_bytes[i] % 16)) : ('A' + (id_bytes[i] % 16 - 10)); - } - - return res; - } - - /** Получить алиас, если он есть, или имя столбца, если его нет. */ + /** Получить алиас, если он есть, или каноническое имя столбца, если его нет. */ virtual String getAliasOrColumnName() const { return getColumnName(); } /** Получить алиас, если он есть, или пустую строку, если его нет, или если элемент не поддерживает алиасы. */ virtual String tryGetAlias() const { return String(); } - /** Обновить состояние хэш-функции элементом дерева. */ - virtual void updateHashWith(SipHash & hash) const = 0; - - /** Обновить состояние хэш-функции целым поддеревом. Используется для склейки одинаковых выражений. */ - void updateHashWithTree(SipHash & hash) const - { - updateHashWith(hash); - - if (!children.empty()) - { - size_t size = children.size(); - hash.update(reinterpret_cast(&size), sizeof(size)); - - for (size_t i = 0; i < size; ++i) - { - hash.update(reinterpret_cast(&i), sizeof(i)); - children[i]->updateHashWithTree(hash); - } - } - } - - /** Получить идентификатор поддерева. Используется для склейки одинаковых выражений. - */ - UInt128 getTreeID() const - { - SipHash hash; - updateHashWithTree(hash); - UInt128 res; - hash.get128(reinterpret_cast(&res)); - return res; - } - /** Установить алиас. */ virtual void setAlias(const String & to) { @@ -125,6 +71,37 @@ public: (*it)->is_visited = false; } + /** Получить текст, который идентифицирует этот элемент и всё поддерево. + * Обычно он содержит идентификатор элемента и getTreeID от всех детей. + */ + String getTreeID() const + { + std::stringstream s; + s << getID(); + + if (!children.empty()) + { + s << "("; + for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) + { + if (it != children.begin()) + s << ", "; + s << (*it)->getTreeID(); + } + s << ")"; + } + + return s.str(); + } + + void dumpTree(std::ostream & ostr, size_t indent = 0) const + { + String indent_str(indent, '-'); + ostr << indent_str << getID() << ", " << this << std::endl; + for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) + (*it)->dumpTree(ostr, indent + 1); + } + /** Проверить глубину дерева. * Если задано max_depth и глубина больше - кинуть исключение. * Возвращает глубину дерева. @@ -139,8 +116,8 @@ public: size_t checkSize(size_t max_size) const { size_t res = 1; - for (const auto & ast : children) - res += ast->checkSize(max_size); + for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) + res += (*it)->checkSize(max_size); if (res > max_size) throw Exception("AST is too big. Maximum: " + toString(max_size), ErrorCodes::TOO_BIG_AST); @@ -149,22 +126,22 @@ public: } /** Получить set из имен индентификаторов - */ + */ virtual void collectIdentifierNames(IdentifierNameSet & set) const { - for (const auto & ast : children) - ast->collectIdentifierNames(set); + for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) + (*it)->collectIdentifierNames(set); } private: size_t checkDepthImpl(size_t max_depth, size_t level) const { size_t res = level + 1; - for (const auto & ast : children) + for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) { if (level >= max_depth) throw Exception("AST is too deep. Maximum: " + toString(max_depth), ErrorCodes::TOO_DEEP_AST); - res = std::max(res, ast->checkDepthImpl(max_depth, level + 1)); + res = std::max(res, (*it)->checkDepthImpl(max_depth, level + 1)); } return res; diff --git a/dbms/src/DataStreams/tests/filter_stream.cpp b/dbms/src/DataStreams/tests/filter_stream.cpp index 639a434880a..2893d16b5ec 100644 --- a/dbms/src/DataStreams/tests/filter_stream.cpp +++ b/dbms/src/DataStreams/tests/filter_stream.cpp @@ -41,6 +41,7 @@ int main(int argc, char ** argv) formatAST(*ast, std::cerr); std::cerr << std::endl; + std::cerr << ast->getTreeID() << std::endl; Context context; context.getColumns().push_back(NameAndTypePair("number", new DataTypeUInt64)); diff --git a/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp b/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp index 4f2b58c3843..b383715a0a1 100644 --- a/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp +++ b/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp @@ -106,6 +106,7 @@ int main(int argc, char ** argv) formatAST(*ast, std::cerr); std::cerr << std::endl; + std::cerr << ast->getTreeID() << std::endl; /// создаём объект существующей таблицы хит лога diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index ff2c3e27afc..f233d0d100d 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1653,43 +1653,10 @@ void ExpressionAnalyzer::appendProjectResult(DB::ExpressionActionsChain & chain, NamesWithAliases result_columns; ASTs asts = select_query->select_expression_list->children; - - /// Выбор имён для столбцов результата. - size_t i = 1; - for (const auto & ast : asts) + for (size_t i = 0; i < asts.size(); ++i) { - String source_column_name = ast->getColumnName(); - String result_column_name = ast->tryGetAlias(); - - /// Если не задан алиас - нужно сгенерировать какое-нибудь имя автоматически. - if (result_column_name.empty()) - { - if (typeid_cast(ast.get()) || typeid_cast(ast.get())) - { - /// Если выражение простое, то будем использовать его имя. - result_column_name = source_column_name; - } - else if (auto func = typeid_cast(ast.get())) - { - /// Для функций используем имя вида _1_func, где func - имя функции. - WriteBufferFromString wb(result_column_name); - writeChar('_', wb); - writeIntText(i, wb); - writeChar('_', wb); - writeString(func->name, wb); - } - else - { - /// Если выражение сложное и для него не задан алиас, будем использовать имя вида _1, _2, ... - WriteBufferFromString wb(result_column_name); - writeChar('_', wb); - writeIntText(i, wb); - } - } - - result_columns.emplace_back(source_column_name, result_column_name); - step.required_output.emplace_back(result_columns.back().second); - ++i; + result_columns.emplace_back(asts[i]->getColumnName(), asts[i]->getAliasOrColumnName()); + step.required_output.push_back(result_columns.back().second); } step.actions->add(ExpressionAction::project(result_columns)); diff --git a/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp b/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp index 637ec65ddf0..bf3fb1a274e 100644 --- a/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -12,7 +12,7 @@ namespace DB { -LogicalExpressionsOptimizer::OrWithExpression::OrWithExpression(ASTFunction * or_function_, UInt128 expression_) +LogicalExpressionsOptimizer::OrWithExpression::OrWithExpression(ASTFunction * or_function_, const std::string & expression_) : or_function(or_function_), expression(expression_) { } diff --git a/dbms/src/Parsers/tests/select_parser.cpp b/dbms/src/Parsers/tests/select_parser.cpp index 22ff2d4770e..690cf27a391 100644 --- a/dbms/src/Parsers/tests/select_parser.cpp +++ b/dbms/src/Parsers/tests/select_parser.cpp @@ -27,6 +27,7 @@ int main(int argc, char ** argv) std::cout << "Success." << std::endl; formatAST(*ast, std::cerr); std::cout << std::endl; + std::cout << std::endl << ast->getTreeID() << std::endl; return 0; } From f33943144da601c722698ece0a42baa44c641458 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 May 2015 12:13:29 +0300 Subject: [PATCH 019/116] Revert "dbms: improvement [#METR-16164]." This reverts commit b9f977d430df589f006d6c2bb3ffc32bc2f54a31. --- dbms/include/DB/Parsers/ASTFunction.h | 34 +++++++++++++-------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTFunction.h b/dbms/include/DB/Parsers/ASTFunction.h index 8f981eec593..70380b67e7e 100644 --- a/dbms/include/DB/Parsers/ASTFunction.h +++ b/dbms/include/DB/Parsers/ASTFunction.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB @@ -40,33 +39,32 @@ public: String getColumnName() const override { - SipHash hash; - - hash.update(name.data(), name.size()); + String res; + WriteBufferFromString wb(res); + writeString(name, wb); if (parameters) { - hash.update("(", 1); - for (const auto & param : parameters->children) + writeChar('(', wb); + for (ASTs::const_iterator it = parameters->children.begin(); it != parameters->children.end(); ++it) { - String param_name = param->getColumnName(); /// TODO Сделать метод updateHashWith. - hash.update(param_name.data(), param_name.size() + 1); + if (it != parameters->children.begin()) + writeCString(", ", wb); + writeString((*it)->getColumnName(), wb); } - hash.update(")", 1); + writeChar(')', wb); } - hash.update("(", 1); - for (const auto & arg : arguments->children) + writeChar('(', wb); + for (ASTs::const_iterator it = arguments->children.begin(); it != arguments->children.end(); ++it) { - String arg_name = arg->getColumnName(); - hash.update(arg_name.data(), arg_name.size() + 1); + if (it != arguments->children.begin()) + writeCString(", ", wb); + writeString((*it)->getColumnName(), wb); } - hash.update(")", 1); + writeChar(')', wb); - UInt64 low, high; - hash.get128(low, high); - - return toString(high) + "_" + toString(low); /// TODO hex. + return res; } /** Получить текст, который идентифицирует этот элемент. */ From 6ce28acc0ffd64f043afc538c6186d86a1ff565d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 4 May 2015 20:52:19 +0300 Subject: [PATCH 020/116] dbms: cut dependency [#METR-2944]. --- dbms/include/DB/Functions/FunctionsConversion.h | 4 ++-- dbms/include/DB/Functions/FunctionsMiscellaneous.h | 1 + dbms/include/DB/Functions/IFunction.h | 6 ++++-- dbms/src/Storages/StorageMerge.cpp | 1 + 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsConversion.h b/dbms/include/DB/Functions/FunctionsConversion.h index 1b4290dc144..b5f5f4c2022 100644 --- a/dbms/include/DB/Functions/FunctionsConversion.h +++ b/dbms/include/DB/Functions/FunctionsConversion.h @@ -490,8 +490,8 @@ public: * Для неконстантных столбцов arguments[i].column = nullptr. */ void getReturnTypeAndPrerequisites(const ColumnsWithNameAndType & arguments, - DataTypePtr & out_return_type, - ExpressionActions::Actions & out_prerequisites) + DataTypePtr & out_return_type, + std::vector & out_prerequisites) { if (arguments.size() != 2) throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " diff --git a/dbms/include/DB/Functions/FunctionsMiscellaneous.h b/dbms/include/DB/Functions/FunctionsMiscellaneous.h index 559dc0f8353..2cb9d314362 100644 --- a/dbms/include/DB/Functions/FunctionsMiscellaneous.h +++ b/dbms/include/DB/Functions/FunctionsMiscellaneous.h @@ -24,6 +24,7 @@ #include #include #include +#include #include diff --git a/dbms/include/DB/Functions/IFunction.h b/dbms/include/DB/Functions/IFunction.h index 555a65124c9..431cdfd4dc3 100644 --- a/dbms/include/DB/Functions/IFunction.h +++ b/dbms/include/DB/Functions/IFunction.h @@ -7,12 +7,14 @@ #include #include #include -#include namespace DB { +class ExpressionAction; + + /** Интерфейс для обычных функций. * Обычные функции - это функции, которые не меняют количество строк в таблице, * и результат работы которых для каждой строчки не зависит от других строк. @@ -54,7 +56,7 @@ public: virtual void getReturnTypeAndPrerequisites( const ColumnsWithNameAndType & arguments, DataTypePtr & out_return_type, - ExpressionActions::Actions & out_prerequisites) + std::vector & out_prerequisites) { DataTypes types(arguments.size()); for (size_t i = 0; i < arguments.size(); ++i) diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index eb79cb8ce30..ff6e41aa7d3 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include From 550d5e9e7a482dfd88a374ad476efd31468b1cd1 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 5 May 2015 13:13:45 +0300 Subject: [PATCH 021/116] dbms: Server: Fixed hostname + IP address presentation. [#METR-15859] --- dbms/include/DB/Interpreters/Cluster.h | 1 + dbms/src/Interpreters/Cluster.cpp | 3 ++- dbms/src/Storages/StorageSystemClusters.cpp | 13 +++++++++---- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/dbms/include/DB/Interpreters/Cluster.h b/dbms/include/DB/Interpreters/Cluster.h index ccf3b223d41..1bb84dabb9a 100644 --- a/dbms/include/DB/Interpreters/Cluster.h +++ b/dbms/include/DB/Interpreters/Cluster.h @@ -63,6 +63,7 @@ public: * */ Poco::Net::SocketAddress host_port; + String host_name; String user; String password; UInt32 replica_num; diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp index c2341791a22..7a8f14f47e2 100644 --- a/dbms/src/Interpreters/Cluster.cpp +++ b/dbms/src/Interpreters/Cluster.cpp @@ -13,8 +13,9 @@ Cluster::Address::Address(const String & config_prefix) { auto & config = Poco::Util::Application::instance().config(); + host_name = config.getString(config_prefix + ".host"); host_port = Poco::Net::SocketAddress( - config.getString(config_prefix + ".host"), + host_name, config.getInt(config_prefix + ".port") ); diff --git a/dbms/src/Storages/StorageSystemClusters.cpp b/dbms/src/Storages/StorageSystemClusters.cpp index 52e665d4062..ba2f3f10f12 100644 --- a/dbms/src/Storages/StorageSystemClusters.cpp +++ b/dbms/src/Storages/StorageSystemClusters.cpp @@ -61,11 +61,16 @@ BlockInputStreams StorageSystemClusters::read( shard_weight_column->insert(static_cast(shard_info.weight)); replica_num_column->insert(static_cast(address.replica_num)); - const std::string & source = address.host_port.host().toString(); - const auto host_entry = Poco::Net::DNS::resolve(source); - host_name_column->insert(host_entry.name()); - host_address_column->insert(host_entry.addresses()[0].toString()); + host_name_column->insert(address.host_name); + const auto & ip_address = address.host_port.host(); + Poco::Net::IPAddress presented_ip_address; + if (ip_address.family() == Poco::Net::IPAddress::IPv6) + presented_ip_address = ip_address; + else + presented_ip_address = Poco::Net::IPAddress("::FFFF:" + ip_address.toString()); + + host_address_column->insert(presented_ip_address.toString()); port_column->insert(static_cast(address.host_port.port())); user_column->insert(address.user); }; From 6a3c429bf36c3a5070d0b5b6ffafaa50907cc860 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 5 May 2015 13:45:45 +0300 Subject: [PATCH 022/116] dbms: Server: Removed unneeded code. [#METR-15859] --- dbms/src/Storages/StorageSystemClusters.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/dbms/src/Storages/StorageSystemClusters.cpp b/dbms/src/Storages/StorageSystemClusters.cpp index ba2f3f10f12..37e99e9daed 100644 --- a/dbms/src/Storages/StorageSystemClusters.cpp +++ b/dbms/src/Storages/StorageSystemClusters.cpp @@ -62,15 +62,7 @@ BlockInputStreams StorageSystemClusters::read( replica_num_column->insert(static_cast(address.replica_num)); host_name_column->insert(address.host_name); - - const auto & ip_address = address.host_port.host(); - Poco::Net::IPAddress presented_ip_address; - if (ip_address.family() == Poco::Net::IPAddress::IPv6) - presented_ip_address = ip_address; - else - presented_ip_address = Poco::Net::IPAddress("::FFFF:" + ip_address.toString()); - - host_address_column->insert(presented_ip_address.toString()); + host_address_column->insert(address.host_port.host().toString()); port_column->insert(static_cast(address.host_port.port())); user_column->insert(address.user); }; From 2d8d0970edb5363baaab96ace8387b9964839d97 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 5 May 2015 14:53:09 +0300 Subject: [PATCH 023/116] dbms: Server: Fixed numbering. [#METR-15859] --- dbms/src/Interpreters/Cluster.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp index 7a8f14f47e2..e667766b6af 100644 --- a/dbms/src/Interpreters/Cluster.cpp +++ b/dbms/src/Interpreters/Cluster.cpp @@ -69,7 +69,7 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa const auto & config_prefix = cluster_name + "."; - UInt32 current_shard_num = 0; + UInt32 current_shard_num = 1; for (auto it = config_keys.begin(); it != config_keys.end(); ++it) { @@ -81,7 +81,7 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa continue; addresses.emplace_back(prefix); - addresses.back().replica_num = 0; + addresses.back().replica_num = 1; slot_to_shard.insert(std::end(slot_to_shard), weight, shard_info_vec.size()); if (const auto is_local = isLocal(addresses.back())) @@ -96,7 +96,7 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa addresses_with_failover.emplace_back(); Addresses & replica_addresses = addresses_with_failover.back(); - UInt32 current_replica_num = 0; + UInt32 current_replica_num = 1; const auto & partial_prefix = config_prefix + *it + "."; const auto weight = config.getInt(partial_prefix + ".weight", 1); From 8ff005e909a1921a7f09627451c5dbff1962796b Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 5 May 2015 17:02:57 +0300 Subject: [PATCH 024/116] dbms: Server: Fixed incorrect behavior with the DISTINCT keyword [#METR-14408] --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 7 +++++-- .../00154_distributed_with_distinct.reference | 10 ++++++++++ .../0_stateless/00154_distributed_with_distinct.sql | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00154_distributed_with_distinct.reference create mode 100644 dbms/tests/queries/0_stateless/00154_distributed_with_distinct.sql diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 9b47fc9b101..47f5857e73e 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -499,7 +499,7 @@ void InterpreterSelectQuery::executeSingleQuery() if (second_stage) { - bool need_second_distinct_pass = true; + bool need_second_distinct_pass = query.distinct; if (need_aggregate) { @@ -515,7 +515,7 @@ void InterpreterSelectQuery::executeSingleQuery() executeExpression(streams, before_order_and_select); executeDistinct(streams, true, selected_columns); - need_second_distinct_pass = streams.size() > 1; + need_second_distinct_pass = query.distinct && (streams.size() > 1); } else if (query.group_by_with_totals && !aggregate_final) { @@ -548,6 +548,9 @@ void InterpreterSelectQuery::executeSingleQuery() if (query.limit_length && streams.size() > 1 && !query.distinct) executePreLimit(streams); + if (need_second_distinct_pass) + union_within_single_query = true; + if (union_within_single_query) executeUnion(streams); diff --git a/dbms/tests/queries/0_stateless/00154_distributed_with_distinct.reference b/dbms/tests/queries/0_stateless/00154_distributed_with_distinct.reference new file mode 100644 index 00000000000..8b1acc12b63 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00154_distributed_with_distinct.reference @@ -0,0 +1,10 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/dbms/tests/queries/0_stateless/00154_distributed_with_distinct.sql b/dbms/tests/queries/0_stateless/00154_distributed_with_distinct.sql new file mode 100644 index 00000000000..4ac10c1adc1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00154_distributed_with_distinct.sql @@ -0,0 +1 @@ +SELECT DISTINCT number FROM remote('127.0.0.{1,2}', system.numbers) LIMIT 10 From 46eb776111e34ae87fc2b8c7b1012b304c74bb1b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 5 May 2015 17:54:14 +0300 Subject: [PATCH 025/116] dbms: increased 'max_ast_elements' by default and for profile 'web' [#METRIKASUPP-5044]. --- dbms/include/DB/Interpreters/Limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Interpreters/Limits.h b/dbms/include/DB/Interpreters/Limits.h index 41a65d9e2e6..e86628f40b7 100644 --- a/dbms/include/DB/Interpreters/Limits.h +++ b/dbms/include/DB/Interpreters/Limits.h @@ -61,7 +61,7 @@ struct Limits M(SettingUInt64, max_subquery_depth, 100) \ M(SettingUInt64, max_pipeline_depth, 1000) \ M(SettingUInt64, max_ast_depth, 1000) /** Проверяются не во время парсинга, */ \ - M(SettingUInt64, max_ast_elements, 10000) /** а уже после парсинга запроса. */ \ + M(SettingUInt64, max_ast_elements, 50000) /** а уже после парсинга запроса. */ \ \ /** 0 - можно всё. 1 - только запросы на чтение. 2 - только запросы на чтение, а также изменение настроек, кроме настройки readonly. */ \ M(SettingUInt64, readonly, 0) \ From 9c99febfc318c686ada3ad20b290e546f2850b17 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 5 May 2015 18:50:20 +0300 Subject: [PATCH 026/116] dbms: added ProfileEvents for Compiler [#METR-16212]. --- dbms/include/DB/Common/ProfileEvents.h | 3 +++ dbms/src/Interpreters/Compiler.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/dbms/include/DB/Common/ProfileEvents.h b/dbms/include/DB/Common/ProfileEvents.h index 2170e91309b..d664f5fe3a8 100644 --- a/dbms/include/DB/Common/ProfileEvents.h +++ b/dbms/include/DB/Common/ProfileEvents.h @@ -66,6 +66,9 @@ M(DistributedConnectionFailTry) \ M(DistributedConnectionFailAtAll) \ \ + M(CompileAttempt) \ + M(CompileSuccess) \ + \ M(END) namespace ProfileEvents diff --git a/dbms/src/Interpreters/Compiler.cpp b/dbms/src/Interpreters/Compiler.cpp index 995015227ea..14d404f0d9c 100644 --- a/dbms/src/Interpreters/Compiler.cpp +++ b/dbms/src/Interpreters/Compiler.cpp @@ -191,6 +191,8 @@ void Compiler::compile( CodeGenerator get_code, ReadyCallback on_ready) { + ProfileEvents::increment(ProfileEvents::CompileAttempt); + std::string prefix = path + "/" + file_name; std::string cpp_file_path = prefix + ".cpp"; std::string so_file_path = prefix + ".so"; @@ -257,6 +259,7 @@ void Compiler::compile( } LOG_INFO(log, "Compiled code " << file_name); + ProfileEvents::increment(ProfileEvents::CompileSuccess); on_ready(lib); } From cebe87f321ca332eb52964d56591d7cdc9ff8770 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 5 May 2015 21:22:59 +0300 Subject: [PATCH 027/116] Merge --- dbms/src/Storages/StorageMergeTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index d8972e9d0fd..febaccd0036 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -262,7 +262,8 @@ void StorageMergeTree::dropPartition(const Field & partition, bool detach, bool /// Просит завершить мерджи и не позволяет им начаться. /// Это защищает от "оживания" данных за удалённую партицию после завершения мерджа. const MergeTreeMergeBlocker merge_blocker{merger}; - auto structure_lock = lockStructure(true); + /// Дожидается завершения мерджей и не даёт начаться новым. + auto lock = lockForAlter(); DayNum_t month = MergeTreeData::getMonthDayNum(partition); From a5afb7a86316d84e528b0814c9c39b5d1cedc14d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 7 May 2015 02:35:37 +0300 Subject: [PATCH 028/116] dbms: cut dependency [#METR-2944]. --- .../DB/Interpreters/InterpreterSelectQuery.h | 15 ++++++++++----- .../src/Interpreters/InterpreterSelectQuery.cpp | 17 +++++++++++++---- dbms/src/Storages/StorageDistributed.cpp | 2 ++ 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/dbms/include/DB/Interpreters/InterpreterSelectQuery.h b/dbms/include/DB/Interpreters/InterpreterSelectQuery.h index e0707951674..6c6d278dc6b 100644 --- a/dbms/include/DB/Interpreters/InterpreterSelectQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterSelectQuery.h @@ -2,14 +2,17 @@ #include #include -#include +#include #include -#include -#include namespace DB { +class ExpressionAnalyzer; +class ASTSelectQuery; +class SubqueryForSet; + + /** Интерпретирует запрос SELECT. Возвращает поток блоков с результатами выполнения запроса до стадии to_stage. */ class InterpreterSelectQuery @@ -58,6 +61,8 @@ public: size_t subquery_depth_ = 0, BlockInputStreamPtr input = nullptr); + ~InterpreterSelectQuery(); + /** Выполнить запрос, возможно являющиийся цепочкой UNION ALL. * Получить поток блоков для чтения */ @@ -121,9 +126,9 @@ private: void executeLimit( BlockInputStreams & streams); void executeProjection( BlockInputStreams & streams, ExpressionActionsPtr expression); void executeDistinct( BlockInputStreams & streams, bool before_order, Names columns); - void executeSubqueriesInSetsAndJoins(BlockInputStreams & streams, SubqueriesForSets & subqueries_for_sets); + void executeSubqueriesInSetsAndJoins(BlockInputStreams & streams, std::unordered_map & subqueries_for_sets); - void ignoreWithTotals() { query.group_by_with_totals = false; } + void ignoreWithTotals(); ASTPtr query_ptr; ASTSelectQuery & query; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 9b47fc9b101..654b6a4fef9 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -33,6 +34,9 @@ namespace DB { +InterpreterSelectQuery::~InterpreterSelectQuery() = default; + + void InterpreterSelectQuery::init(BlockInputStreamPtr input, const Names & required_column_names, const NamesAndTypesList & table_column_names) { original_max_threads = settings.max_threads; @@ -269,10 +273,9 @@ DataTypes InterpreterSelectQuery::getReturnTypes() { DataTypes res; NamesAndTypesList columns = query_analyzer->getSelectSampleBlock().getColumnsList(); - for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it) - { - res.push_back(it->type); - } + for (auto & column : columns) + res.push_back(column.type); + return res; } @@ -1006,4 +1009,10 @@ BlockInputStreamPtr InterpreterSelectQuery::executeAndFormat(WriteBuffer & buf) } +void InterpreterSelectQuery::ignoreWithTotals() +{ + query.group_by_with_totals = false; +} + + } diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 39f0637f3bb..fad8e7cdf52 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -7,9 +7,11 @@ #include #include #include +#include #include #include +#include #include From 44066984cf25752eedfc30032579281a2ca1db53 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 7 May 2015 13:31:50 +0300 Subject: [PATCH 029/116] Merge --- dbms/include/DB/Common/BaseCellAging.h | 26 +++ dbms/include/DB/Common/CellAging.h | 26 +++ dbms/include/DB/Common/LRUCache.h | 60 +++++-- dbms/include/DB/Common/TrivialCellAging.h | 18 ++ dbms/include/DB/Interpreters/Settings.h | 3 + dbms/include/DB/Storages/MarkCache.h | 9 +- dbms/src/Common/tests/lru_cache.cpp | 204 +++++++++++++++++----- dbms/src/Interpreters/Context.cpp | 3 +- 8 files changed, 287 insertions(+), 62 deletions(-) create mode 100644 dbms/include/DB/Common/BaseCellAging.h create mode 100644 dbms/include/DB/Common/CellAging.h create mode 100644 dbms/include/DB/Common/TrivialCellAging.h diff --git a/dbms/include/DB/Common/BaseCellAging.h b/dbms/include/DB/Common/BaseCellAging.h new file mode 100644 index 00000000000..1da3acd3bb4 --- /dev/null +++ b/dbms/include/DB/Common/BaseCellAging.h @@ -0,0 +1,26 @@ +#pragma once + +namespace DB +{ + +/** Базовый класс для управления временем жизнью элементов кэша. + */ +template +class BaseCellAging +{ +public: + using Timestamp = TTimestamp; + using Delay = TDelay; + +public: + virtual ~BaseCellAging() = default; + /// Обновить timestamp элемента кэша. + virtual const Timestamp & update() = 0; + /// Просрочен ли элемент кэша? Срок истечения годности задается в секундах. + virtual bool expired(const Timestamp & last_timestamp, const Delay & expiration_delay) const = 0; + +protected: + Timestamp timestamp = Timestamp(); +}; + +} diff --git a/dbms/include/DB/Common/CellAging.h b/dbms/include/DB/Common/CellAging.h new file mode 100644 index 00000000000..a947e6ced74 --- /dev/null +++ b/dbms/include/DB/Common/CellAging.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/** Класс для управления временем жизнью элементов кэша. + */ +class CellAging final : public BaseCellAging +{ +public: + const Timestamp & update() override + { + timestamp = std::chrono::steady_clock::now(); + return timestamp; + } + + bool expired(const Timestamp & last_timestamp, const Delay & expiration_delay) const override + { + return (last_timestamp > timestamp) && ((last_timestamp - timestamp) > expiration_delay); + } +}; + +} diff --git a/dbms/include/DB/Common/LRUCache.h b/dbms/include/DB/Common/LRUCache.h index df29d32a9bc..677685c62fb 100644 --- a/dbms/include/DB/Common/LRUCache.h +++ b/dbms/include/DB/Common/LRUCache.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -20,22 +21,30 @@ struct TrivialWeightFunction } }; - -/** Кеш, вытесняющий долго не использовавшиеся записи. thread-safe. +/** Кеш, вытесняющий долго не использовавшиеся и устаревшие записи. thread-safe. * WeightFunction - тип, оператор () которого принимает Mapped и возвращает "вес" (примерный размер) этого значения. - * Кеш начинает выбрасывать значения, когда их суммарный вес превышает max_size. + * Кеш начинает выбрасывать значения, когда их суммарный вес превышает max_size и срок годности этих значений истёк. * После вставки значения его вес не должен меняться. */ -template , typename WeightFunction = TrivialWeightFunction > +template , + typename WeightFunction = TrivialWeightFunction, + typename TCellAging = TrivialCellAging> class LRUCache { public: - typedef TKey Key; - typedef TMapped Mapped; - typedef std::shared_ptr MappedPtr; + using Key = TKey; + using Mapped = TMapped; + using MappedPtr = std::shared_ptr; - LRUCache(size_t max_size_) - : max_size(std::max(1ul, max_size_)) {} +private: + using CacheCellAging = TCellAging; + +public: + using Delay = typename CacheCellAging::Delay; + + LRUCache(size_t max_size_, const Delay & expiration_delay_ = Delay()) + : max_size(std::max(1ul, max_size_)), expiration_delay(expiration_delay_) {} MappedPtr get(const Key & key) { @@ -51,6 +60,8 @@ public: ++hits; Cell & cell = it->second; + (void) cell.aging.update(); + /// Переместим ключ в конец очереди. Итератор остается валидным. queue.splice(queue.end(), queue, cell.queue_iterator); @@ -82,7 +93,8 @@ public: cell.size = cell.value ? weight_function(*cell.value) : 0; current_size += cell.size; - removeOverflow(); + const auto & last_timestamp = cell.aging.update(); + removeOverflow(last_timestamp); } void getStats(size_t & out_hits, size_t & out_misses) const @@ -120,17 +132,18 @@ protected: /// Суммарный вес выброшенных из кеша элементов. /// Обнуляется каждый раз, когда информация добавляется в Profile events private: - typedef std::list LRUQueue; - typedef typename LRUQueue::iterator LRUQueueIterator; + using LRUQueue = std::list; + using LRUQueueIterator = typename LRUQueue::iterator; struct Cell { MappedPtr value; size_t size; LRUQueueIterator queue_iterator; + CacheCellAging aging; }; - typedef std::unordered_map Cells; + using Cells = std::unordered_map; LRUQueue queue; Cells cells; @@ -138,6 +151,7 @@ private: /// Суммарный вес значений. size_t current_size = 0; const size_t max_size; + const Delay expiration_delay; mutable Poco::FastMutex mutex; size_t hits = 0; @@ -145,15 +159,27 @@ private: WeightFunction weight_function; - void removeOverflow() + using Timestamp = typename CacheCellAging::Timestamp; + + void removeOverflow(const Timestamp & last_timestamp) { size_t queue_size = cells.size(); - while (current_size > max_size && queue_size > 1) + while ((current_size > max_size) && (queue_size > 1)) { const Key & key = queue.front(); + auto it = cells.find(key); - current_size -= it->second.size; - current_weight_lost += it->second.size; + if (it == cells.end()) + throw Exception("LRUCache became inconsistent. There must be a bug in it. Clearing it for now.", + ErrorCodes::LOGICAL_ERROR); + + const auto & cell = it->second; + if (!cell.aging.expired(last_timestamp, expiration_delay)) + break; + + current_size -= cell.size; + current_weight_lost += cell.size; + cells.erase(it); queue.pop_front(); --queue_size; diff --git a/dbms/include/DB/Common/TrivialCellAging.h b/dbms/include/DB/Common/TrivialCellAging.h new file mode 100644 index 00000000000..b83ff8d7a17 --- /dev/null +++ b/dbms/include/DB/Common/TrivialCellAging.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +namespace DB +{ + +/** Класс производный от BaseCellAging для тех случаев, когда на самом деле кэш не нуждается + * в управлении временем жизнью своих элементов. Ничего не делает. + */ +class TrivialCellAging final : public BaseCellAging +{ +public: + const Timestamp & update() override { return timestamp; } + bool expired(const Timestamp & last_timestamp, const Delay & expiration_delay) const override { return true; } +}; + +} diff --git a/dbms/include/DB/Interpreters/Settings.h b/dbms/include/DB/Interpreters/Settings.h index 7a359218cf3..29853bce226 100644 --- a/dbms/include/DB/Interpreters/Settings.h +++ b/dbms/include/DB/Interpreters/Settings.h @@ -120,6 +120,9 @@ struct Settings \ /** В запросе INSERT с указанием столбцов, заполнять значения по-умолчанию только для столбцов с явными DEFAULT-ами. */ \ M(SettingBool, strict_insert_defaults, 0) \ + \ + /** В случае превышения максимального размера mark_cache, удалять только записи, старше чем mark_cache_min_lifetime секунд. */ \ + M(SettingUInt64, mark_cache_min_lifetime, 10000) \ /// Всевозможные ограничения на выполнение запроса. Limits limits; diff --git a/dbms/include/DB/Storages/MarkCache.h b/dbms/include/DB/Storages/MarkCache.h index 00c1b0c0755..f80a546974a 100644 --- a/dbms/include/DB/Storages/MarkCache.h +++ b/dbms/include/DB/Storages/MarkCache.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -47,14 +48,14 @@ struct MarksWeightFunction /** Кэш засечек в столбце из StorageMergeTree. */ -class MarkCache : public LRUCache +class MarkCache : public LRUCache { private: - typedef LRUCache Base; + typedef LRUCache Base; public: - MarkCache(size_t max_size_in_bytes) - : Base(max_size_in_bytes) {} + MarkCache(size_t max_size_in_bytes, const Delay & expiration_delay) + : Base(max_size_in_bytes, expiration_delay) {} /// Посчитать ключ от пути к файлу и смещения. static UInt128 hash(const String & path_to_file) diff --git a/dbms/src/Common/tests/lru_cache.cpp b/dbms/src/Common/tests/lru_cache.cpp index f7205021196..dd6abca36be 100644 --- a/dbms/src/Common/tests/lru_cache.cpp +++ b/dbms/src/Common/tests/lru_cache.cpp @@ -1,8 +1,76 @@ #include +#include + #include #include +#include +#include -using namespace DB; +namespace +{ + +void run(); +void runTest(unsigned int num, const std::function func); +bool test1(); +bool test2(); + +#define ASSERT_CHECK(cond, res) \ +do \ +{ \ + if (!(cond)) \ + { \ + std::cout << __FILE__ << ":" << __LINE__ << ":" \ + << "Assertion " << #cond << " failed.\n"; \ + if ((res)) { (res) = false; } \ + } \ +} \ +while (0) + +void run() +{ + const std::vector > tests = + { + test1, + test2 + }; + + unsigned int num = 0; + for (const auto & test : tests) + { + ++num; + runTest(num, test); + } +} + +void runTest(unsigned int num, const std::function func) +{ + bool ok; + + try + { + ok = func(); + } + catch (const DB::Exception & ex) + { + ok = false; + std::cout << "Caught exception " << ex.displayText() << "\n"; + } + catch (const std::exception & ex) + { + ok = false; + std::cout << "Caught exception " << ex.what() << "\n"; + } + catch (...) + { + ok = false; + std::cout << "Caught unhandled exception\n"; + } + + if (ok) + std::cout << "Test " << num << " passed\n"; + else + std::cout << "Test " << num << " failed\n"; +} struct Weight { @@ -12,52 +80,108 @@ struct Weight } }; -void fail() +bool test1() { - std::cout << "failed" << std::endl; - exit(1); + using Cache = DB::LRUCache, Weight>; + using MappedPtr = Cache::MappedPtr; + + auto ptr = [](const std::string & s) + { + return MappedPtr(new std::string(s)); + }; + + Cache cache(10); + + bool res = true; + + ASSERT_CHECK(!cache.get("asd"), res); + + cache.set("asd", ptr("qwe")); + + ASSERT_CHECK((*cache.get("asd") == "qwe"), res); + + cache.set("zxcv", ptr("12345")); + cache.set("01234567891234567", ptr("--")); + + ASSERT_CHECK((*cache.get("zxcv") == "12345"), res); + ASSERT_CHECK((*cache.get("asd") == "qwe"), res); + ASSERT_CHECK((*cache.get("01234567891234567") == "--"), res); + ASSERT_CHECK(!cache.get("123x"), res); + + cache.set("321x", ptr("+")); + + ASSERT_CHECK(!cache.get("zxcv"), res); + ASSERT_CHECK((*cache.get("asd") == "qwe"), res); + ASSERT_CHECK((*cache.get("01234567891234567") == "--"), res); + ASSERT_CHECK(!cache.get("123x"), res); + ASSERT_CHECK((*cache.get("321x") == "+"), res); + + ASSERT_CHECK((cache.weight() == 6), res); + ASSERT_CHECK((cache.count() == 3), res); + + return res; } -typedef LRUCache, Weight> Cache; -typedef Cache::MappedPtr MappedPtr; - -MappedPtr ptr(const std::string & s) +bool test2() { - return MappedPtr(new std::string(s)); + using namespace std::literals; + using Cache = DB::LRUCache, Weight, DB::CellAging>; + using MappedPtr = Cache::MappedPtr; + + auto ptr = [](const std::string & s) + { + return MappedPtr(new std::string(s)); + }; + + Cache cache(10, 3s); + + bool res = true; + + ASSERT_CHECK(!cache.get("asd"), res); + + cache.set("asd", ptr("qwe")); + + ASSERT_CHECK((*cache.get("asd") == "qwe"), res); + + cache.set("zxcv", ptr("12345")); + cache.set("01234567891234567", ptr("--")); + + ASSERT_CHECK((*cache.get("zxcv") == "12345"), res); + ASSERT_CHECK((*cache.get("asd") == "qwe"), res); + ASSERT_CHECK((*cache.get("01234567891234567") == "--"), res); + ASSERT_CHECK(!cache.get("123x"), res); + + cache.set("321x", ptr("+")); + + ASSERT_CHECK((cache.get("zxcv")), res); + ASSERT_CHECK((*cache.get("asd") == "qwe"), res); + ASSERT_CHECK((*cache.get("01234567891234567") == "--"), res); + ASSERT_CHECK(!cache.get("123x"), res); + ASSERT_CHECK((*cache.get("321x") == "+"), res); + + ASSERT_CHECK((cache.weight() == 11), res); + ASSERT_CHECK((cache.count() == 4), res); + + std::this_thread::sleep_for(5s); + + cache.set("123x", ptr("2769")); + + ASSERT_CHECK(!cache.get("zxcv"), res); + ASSERT_CHECK((*cache.get("asd") == "qwe"), res); + ASSERT_CHECK((*cache.get("01234567891234567") == "--"), res); + ASSERT_CHECK((*cache.get("321x") == "+"), res); + + ASSERT_CHECK((cache.weight() == 10), res); + ASSERT_CHECK((cache.count() == 4), res); + + return res; +} + } int main() { - try - { - Cache cache(10); - - if (cache.get("asd")) fail(); - cache.set("asd", ptr("qwe")); - if (*cache.get("asd") != "qwe") fail(); - cache.set("zxcv", ptr("12345")); - cache.set("01234567891234567", ptr("--")); - if (*cache.get("zxcv") != "12345") fail(); - if (*cache.get("asd") != "qwe") fail(); - if (*cache.get("01234567891234567") != "--") fail(); - if (cache.get("123x")) fail(); - cache.set("321x", ptr("+")); - if (cache.get("zxcv")) fail(); - - if (*cache.get("asd") != "qwe") fail(); - if (*cache.get("01234567891234567") != "--") fail(); - if (cache.get("123x")) fail(); - if (*cache.get("321x") != "+") fail(); - - if (cache.weight() != 6) fail(); - if (cache.count() != 3) fail(); - - std::cout << "passed" << std::endl; - } - catch (...) - { - fail(); - } - + run(); return 0; } + diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 5910ab3be40..f8152e9c835 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -730,7 +731,7 @@ void Context::setMarkCache(size_t cache_size_in_bytes) if (shared->mark_cache) throw Exception("Uncompressed cache has been already created.", ErrorCodes::LOGICAL_ERROR); - shared->mark_cache.reset(new MarkCache(cache_size_in_bytes)); + shared->mark_cache.reset(new MarkCache(cache_size_in_bytes, std::chrono::seconds(settings.mark_cache_min_lifetime))); } MarkCachePtr Context::getMarkCache() const From ac6a365ea1eae75d6041118fa8ab30e721e0c3a5 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 7 May 2015 15:28:09 +0300 Subject: [PATCH 030/116] Greatly simplfied code. [#METR-15652] --- dbms/include/DB/Common/BaseCellAging.h | 26 --------------- dbms/include/DB/Common/CellAging.h | 26 --------------- dbms/include/DB/Common/LRUCache.h | 40 ++++++++++++++--------- dbms/include/DB/Common/TrivialCellAging.h | 18 ---------- dbms/include/DB/Storages/MarkCache.h | 5 ++- dbms/src/Common/tests/lru_cache.cpp | 3 +- 6 files changed, 27 insertions(+), 91 deletions(-) delete mode 100644 dbms/include/DB/Common/BaseCellAging.h delete mode 100644 dbms/include/DB/Common/CellAging.h delete mode 100644 dbms/include/DB/Common/TrivialCellAging.h diff --git a/dbms/include/DB/Common/BaseCellAging.h b/dbms/include/DB/Common/BaseCellAging.h deleted file mode 100644 index 1da3acd3bb4..00000000000 --- a/dbms/include/DB/Common/BaseCellAging.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -namespace DB -{ - -/** Базовый класс для управления временем жизнью элементов кэша. - */ -template -class BaseCellAging -{ -public: - using Timestamp = TTimestamp; - using Delay = TDelay; - -public: - virtual ~BaseCellAging() = default; - /// Обновить timestamp элемента кэша. - virtual const Timestamp & update() = 0; - /// Просрочен ли элемент кэша? Срок истечения годности задается в секундах. - virtual bool expired(const Timestamp & last_timestamp, const Delay & expiration_delay) const = 0; - -protected: - Timestamp timestamp = Timestamp(); -}; - -} diff --git a/dbms/include/DB/Common/CellAging.h b/dbms/include/DB/Common/CellAging.h deleted file mode 100644 index a947e6ced74..00000000000 --- a/dbms/include/DB/Common/CellAging.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -/** Класс для управления временем жизнью элементов кэша. - */ -class CellAging final : public BaseCellAging -{ -public: - const Timestamp & update() override - { - timestamp = std::chrono::steady_clock::now(); - return timestamp; - } - - bool expired(const Timestamp & last_timestamp, const Delay & expiration_delay) const override - { - return (last_timestamp > timestamp) && ((last_timestamp - timestamp) > expiration_delay); - } -}; - -} diff --git a/dbms/include/DB/Common/LRUCache.h b/dbms/include/DB/Common/LRUCache.h index 677685c62fb..a4a8f058db0 100644 --- a/dbms/include/DB/Common/LRUCache.h +++ b/dbms/include/DB/Common/LRUCache.h @@ -3,11 +3,11 @@ #include #include #include +#include #include #include #include #include -#include namespace DB { @@ -26,24 +26,21 @@ struct TrivialWeightFunction * Кеш начинает выбрасывать значения, когда их суммарный вес превышает max_size и срок годности этих значений истёк. * После вставки значения его вес не должен меняться. */ -template , - typename WeightFunction = TrivialWeightFunction, - typename TCellAging = TrivialCellAging> +template , typename WeightFunction = TrivialWeightFunction > class LRUCache { public: using Key = TKey; using Mapped = TMapped; using MappedPtr = std::shared_ptr; + using Delay = std::chrono::seconds; private: - using CacheCellAging = TCellAging; + using Clock = std::chrono::steady_clock; + using Timestamp = Clock::time_point; public: - using Delay = typename CacheCellAging::Delay; - - LRUCache(size_t max_size_, const Delay & expiration_delay_ = Delay()) + LRUCache(size_t max_size_, const Delay & expiration_delay_ = Delay::zero()) : max_size(std::max(1ul, max_size_)), expiration_delay(expiration_delay_) {} MappedPtr get(const Key & key) @@ -59,8 +56,7 @@ public: ++hits; Cell & cell = it->second; - - (void) cell.aging.update(); + updateCellTimestamp(cell); /// Переместим ключ в конец очереди. Итератор остается валидным. queue.splice(queue.end(), queue, cell.queue_iterator); @@ -92,9 +88,9 @@ public: cell.value = mapped; cell.size = cell.value ? weight_function(*cell.value) : 0; current_size += cell.size; + updateCellTimestamp(cell); - const auto & last_timestamp = cell.aging.update(); - removeOverflow(last_timestamp); + removeOverflow(cell.timestamp); } void getStats(size_t & out_hits, size_t & out_misses) const @@ -137,10 +133,18 @@ private: struct Cell { + public: + bool expired(const Timestamp & last_timestamp, const Delay & expiration_delay) const + { + return (expiration_delay == Delay::zero()) || + ((last_timestamp > timestamp) && ((last_timestamp - timestamp) > expiration_delay)); + } + + public: MappedPtr value; size_t size; LRUQueueIterator queue_iterator; - CacheCellAging aging; + Timestamp timestamp; }; using Cells = std::unordered_map; @@ -159,7 +163,11 @@ private: WeightFunction weight_function; - using Timestamp = typename CacheCellAging::Timestamp; + void updateCellTimestamp(Cell & cell) + { + if (expiration_delay != Delay::zero()) + cell.timestamp = Clock::now(); + } void removeOverflow(const Timestamp & last_timestamp) { @@ -174,7 +182,7 @@ private: ErrorCodes::LOGICAL_ERROR); const auto & cell = it->second; - if (!cell.aging.expired(last_timestamp, expiration_delay)) + if (!cell.expired(last_timestamp, expiration_delay)) break; current_size -= cell.size; diff --git a/dbms/include/DB/Common/TrivialCellAging.h b/dbms/include/DB/Common/TrivialCellAging.h deleted file mode 100644 index b83ff8d7a17..00000000000 --- a/dbms/include/DB/Common/TrivialCellAging.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -/** Класс производный от BaseCellAging для тех случаев, когда на самом деле кэш не нуждается - * в управлении временем жизнью своих элементов. Ничего не делает. - */ -class TrivialCellAging final : public BaseCellAging -{ -public: - const Timestamp & update() override { return timestamp; } - bool expired(const Timestamp & last_timestamp, const Delay & expiration_delay) const override { return true; } -}; - -} diff --git a/dbms/include/DB/Storages/MarkCache.h b/dbms/include/DB/Storages/MarkCache.h index f80a546974a..2bea8630824 100644 --- a/dbms/include/DB/Storages/MarkCache.h +++ b/dbms/include/DB/Storages/MarkCache.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -48,10 +47,10 @@ struct MarksWeightFunction /** Кэш засечек в столбце из StorageMergeTree. */ -class MarkCache : public LRUCache +class MarkCache : public LRUCache { private: - typedef LRUCache Base; + typedef LRUCache Base; public: MarkCache(size_t max_size_in_bytes, const Delay & expiration_delay) diff --git a/dbms/src/Common/tests/lru_cache.cpp b/dbms/src/Common/tests/lru_cache.cpp index dd6abca36be..6a52f32ed54 100644 --- a/dbms/src/Common/tests/lru_cache.cpp +++ b/dbms/src/Common/tests/lru_cache.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -125,7 +124,7 @@ bool test1() bool test2() { using namespace std::literals; - using Cache = DB::LRUCache, Weight, DB::CellAging>; + using Cache = DB::LRUCache, Weight>; using MappedPtr = Cache::MappedPtr; auto ptr = [](const std::string & s) From 51711df1c06b07a688e107c4acd5c891cca2a0d9 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 7 May 2015 15:28:41 +0300 Subject: [PATCH 031/116] Removed blank. [#METR-15652] --- dbms/include/DB/Common/LRUCache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Common/LRUCache.h b/dbms/include/DB/Common/LRUCache.h index a4a8f058db0..4945b04bac6 100644 --- a/dbms/include/DB/Common/LRUCache.h +++ b/dbms/include/DB/Common/LRUCache.h @@ -136,7 +136,7 @@ private: public: bool expired(const Timestamp & last_timestamp, const Delay & expiration_delay) const { - return (expiration_delay == Delay::zero()) || + return (expiration_delay == Delay::zero()) || ((last_timestamp > timestamp) && ((last_timestamp - timestamp) > expiration_delay)); } From be67de26b5006186758bf0f48082aa4bdd891ea2 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 29 Apr 2015 23:31:28 +0300 Subject: [PATCH 032/116] dbms: sequenceMatch aggregate function draft, temporal conditions currently ignored [#METR-16058] --- .../AggregateFunctionSequenceMatch.h | 422 ++++++++++++++++++ .../AggregateFunctionFactory.cpp | 11 +- 2 files changed, 432 insertions(+), 1 deletion(-) create mode 100644 dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h new file mode 100644 index 00000000000..1e0abd17530 --- /dev/null +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -0,0 +1,422 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +/// helper type for comparing `std::pair`s using solely the .first member +template