From 6d71c367edfa0594e7404d16ea1fbc45dcc5b1b6 Mon Sep 17 00:00:00 2001 From: Pavel Kartavyy Date: Thu, 28 May 2015 17:08:31 +0300 Subject: [PATCH 01/67] WriteHelpers: add template specialization for const char * [#METR-15514] --- dbms/include/DB/IO/WriteHelpers.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/dbms/include/DB/IO/WriteHelpers.h b/dbms/include/DB/IO/WriteHelpers.h index 8e2b73dcd86..08e6e43da9a 100644 --- a/dbms/include/DB/IO/WriteHelpers.h +++ b/dbms/include/DB/IO/WriteHelpers.h @@ -252,13 +252,19 @@ void writeAnyEscapedString(const String & s, WriteBuffer & buf) } -inline void writeEscapedString(const String & s, WriteBuffer & buf) +inline void writeEscapedString(const char * str, size_t size, WriteBuffer & buf) { /// strpbrk в libc под Linux на процессорах с SSE 4.2 хорошо оптимизирована (этот if ускоряет код в 1.5 раза) - if (nullptr == strpbrk(s.data(), "\b\f\n\r\t\'\\") && strlen(s.data()) == s.size()) - writeString(s, buf); + if (nullptr == strpbrk(str, "\b\f\n\r\t\'\\") && strlen(str) == size) + writeString(str, size, buf); else - writeAnyEscapedString<'\''>(s, buf); + writeAnyEscapedString<'\''>(str, str + size, buf); +} + + +inline void writeEscapedString(const String & s, WriteBuffer & buf) +{ + writeEscapedString(s.data(), s.size(), buf); } @@ -470,6 +476,10 @@ inline void writeText(const Float32 & x, WriteBuffer & buf) { writeFloatText(x, inline void writeText(const Float64 & x, WriteBuffer & buf) { writeFloatText(x, buf); } inline void writeText(const String & x, WriteBuffer & buf) { writeEscapedString(x, buf); } inline void writeText(const bool & x, WriteBuffer & buf) { writeBoolText(x, buf); } +/// в отличие от метода для std::string +/// здесь предполагается, что x null-terminated строка. 
+inline void writeText(const char * x, WriteBuffer & buf) { writeEscapedString(x, strlen(x), buf); } +inline void writeText(const char * x, size_t size, WriteBuffer & buf) { writeEscapedString(x, size, buf); } inline void writeText(const VisitID_t & x, WriteBuffer & buf) { writeIntText(static_cast(x), buf); } inline void writeText(const mysqlxx::Date & x, WriteBuffer & buf) { writeDateText(x, buf); } From 4463a747dace754a1ffa3bae03f84166eb078dcb Mon Sep 17 00:00:00 2001 From: Pavel Kartavyy Date: Wed, 3 Jun 2015 18:32:06 +0300 Subject: [PATCH 02/67] ReadHelpers: add assertChar function [#METR-15514] --- dbms/include/DB/IO/ReadHelpers.h | 1 + dbms/src/IO/ReadHelpers.cpp | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/dbms/include/DB/IO/ReadHelpers.h b/dbms/include/DB/IO/ReadHelpers.h index ef4a9eeed0b..12302a33436 100644 --- a/dbms/include/DB/IO/ReadHelpers.h +++ b/dbms/include/DB/IO/ReadHelpers.h @@ -105,6 +105,7 @@ void readVectorBinary(std::vector & v, ReadBuffer & buf, size_t MAX_VECTOR_SI void assertString(const char * s, ReadBuffer & buf); void assertEOF(ReadBuffer & buf); +void assertChar(char symbol, ReadBuffer & buf); inline void assertString(const String & s, ReadBuffer & buf) { diff --git a/dbms/src/IO/ReadHelpers.cpp b/dbms/src/IO/ReadHelpers.cpp index c392ba094c8..c16d547ca9f 100644 --- a/dbms/src/IO/ReadHelpers.cpp +++ b/dbms/src/IO/ReadHelpers.cpp @@ -36,6 +36,16 @@ void assertString(const char * s, ReadBuffer & buf) } } +void assertChar(char symbol, ReadBuffer & buf) +{ + if (buf.eof() || *buf.position() != symbol) + { + char err[2] = {symbol, '\0'}; + throwAtAssertionFailed(err, buf); + } + ++buf.position(); +} + void assertEOF(ReadBuffer & buf) { if (!buf.eof()) From 67944de74e3b5357adb37e779e078f28467b425d Mon Sep 17 00:00:00 2001 From: Pavel Kartavyy Date: Tue, 9 Jun 2015 15:30:30 +0300 Subject: [PATCH 03/67] zookeeper: add createOrUpdate method [#METR-15514] --- libs/libzkutil/include/zkutil/ZooKeeper.h | 3 +++ libs/libzkutil/src/ZooKeeper.cpp | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/libs/libzkutil/include/zkutil/ZooKeeper.h b/libs/libzkutil/include/zkutil/ZooKeeper.h index 2eb6f8a12a0..a402470e8bf 100644 --- a/libs/libzkutil/include/zkutil/ZooKeeper.h +++ b/libs/libzkutil/include/zkutil/ZooKeeper.h @@ -126,6 +126,9 @@ public: void set(const std::string & path, const std::string & data, int32_t version = -1, Stat * stat = nullptr); + /** Создает ноду, если ее не существует. Иначе обновляет */ + void createOrUpdate(const std::string & path, const std::string & data, int32_t mode); + /** Не бросает исключение при следующих ошибках: * - Такой ноды нет. * - У ноды другая версия. 
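For orientation, the new createOrUpdate declared above (implemented in the ZooKeeper.cpp hunk that follows) saves callers from branching on whether the node already exists: the node is updated in place when present and created with the given mode otherwise. A minimal, hypothetical usage sketch — the helper name is invented for illustration, and zkutil::CreateMode::Persistent is assumed to be the usual mode constant:

#include <string>
#include <zkutil/ZooKeeper.h>

/// Hypothetical caller: update the node if it exists, create it otherwise.
void publishStatus(zkutil::ZooKeeper & zk, const std::string & path, const std::string & status)
{
    /// The mode constant here is an assumption; any create mode accepted by ZooKeeper::create fits.
    zk.createOrUpdate(path, status, zkutil::CreateMode::Persistent);
}

The error handling of the implementation is tightened later in this series (PATCH 05), so that a successful trySet (ZOK) is no longer treated as an error.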
diff --git a/libs/libzkutil/src/ZooKeeper.cpp b/libs/libzkutil/src/ZooKeeper.cpp index 506e4198e62..68fcd2269c1 100644 --- a/libs/libzkutil/src/ZooKeeper.cpp +++ b/libs/libzkutil/src/ZooKeeper.cpp @@ -418,6 +418,17 @@ void ZooKeeper::set(const std::string & path, const std::string & data, int32_t check(trySet(path, data, version, stat), path); } +void ZooKeeper::createOrUpdate(const std::string & path, const std::string & data, int32_t mode) +{ + int code = trySet(path, data, -1); + if (code == ZNONODE) + { + create(path, data, mode); + } + else + throw zkutil::KeeperException(code, path); +} + int32_t ZooKeeper::trySet(const std::string & path, const std::string & data, int32_t version, Stat * stat_) { From e201f39f7fd7523af6385414e12115b1fb8da402 Mon Sep 17 00:00:00 2001 From: Pavel Kartavyy Date: Wed, 10 Jun 2015 15:28:42 +0300 Subject: [PATCH 04/67] Zookeeper: style fix --- libs/libzkutil/src/ZooKeeper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/libzkutil/src/ZooKeeper.cpp b/libs/libzkutil/src/ZooKeeper.cpp index 68fcd2269c1..998e57e0a3a 100644 --- a/libs/libzkutil/src/ZooKeeper.cpp +++ b/libs/libzkutil/src/ZooKeeper.cpp @@ -390,7 +390,7 @@ bool ZooKeeper::tryGet(const std::string & path, std::string & res, Stat * stat_ { int32_t code = retry(std::bind(&ZooKeeper::getImpl, this, std::ref(path), std::ref(res), stat_, watch)); - if (!( code == ZOK || + if (!(code == ZOK || code == ZNONODE)) throw KeeperException(code, path); From dd05019fe2d9ac3b8ab9bf5086fb96b34a35ae53 Mon Sep 17 00:00:00 2001 From: Pavel Kartavyy Date: Wed, 10 Jun 2015 15:44:55 +0300 Subject: [PATCH 05/67] cloud: fix error [#METR-15514] --- libs/libzkutil/src/ZooKeeper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/libzkutil/src/ZooKeeper.cpp b/libs/libzkutil/src/ZooKeeper.cpp index 998e57e0a3a..06ed54d65d4 100644 --- a/libs/libzkutil/src/ZooKeeper.cpp +++ b/libs/libzkutil/src/ZooKeeper.cpp @@ -425,7 +425,7 @@ void ZooKeeper::createOrUpdate(const std::string & path, const std::string & dat { create(path, data, mode); } - else + else if (code != ZOK) throw zkutil::KeeperException(code, path); } From 5c7dccebc1edde47f1100991f46b100a0e810660 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2015 21:50:44 +0300 Subject: [PATCH 06/67] dbms: allowed to compare Date and DateTime with strings in IN [#METR-2944]. --- dbms/src/Interpreters/Set.cpp | 34 ++++++++++++++++--- ..._time_with_constant_string_in_in.reference | 8 +++++ ...e_date_time_with_constant_string_in_in.sql | 8 +++++ 3 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00174_compare_date_time_with_constant_string_in_in.reference create mode 100644 dbms/tests/queries/0_stateless/00174_compare_date_time_with_constant_string_in_in.sql diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 08a6c6e0564..1f1b0cca266 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -336,11 +336,37 @@ static Field convertToType(const Field & src, const IDataType & type) } else if (is_date || is_datetime) { - if (src.getType() != Field::Types::UInt64) - throw Exception("Type mismatch in IN section: " + type.getName() + " at left, " - + Field::Types::toString(src.getType()) + " at right"); + if (src.getType() == Field::Types::UInt64) + return src; - return src; + if (src.getType() == Field::Types::String) + { + /// Возможность сравнивать даты и даты-с-временем со строкой. 
+ const String & str = src.get(); + ReadBufferFromString in(str); + + if (is_date) + { + DayNum_t date{}; + readDateText(date, in); + if (!in.eof()) + throw Exception("String is too long for Date: " + str); + + return Field(UInt64(date)); + } + else + { + time_t date_time{}; + readDateTimeText(date_time, in); + if (!in.eof()) + throw Exception("String is too long for DateTime: " + str); + + return Field(UInt64(date_time)); + } + } + + throw Exception("Type mismatch in IN section: " + type.getName() + " at left, " + + Field::Types::toString(src.getType()) + " at right"); } } else diff --git a/dbms/tests/queries/0_stateless/00174_compare_date_time_with_constant_string_in_in.reference b/dbms/tests/queries/0_stateless/00174_compare_date_time_with_constant_string_in_in.reference new file mode 100644 index 00000000000..1173f1db5af --- /dev/null +++ b/dbms/tests/queries/0_stateless/00174_compare_date_time_with_constant_string_in_in.reference @@ -0,0 +1,8 @@ +1 +0 +1 +0 +0 +1 +0 +1 diff --git a/dbms/tests/queries/0_stateless/00174_compare_date_time_with_constant_string_in_in.sql b/dbms/tests/queries/0_stateless/00174_compare_date_time_with_constant_string_in_in.sql new file mode 100644 index 00000000000..565163cfc31 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00174_compare_date_time_with_constant_string_in_in.sql @@ -0,0 +1,8 @@ +SELECT toDate('2015-02-05') IN ('2015-02-04', '2015-02-05'); +SELECT toDate('2015-02-05') IN ('2015-02-04', '2015-02-06'); +SELECT toDateTime('2015-02-03 04:05:06') IN ('2015-02-03 04:05:06', '2015-02-03 05:06:07'); +SELECT toDateTime('2015-02-03 04:05:06') IN ('2015-02-04 04:05:06', '2015-02-03 05:06:07'); +SELECT toDate('2015-02-05') NOT IN ('2015-02-04', '2015-02-05'); +SELECT toDate('2015-02-05') NOT IN ('2015-02-04', '2015-02-06'); +SELECT toDateTime('2015-02-03 04:05:06') NOT IN ('2015-02-03 04:05:06', '2015-02-03 05:06:07'); +SELECT toDateTime('2015-02-03 04:05:06') NOT IN ('2015-02-04 04:05:06', '2015-02-03 05:06:07'); From 73072b58c7b043052002354d7d2945230a5325a1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jun 2015 00:41:47 +0300 Subject: [PATCH 07/67] dbms: fixed comment [#METR-2944]. --- dbms/include/DB/Interpreters/InterpreterSelectQuery.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Interpreters/InterpreterSelectQuery.h b/dbms/include/DB/Interpreters/InterpreterSelectQuery.h index 405691fa966..9152ff9eb5e 100644 --- a/dbms/include/DB/Interpreters/InterpreterSelectQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterSelectQuery.h @@ -100,7 +100,7 @@ private: // Переименовать столбцы каждого запроса цепочки UNION ALL в такие же имена, как в первом запросе. void renameColumns(); - /** Из какой таблицы читать. JOIN-ы не поддерживаются. + /** Из какой таблицы читать. При JOIN, возвращается "левая" таблицы. */ void getDatabaseAndTableNames(String & database_name, String & table_name); From bb83c867fd69e5e19e4f54320eafc0605bb04d41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jun 2015 00:42:18 +0300 Subject: [PATCH 08/67] dbms: added support for Array arguments of function 'if' (incomplete) [#METR-16700]. 
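With this change the then/else branches of if(cond, then, else) may also be arrays of numeric element types; the result type is an array of the common numeric type of the two element types, obtained by recursing into getReturnType on the nested types (see the DataTypeArray branch added below). Internally a numeric Array column is a flat element vector plus cumulative per-row offsets, and NumArrayIfImpl picks, row by row, which source slice to append. The following self-contained sketch shows that offsets bookkeeping on plain std::vector; it is a simplified stand-in for ColumnArray, not the real API:

#include <cstddef>
#include <cstdint>
#include <vector>

/// Simplified model of an Array(Int64) column: all elements concatenated, plus
/// cumulative row offsets (offsets[i] = end of row i in data).
struct FlatArrayColumn
{
    std::vector<int64_t> data;
    std::vector<size_t> offsets;
};

FlatArrayColumn selectPerRow(const std::vector<uint8_t> & cond,
    const FlatArrayColumn & a, const FlatArrayColumn & b)
{
    FlatArrayColumn res;
    size_t a_prev = 0;
    size_t b_prev = 0;
    for (size_t i = 0; i < cond.size(); ++i)
    {
        const FlatArrayColumn & src = cond[i] ? a : b;
        size_t prev = cond[i] ? a_prev : b_prev;
        size_t end = src.offsets[i];

        /// Append the selected row's slice and record the new cumulative offset.
        res.data.insert(res.data.end(), src.data.begin() + prev, src.data.begin() + end);
        res.offsets.push_back(res.data.size());

        a_prev = a.offsets[i];
        b_prev = b.offsets[i];
    }
    return res;
}

The std::cerr debug output still present in this patch is removed by the follow-up below.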
--- .../DB/Functions/FunctionsConditional.h | 490 ++++++++++++++++-- 1 file changed, 440 insertions(+), 50 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsConditional.h b/dbms/include/DB/Functions/FunctionsConditional.h index 6c5ae79c3cf..c2f44b47ff3 100644 --- a/dbms/include/DB/Functions/FunctionsConditional.h +++ b/dbms/include/DB/Functions/FunctionsConditional.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -14,7 +15,7 @@ namespace DB /** Функция выбора по условию: if(cond, then, else). * cond - UInt8 - * then, else - либо числа/даты/даты-с-временем, либо строки. + * then, else - числовые типы, для которых есть общий тип, либо даты, даты-с-временем, либо строки, либо массивы таких типов. */ @@ -275,6 +276,224 @@ struct StringIfImpl }; +template +struct NumArrayIfImpl +{ + template + static ALWAYS_INLINE void copy_from_vector( + size_t i, + const PODArray & from_data, const ColumnArray::Offsets_t & from_offsets, ColumnArray::Offset_t from_prev_offset, + PODArray & to_data, ColumnArray::Offsets_t & to_offsets, ColumnArray::Offset_t & to_prev_offset) + { + size_t size_to_write = from_offsets[i] - from_prev_offset; + to_data.resize(to_data.size() + size_to_write); + + for (size_t i = 0; i < size_to_write; ++i) + to_data[to_prev_offset + i] = static_cast(from_data[from_prev_offset + i]); + + to_prev_offset += size_to_write; + to_offsets[i] = to_prev_offset; + } + + template + static ALWAYS_INLINE void copy_from_constant( + size_t i, + const PODArray & from_data, + PODArray & to_data, ColumnArray::Offsets_t & to_offsets, ColumnArray::Offset_t & to_prev_offset) + { + size_t size_to_write = from_data.size(); + to_data.resize(to_data.size() + size_to_write); + memcpy(&to_data[to_prev_offset], from_data.data(), size_to_write * sizeof(from_data[0])); + to_prev_offset += size_to_write; + to_offsets[i] = to_prev_offset; + } + + static void create_result_column( + Block & block, size_t result, + PODArray ** c_data, ColumnArray::Offsets_t ** c_offsets) + { + ColumnVector * col_res_vec = new ColumnVector; + ColumnArray * col_res_array = new ColumnArray(col_res_vec); + block.getByPosition(result).column = col_res_array; + + *c_data = &col_res_vec->getData(); + *c_offsets = &col_res_array->getOffsets(); + } + + + static void vector_vector( + const PODArray & cond, + const PODArray & a_data, const ColumnArray::Offsets_t & a_offsets, + const PODArray & b_data, const ColumnArray::Offsets_t & b_offsets, + Block & block, size_t result) + { + PODArray * c_data = nullptr; + ColumnArray::Offsets_t * c_offsets = nullptr; + create_result_column(block, result, &c_data, &c_offsets); + + size_t size = cond.size(); + c_offsets->resize(size); + c_data->reserve(std::max(a_data.size(), b_data.size())); + + ColumnArray::Offset_t a_prev_offset = 0; + ColumnArray::Offset_t b_prev_offset = 0; + ColumnArray::Offset_t c_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (cond[i]) + copy_from_vector(i, a_data, a_offsets, a_prev_offset, *c_data, *c_offsets, c_prev_offset); + else + copy_from_vector(i, b_data, b_offsets, b_prev_offset, *c_data, *c_offsets, c_prev_offset); + + a_prev_offset = a_offsets[i]; + b_prev_offset = b_offsets[i]; + } + } + + static void vector_constant( + const PODArray & cond, + const PODArray & a_data, const ColumnArray::Offsets_t & a_offsets, + const Array & b, + Block & block, size_t result) + { + PODArray * c_data = nullptr; + ColumnArray::Offsets_t * c_offsets = nullptr; + create_result_column(block, result, &c_data, &c_offsets); + + PODArray 
b_converted(b.size()); + for (size_t i = 0, size = b.size(); i < size; ++i) + b_converted[i] = b[i].get::Type>(); + + size_t size = cond.size(); + c_offsets->resize(size); + c_data->reserve(a_data.size()); + + ColumnArray::Offset_t a_prev_offset = 0; + ColumnArray::Offset_t c_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (cond[i]) + copy_from_vector(i, a_data, a_offsets, a_prev_offset, *c_data, *c_offsets, c_prev_offset); + else + copy_from_constant(i, b_converted, *c_data, *c_offsets, c_prev_offset); + + a_prev_offset = a_offsets[i]; + } + } + + static void constant_vector( + const PODArray & cond, + const Array & a, + const PODArray & b_data, const ColumnArray::Offsets_t & b_offsets, + Block & block, size_t result) + { + PODArray * c_data = nullptr; + ColumnArray::Offsets_t * c_offsets = nullptr; + create_result_column(block, result, &c_data, &c_offsets); + + PODArray a_converted(a.size()); + for (size_t i = 0, size = a.size(); i < size; ++i) + a_converted[i] = a[i].get::Type>(); + + size_t size = cond.size(); + c_offsets->resize(size); + c_data->reserve(b_data.size()); + + ColumnArray::Offset_t b_prev_offset = 0; + ColumnArray::Offset_t c_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (cond[i]) + copy_from_constant(i, a_converted, *c_data, *c_offsets, c_prev_offset); + else + copy_from_vector(i, b_data, b_offsets, b_prev_offset, *c_data, *c_offsets, c_prev_offset); + + b_prev_offset = b_offsets[i]; + } + } + + static void constant_constant( + const PODArray & cond, + const Array & a, const Array & b, + Block & block, size_t result) + { + PODArray * c_data = nullptr; + ColumnArray::Offsets_t * c_offsets = nullptr; + create_result_column(block, result, &c_data, &c_offsets); + + PODArray a_converted(a.size()); + for (size_t i = 0, size = a.size(); i < size; ++i) + a_converted[i] = a[i].get::Type>(); + + PODArray b_converted(b.size()); + for (size_t i = 0, size = b.size(); i < size; ++i) + b_converted[i] = b[i].get::Type>(); + + size_t size = cond.size(); + c_offsets->resize(size); + c_data->reserve((std::max(a.size(), b.size())) * size); + + ColumnArray::Offset_t c_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (cond[i]) + copy_from_constant(i, a_converted, *c_data, *c_offsets, c_prev_offset); + else + copy_from_constant(i, b_converted, *c_data, *c_offsets, c_prev_offset); + } + } +}; + +template +struct NumArrayIfImpl +{ +private: + static void throw_error() + { + throw Exception("Internal logic error: invalid types of arguments 2 and 3 of if", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } +public: + static void vector_vector( + const PODArray & cond, + const PODArray & a_data, const ColumnArray::Offsets_t & a_offsets, + const PODArray & b_data, const ColumnArray::Offsets_t & b_offsets, + Block & block, size_t result) + { + throw_error(); + } + + static void vector_constant( + const PODArray & cond, + const PODArray & a_data, const ColumnArray::Offsets_t & a_offsets, + const Array & b, + Block & block, size_t result) + { + throw_error(); + } + + static void constant_vector( + const PODArray & cond, + const Array & a, + const PODArray & b_data, const ColumnArray::Offsets_t & b_offsets, + Block & block, size_t result) + { + throw_error(); + } + + static void constant_constant( + const PODArray & cond, + const Array & a, const Array & b, + Block & block, size_t result) + { + throw_error(); + } +}; + + template struct DataTypeFromFieldTypeOrError { @@ -347,8 +566,8 @@ private: size_t result, const ColumnVector * col_left) { - ColumnVector * 
col_right_vec = typeid_cast *>(&*block.getByPosition(arguments[2]).column); - ColumnConst * col_right_const = typeid_cast *>(&*block.getByPosition(arguments[2]).column); + const ColumnVector * col_right_vec = typeid_cast *>(&*block.getByPosition(arguments[2]).column); + const ColumnConst * col_right_const = typeid_cast *>(&*block.getByPosition(arguments[2]).column); if (!col_right_vec && !col_right_const) return false; @@ -371,8 +590,8 @@ private: size_t result, const ColumnConst * col_left) { - ColumnVector * col_right_vec = typeid_cast *>(&*block.getByPosition(arguments[2]).column); - ColumnConst * col_right_const = typeid_cast *>(&*block.getByPosition(arguments[2]).column); + const ColumnVector * col_right_vec = typeid_cast *>(&*block.getByPosition(arguments[2]).column); + const ColumnConst * col_right_const = typeid_cast *>(&*block.getByPosition(arguments[2]).column); if (!col_right_vec && !col_right_const) return false; @@ -387,10 +606,131 @@ private: return true; } + template + bool executeRightTypeArray( + const ColumnVector * cond_col, + Block & block, + const ColumnNumbers & arguments, + size_t result, + const ColumnArray * col_left_array, + const ColumnVector * col_left) + { + const IColumn * col_right_untyped = block.getByPosition(arguments[2]).column.get(); + + const ColumnArray * col_right_array = typeid_cast(col_right_untyped); + const ColumnConstArray * col_right_const_array = typeid_cast(col_right_untyped); + + if (!col_right_array && !col_right_const_array) + return false; + + typedef typename NumberTraits::ResultOfIf::Type ResultType; + + if (col_right_array) + { + std::cerr << "col_right_array\n"; + + const ColumnVector * col_right_vec = typeid_cast *>(&col_right_array->getData()); + + if (!col_right_vec) + return false; + + std::cerr << "!\n"; + + NumArrayIfImpl::vector_vector( + cond_col->getData(), + col_left->getData(), col_left_array->getOffsets(), + col_right_vec->getData(), col_right_array->getOffsets(), + block, result); + } + else + { + std::cerr << "col_right_const_array\n"; + + NumArrayIfImpl::vector_constant( + cond_col->getData(), + col_left->getData(), col_left_array->getOffsets(), + col_right_const_array->getData(), + block, result); + } + + return true; + } + + template + bool executeConstRightTypeArray( + const ColumnVector * cond_col, + Block & block, + const ColumnNumbers & arguments, + size_t result, + const ColumnConstArray * col_left_const_array) + { + const IColumn * col_right_untyped = block.getByPosition(arguments[2]).column.get(); + + const ColumnArray * col_right_array = typeid_cast(col_right_untyped); + const ColumnConstArray * col_right_const_array = typeid_cast(col_right_untyped); + + if (!col_right_array && !col_right_const_array) + return false; + + typedef typename NumberTraits::ResultOfIf::Type ResultType; + + if (col_right_array) + { + std::cerr << "col_right_array\n"; + + const ColumnVector * col_right_vec = typeid_cast *>(&col_right_array->getData()); + + if (!col_right_vec) + return false; + + std::cerr << "!\n"; + + NumArrayIfImpl::constant_vector( + cond_col->getData(), + col_left_const_array->getData(), + col_right_vec->getData(), col_right_array->getOffsets(), + block, result); + } + else + { + std::cerr << "col_right_const_array\n"; + + NumArrayIfImpl::constant_constant( + cond_col->getData(), + col_left_const_array->getData(), + col_right_const_array->getData(), + block, result); + } + + return true; + } + template bool executeLeftType(const ColumnVector * cond_col, Block & block, const ColumnNumbers & arguments, size_t result) 
{ - if (ColumnVector * col_left = typeid_cast *>(&*block.getByPosition(arguments[1]).column)) + const IColumn * col_left_untyped = block.getByPosition(arguments[1]).column.get(); + + const ColumnVector * col_left = nullptr; + const ColumnConst * col_const_left = nullptr; + const ColumnArray * col_arr_left = nullptr; + const ColumnConstArray * col_const_arr_left = nullptr; + + col_left = typeid_cast *>(col_left_untyped); + if (!col_left) + { + col_const_left = typeid_cast *>(col_left_untyped); + if (!col_const_left) + { + col_arr_left = typeid_cast(col_left_untyped); + + if (col_arr_left) + col_left = typeid_cast *>(&col_arr_left->getData()); + else + col_const_arr_left = typeid_cast(col_left_untyped); + } + } + + if (col_left) { if ( executeRightType(cond_col, block, arguments, result, col_left) || executeRightType(cond_col, block, arguments, result, col_left) @@ -405,21 +745,61 @@ private: return true; else throw Exception("Illegal column " + block.getByPosition(arguments[2]).column->getName() - + " of third argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + + " of third argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); } - else if (ColumnConst * col_left = typeid_cast *>(&*block.getByPosition(arguments[1]).column)) + else if (col_const_left) { - if ( executeConstRightType(cond_col, block, arguments, result, col_left) - || executeConstRightType(cond_col, block, arguments, result, col_left) - || executeConstRightType(cond_col, block, arguments, result, col_left) - || executeConstRightType(cond_col, block, arguments, result, col_left) - || executeConstRightType(cond_col, block, arguments, result, col_left) - || executeConstRightType(cond_col, block, arguments, result, col_left) - || executeConstRightType(cond_col, block, arguments, result, col_left) - || executeConstRightType(cond_col, block, arguments, result, col_left) - || executeConstRightType(cond_col, block, arguments, result, col_left) - || executeConstRightType(cond_col, block, arguments, result, col_left)) + if ( executeConstRightType(cond_col, block, arguments, result, col_const_left) + || executeConstRightType(cond_col, block, arguments, result, col_const_left) + || executeConstRightType(cond_col, block, arguments, result, col_const_left) + || executeConstRightType(cond_col, block, arguments, result, col_const_left) + || executeConstRightType(cond_col, block, arguments, result, col_const_left) + || executeConstRightType(cond_col, block, arguments, result, col_const_left) + || executeConstRightType(cond_col, block, arguments, result, col_const_left) + || executeConstRightType(cond_col, block, arguments, result, col_const_left) + || executeConstRightType(cond_col, block, arguments, result, col_const_left) + || executeConstRightType(cond_col, block, arguments, result, col_const_left)) + return true; + else + throw Exception("Illegal column " + block.getByPosition(arguments[2]).column->getName() + + " of third argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + else if (col_arr_left && col_left) + { + std::cerr << "col_arr_left\n"; + + if ( executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) + || 
executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left)) + return true; + else + throw Exception("Illegal column " + block.getByPosition(arguments[2]).column->getName() + + " of third argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + else if (col_const_arr_left) + { + std::cerr << "col_const_arr_left\n"; + + if ( executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left)) return true; else throw Exception("Illegal column " + block.getByPosition(arguments[2]).column->getName() @@ -432,10 +812,10 @@ private: bool executeString(const ColumnVector * cond_col, Block & block, const ColumnNumbers & arguments, size_t result) { - ColumnString * col_then = typeid_cast(&*block.getByPosition(arguments[1]).column); - ColumnString * col_else = typeid_cast(&*block.getByPosition(arguments[2]).column); - ColumnConstString * col_then_const = typeid_cast(&*block.getByPosition(arguments[1]).column); - ColumnConstString * col_else_const = typeid_cast(&*block.getByPosition(arguments[2]).column); + const ColumnString * col_then = typeid_cast(&*block.getByPosition(arguments[1]).column); + const ColumnString * col_else = typeid_cast(&*block.getByPosition(arguments[2]).column); + const ColumnConstString * col_then_const = typeid_cast(&*block.getByPosition(arguments[1]).column); + const ColumnConstString * col_else_const = typeid_cast(&*block.getByPosition(arguments[2]).column); ColumnString * col_res = new ColumnString; block.getByPosition(result).column = col_res; @@ -446,31 +826,31 @@ private: if (col_then && col_else) StringIfImpl::vector_vector( cond_col->getData(), - col_then->getChars(), col_then->getOffsets(), - col_else->getChars(), col_else->getOffsets(), - res_vec, res_offsets); - else if (col_then && col_else_const) - StringIfImpl::vector_constant( - cond_col->getData(), - col_then->getChars(), col_then->getOffsets(), - col_else_const->getData(), - res_vec, res_offsets); - else if (col_then_const && col_else) - StringIfImpl::constant_vector( - cond_col->getData(), - col_then_const->getData(), - col_else->getChars(), col_else->getOffsets(), - res_vec, res_offsets); - else if (col_then_const && col_else_const) - StringIfImpl::constant_constant( - cond_col->getData(), - col_then_const->getData(), - col_else_const->getData(), - res_vec, res_offsets); - else - return false; + col_then->getChars(), 
col_then->getOffsets(), + col_else->getChars(), col_else->getOffsets(), + res_vec, res_offsets); + else if (col_then && col_else_const) + StringIfImpl::vector_constant( + cond_col->getData(), + col_then->getChars(), col_then->getOffsets(), + col_else_const->getData(), + res_vec, res_offsets); + else if (col_then_const && col_else) + StringIfImpl::constant_vector( + cond_col->getData(), + col_then_const->getData(), + col_else->getChars(), col_else->getOffsets(), + res_vec, res_offsets); + else if (col_then_const && col_else_const) + StringIfImpl::constant_constant( + cond_col->getData(), + col_then_const->getData(), + col_else_const->getData(), + res_vec, res_offsets); + else + return false; - return true; + return true; } public: @@ -492,6 +872,9 @@ public: throw Exception("Illegal type of first argument (condition) of function if. Must be UInt8.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + const DataTypeArray * type_arr1 = typeid_cast(arguments[1].get()); + const DataTypeArray * type_arr2 = typeid_cast(arguments[2].get()); + if (arguments[1]->behavesAsNumber() && arguments[2]->behavesAsNumber()) { DataTypePtr type_res; @@ -509,6 +892,11 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return type_res; } + else if (type_arr1 && type_arr2) + { + /// NOTE Сообщения об ошибках будут относится к типам элементов массивов, что немного некорректно. + return new DataTypeArray(getReturnType({arguments[0], type_arr1->getNestedType(), type_arr2->getNestedType()})); + } else if (arguments[1]->getName() != arguments[2]->getName()) { throw Exception("Incompatible second and third arguments for function " + getName() + ": " @@ -542,6 +930,7 @@ public: cond_col = typeid_cast *>(&*materialized_cond_col); } } + if (cond_col) { if (!( executeLeftType(cond_col, block, arguments, result) @@ -558,11 +947,12 @@ public: throw Exception("Illegal columns " + block.getByPosition(arguments[1]).column->getName() + " and " + block.getByPosition(arguments[2]).column->getName() + " of second (then) and third (else) arguments of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + ErrorCodes::ILLEGAL_COLUMN); } else - throw Exception("Illegal column " + cond_col->getName() + " of first argument of function " + getName() + ". Must be ColumnUInt8 or ColumnConstUInt8.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Illegal column " + cond_col->getName() + " of first argument of function " + getName() + + ". Must be ColumnUInt8 or ColumnConstUInt8.", + ErrorCodes::ILLEGAL_COLUMN); } }; From 0e01dad0a3645a9f5b81fb17b57af6ce5cb732e7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jun 2015 05:02:53 +0300 Subject: [PATCH 09/67] dbms: added support for numeric arrays for arguments of function if [#METR-16700]. 
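This follow-up removes the std::cerr debug output, adds type checks for constant array branches, and adds the 00175_if_num_arrays test, whose reference output pins down the result types (for example, [1, 2] vs [3, 4, -5] yields Array(Int16) and [0xFFFFFFFF] vs [-1] yields Array(Int64)). One implementation detail worth spelling out: a constant array branch is converted once, outside the loop, into a flat buffer (a_converted/b_converted below), and the per-row loop then merely appends that buffer for each row that selects it. A short self-contained sketch of the idea, in the same simplified flat-column terms as the sketch for PATCH 08:

#include <cstddef>
#include <cstdint>
#include <vector>

/// constant_row plays the role of b_converted: the constant branch, converted once.
/// Rows that select the other branch are left empty here purely to keep the sketch short.
void appendConstantRows(const std::vector<uint8_t> & cond,
    const std::vector<int64_t> & constant_row,
    std::vector<int64_t> & res_data,
    std::vector<size_t> & res_offsets)
{
    for (size_t i = 0; i < cond.size(); ++i)
    {
        if (!cond[i])
            res_data.insert(res_data.end(), constant_row.begin(), constant_row.end());
        res_offsets.push_back(res_data.size());
    }
}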
--- .../DB/Functions/FunctionsConditional.h | 60 ++-- .../0_stateless/00175_if_num_arrays.reference | 288 ++++++++++++++++++ .../0_stateless/00175_if_num_arrays.sql | 30 ++ 3 files changed, 345 insertions(+), 33 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00175_if_num_arrays.reference create mode 100644 dbms/tests/queries/0_stateless/00175_if_num_arrays.sql diff --git a/dbms/include/DB/Functions/FunctionsConditional.h b/dbms/include/DB/Functions/FunctionsConditional.h index c2f44b47ff3..5a46247a7fb 100644 --- a/dbms/include/DB/Functions/FunctionsConditional.h +++ b/dbms/include/DB/Functions/FunctionsConditional.h @@ -295,10 +295,9 @@ struct NumArrayIfImpl to_offsets[i] = to_prev_offset; } - template static ALWAYS_INLINE void copy_from_constant( size_t i, - const PODArray & from_data, + const PODArray & from_data, PODArray & to_data, ColumnArray::Offsets_t & to_offsets, ColumnArray::Offset_t & to_prev_offset) { size_t size_to_write = from_data.size(); @@ -361,7 +360,7 @@ struct NumArrayIfImpl ColumnArray::Offsets_t * c_offsets = nullptr; create_result_column(block, result, &c_data, &c_offsets); - PODArray b_converted(b.size()); + PODArray b_converted(b.size()); for (size_t i = 0, size = b.size(); i < size; ++i) b_converted[i] = b[i].get::Type>(); @@ -393,7 +392,7 @@ struct NumArrayIfImpl ColumnArray::Offsets_t * c_offsets = nullptr; create_result_column(block, result, &c_data, &c_offsets); - PODArray a_converted(a.size()); + PODArray a_converted(a.size()); for (size_t i = 0, size = a.size(); i < size; ++i) a_converted[i] = a[i].get::Type>(); @@ -424,11 +423,11 @@ struct NumArrayIfImpl ColumnArray::Offsets_t * c_offsets = nullptr; create_result_column(block, result, &c_data, &c_offsets); - PODArray a_converted(a.size()); + PODArray a_converted(a.size()); for (size_t i = 0, size = a.size(); i < size; ++i) a_converted[i] = a[i].get::Type>(); - PODArray b_converted(b.size()); + PODArray b_converted(b.size()); for (size_t i = 0, size = b.size(); i < size; ++i) b_converted[i] = b[i].get::Type>(); @@ -627,15 +626,11 @@ private: if (col_right_array) { - std::cerr << "col_right_array\n"; - const ColumnVector * col_right_vec = typeid_cast *>(&col_right_array->getData()); if (!col_right_vec) return false; - std::cerr << "!\n"; - NumArrayIfImpl::vector_vector( cond_col->getData(), col_left->getData(), col_left_array->getOffsets(), @@ -644,7 +639,9 @@ private: } else { - std::cerr << "col_right_const_array\n"; + if (!typeid_cast::Type *>( + typeid_cast(*col_right_const_array->getDataType()).getNestedType().get())) + return false; NumArrayIfImpl::vector_constant( cond_col->getData(), @@ -676,15 +673,11 @@ private: if (col_right_array) { - std::cerr << "col_right_array\n"; - const ColumnVector * col_right_vec = typeid_cast *>(&col_right_array->getData()); if (!col_right_vec) return false; - std::cerr << "!\n"; - NumArrayIfImpl::constant_vector( cond_col->getData(), col_left_const_array->getData(), @@ -693,7 +686,9 @@ private: } else { - std::cerr << "col_right_const_array\n"; + if (!typeid_cast::Type *>( + typeid_cast(*col_right_const_array->getDataType()).getNestedType().get())) + return false; NumArrayIfImpl::constant_constant( cond_col->getData(), @@ -713,6 +708,7 @@ private: const ColumnVector * col_left = nullptr; const ColumnConst * col_const_left = nullptr; const ColumnArray * col_arr_left = nullptr; + const ColumnVector * col_arr_left_elems = nullptr; const ColumnConstArray * col_const_arr_left = nullptr; col_left = typeid_cast *>(col_left_untyped); @@ -724,7 +720,7 @@ private: 
col_arr_left = typeid_cast(col_left_untyped); if (col_arr_left) - col_left = typeid_cast *>(&col_arr_left->getData()); + col_arr_left_elems = typeid_cast *>(&col_arr_left->getData()); else col_const_arr_left = typeid_cast(col_left_untyped); } @@ -766,30 +762,28 @@ private: + " of third argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); } - else if (col_arr_left && col_left) + else if (col_arr_left && col_arr_left_elems) { - std::cerr << "col_arr_left\n"; - - if ( executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_left)) + if ( executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_arr_left_elems) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_arr_left_elems) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_arr_left_elems) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_arr_left_elems) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_arr_left_elems) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_arr_left_elems) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_arr_left_elems) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_arr_left_elems) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_arr_left_elems) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, col_arr_left_elems)) return true; else throw Exception("Illegal column " + block.getByPosition(arguments[2]).column->getName() + " of third argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); } - else if (col_const_arr_left) + else if (col_const_arr_left + && typeid_cast::Type *>( + typeid_cast(*col_const_arr_left->getDataType()).getNestedType().get())) { - std::cerr << "col_const_arr_left\n"; - if ( executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) diff --git a/dbms/tests/queries/0_stateless/00175_if_num_arrays.reference b/dbms/tests/queries/0_stateless/00175_if_num_arrays.reference new file mode 100644 index 00000000000..3defad5015a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00175_if_num_arrays.reference @@ -0,0 +1,288 @@ +res +Array(UInt8) +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] +[1,2] +res +Array(UInt8) +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] +[1,2] +res +Array(UInt8) +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] 
+[1,2] +res +Array(UInt8) +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] +[1,2] +[3,4,5] +[1,2] +res +Array(Int64) +[] +[1,2] +[] +[1,2] +[] +[1,2] +[] +[1,2] +[] +[1,2] +res +Array(UInt64) +[] +[1,2] +[0,1] +[1,2] +[0,1,2,3] +[1,2] +[0,1,2,3,4,5] +[1,2] +[0,1,2,3,4,5,6,7] +[1,2] +res +Array(UInt64) +[0,1,2,3,4,5,6,7,8,9] +[0] +[0,1,2,3,4,5,6,7] +[0,1,2] +[0,1,2,3,4,5] +[0,1,2,3,4] +[0,1,2,3] +[0,1,2,3,4,5,6] +[0,1] +[0,1,2,3,4,5,6,7,8] +res +Array(Int32) +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +res +Array(Int16) +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +res +Array(Int32) +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +res +Array(Int64) +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +res +Array(Int32) +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +res +Array(Int16) +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +res +Array(Int32) +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +res +Array(Int64) +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +res +Array(Int32) +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +res +Array(Int16) +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +res +Array(Int32) +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +res +Array(Int64) +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +res +Array(Int32) +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +[300,-500000,500] +[256,257] +res +Array(Int16) +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +[3,4,-5] +[1,2] +res +Array(Int32) +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +[3,4,-5] +[256] +res +Array(Int64) +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +[-1] +[4294967295] +res +Array(Float64) +[] +[1.1,2] +[] +[1.1,2] +[] +[1.1,2] +[] +[1.1,2] +[] +[1.1,2] diff --git a/dbms/tests/queries/0_stateless/00175_if_num_arrays.sql b/dbms/tests/queries/0_stateless/00175_if_num_arrays.sql new file mode 100644 index 00000000000..11cae872ca3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00175_if_num_arrays.sql @@ -0,0 +1,30 @@ +SELECT number % 2 ? [1, 2] : [3, 4, 5] AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? materialize([1, 2]) : [3, 4, 5] AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? [1, 2] : materialize([3, 4, 5]) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? materialize([1, 2]) : materialize([3, 4, 5]) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; + +SELECT number % 2 ? [1, 2] : emptyArrayInt64() AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? 
[1, 2] : range(number) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? range(number) : range(toUInt64(10 - number)) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; + +SELECT number % 2 ? [256, 257] : [300, -500000, 500] AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? [1, 2] : [3, 4, -5] AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? [256] : [3, 4, -5] AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? [0xFFFFFFFF] : [-1] AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; + +SELECT number % 2 ? materialize([256, 257]) : [300, -500000, 500] AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? materialize([1, 2]) : [3, 4, -5] AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? materialize([256]) : [3, 4, -5] AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? materialize([0xFFFFFFFF]) : [-1] AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; + +SELECT number % 2 ? [256, 257] : materialize([300, -500000, 500]) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? [1, 2] : materialize([3, 4, -5]) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? [256] : materialize([3, 4, -5]) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? [0xFFFFFFFF] : materialize([-1]) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; + +SELECT number % 2 ? materialize([256, 257]) : materialize([300, -500000, 500]) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? materialize([1, 2]) : materialize([3, 4, -5]) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? materialize([256]) : materialize([3, 4, -5]) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; +SELECT number % 2 ? materialize([0xFFFFFFFF]) : materialize([-1]) AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; + +SELECT number % 2 ? [1.1, 2] : emptyArrayInt32() AS res FROM system.numbers LIMIT 10 FORMAT TabSeparatedWithNamesAndTypes; From b6ccb2f6f56e9827effdb9ef355d3909f6443bd5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 00:10:19 +0300 Subject: [PATCH 10/67] dbms: added support for arguments of type Array(String) for function if [#16700]. --- .../DB/Functions/FunctionsConditional.h | 328 ++++++++++++++++-- 1 file changed, 293 insertions(+), 35 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsConditional.h b/dbms/include/DB/Functions/FunctionsConditional.h index 5a46247a7fb..c585e7cceb0 100644 --- a/dbms/include/DB/Functions/FunctionsConditional.h +++ b/dbms/include/DB/Functions/FunctionsConditional.h @@ -493,6 +493,208 @@ public: }; +/** Реализация для массивов строк. + * NOTE: Код слишком сложный, потому что он работает в внутренностями массивов строк. 
+ */ +struct StringArrayIfImpl +{ + static ALWAYS_INLINE void copy_from_vector( + size_t i, + const ColumnString::Chars_t & from_data, + const ColumnString::Offsets_t & from_string_offsets, + const ColumnArray::Offsets_t & from_array_offsets, + const ColumnArray::Offset_t & from_array_prev_offset, + const ColumnString::Offset_t & from_string_prev_offset, + ColumnString::Chars_t & to_data, + ColumnString::Offsets_t & to_string_offsets, + ColumnArray::Offsets_t & to_array_offsets, + ColumnArray::Offset_t & to_array_prev_offset, + ColumnString::Offset_t & to_string_prev_offset) + { + size_t array_size = from_array_offsets[i] - from_array_prev_offset; + + size_t bytes_to_copy = 0; + size_t from_string_prev_offset_local = from_string_prev_offset; + for (size_t j = 0; j < array_size; ++j) + { + size_t string_size = from_string_offsets[from_array_prev_offset + j] - from_string_prev_offset_local; + + to_string_prev_offset += string_size; + to_string_offsets.push_back(to_string_prev_offset); + + from_string_prev_offset_local += string_size; + bytes_to_copy += string_size; + } + + size_t to_data_old_size = to_data.size(); + to_data.resize(to_data_old_size + bytes_to_copy); + memcpy(&to_data[to_data_old_size], &from_data[from_string_prev_offset], bytes_to_copy); + + to_array_prev_offset += array_size; + to_array_offsets[i] = to_array_prev_offset; + } + + static ALWAYS_INLINE void copy_from_constant( + size_t i, + const Array & from_data, + ColumnString::Chars_t & to_data, + ColumnString::Offsets_t & to_string_offsets, + ColumnArray::Offsets_t & to_array_offsets, + ColumnArray::Offset_t & to_array_prev_offset, + ColumnString::Offset_t & to_string_prev_offset) + { + size_t array_size = from_data.size(); + + for (size_t j = 0; j < array_size; ++j) + { + const String & str = from_data[j].get(); + size_t string_size = str.size() + 1; /// Включая 0 на конце. 
+ + to_data.resize(to_string_prev_offset + string_size); + memcpy(&to_data[to_string_prev_offset], str.data(), string_size); + + to_string_prev_offset += string_size; + to_string_offsets.push_back(to_string_prev_offset); + } + + to_array_prev_offset += array_size; + to_array_offsets[i] = to_array_prev_offset; + } + + + static void vector_vector( + const PODArray & cond, + const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_string_offsets, const ColumnArray::Offsets_t & a_array_offsets, + const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_string_offsets, const ColumnArray::Offsets_t & b_array_offsets, + ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_string_offsets, ColumnArray::Offsets_t & c_array_offsets) + { + size_t size = cond.size(); + c_array_offsets.resize(size); + c_string_offsets.reserve(std::max(a_string_offsets.size(), b_string_offsets.size())); + c_data.reserve(std::max(a_data.size(), b_data.size())); + + ColumnArray::Offset_t a_array_prev_offset = 0; + ColumnArray::Offset_t b_array_prev_offset = 0; + ColumnArray::Offset_t c_array_prev_offset = 0; + + ColumnString::Offset_t a_string_prev_offset = 0; + ColumnString::Offset_t b_string_prev_offset = 0; + ColumnString::Offset_t c_string_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (cond[i]) + copy_from_vector(i, + a_data, a_string_offsets, a_array_offsets, a_array_prev_offset, a_string_prev_offset, + c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset); + else + copy_from_vector(i, + b_data, b_string_offsets, b_array_offsets, b_array_prev_offset, b_string_prev_offset, + c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset); + + a_array_prev_offset = a_array_offsets[i]; + b_array_prev_offset = b_array_offsets[i]; + + if (a_array_prev_offset) + a_string_prev_offset = a_string_offsets[a_array_prev_offset - 1]; + + if (b_array_prev_offset) + b_string_prev_offset = b_string_offsets[b_array_prev_offset - 1]; + } + } + + template + static void vector_constant_impl( + const PODArray & cond, + const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_string_offsets, const ColumnArray::Offsets_t & a_array_offsets, + const Array & b, + ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_string_offsets, ColumnArray::Offsets_t & c_array_offsets) + { + size_t size = cond.size(); + c_array_offsets.resize(size); + c_string_offsets.reserve(a_string_offsets.size()); + c_data.reserve(a_data.size()); + + ColumnArray::Offset_t a_array_prev_offset = 0; + ColumnArray::Offset_t c_array_prev_offset = 0; + + ColumnString::Offset_t a_string_prev_offset = 0; + ColumnString::Offset_t c_string_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (reverse != cond[i]) + copy_from_vector(i, + a_data, a_string_offsets, a_array_offsets, a_array_prev_offset, a_string_prev_offset, + c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset); + else + copy_from_constant(i, + b, + c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset); + + a_array_prev_offset = a_array_offsets[i]; + + if (a_array_prev_offset) + a_string_prev_offset = a_string_offsets[a_array_prev_offset - 1]; + } + } + + static void vector_constant( + const PODArray & cond, + const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_string_offsets, const ColumnArray::Offsets_t & a_array_offsets, + const Array & b, + ColumnString::Chars_t & c_data, 
ColumnString::Offsets_t & c_string_offsets, ColumnArray::Offsets_t & c_array_offsets) + { + vector_constant_impl(cond, a_data, a_string_offsets, a_array_offsets, b, c_data, c_string_offsets, c_array_offsets); + } + + static void constant_vector( + const PODArray & cond, + const Array & a, + const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_string_offsets, const ColumnArray::Offsets_t & b_array_offsets, + ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_string_offsets, ColumnArray::Offsets_t & c_array_offsets) + { + vector_constant_impl(cond, b_data, b_string_offsets, b_array_offsets, a, c_data, c_string_offsets, c_array_offsets); + } + + static void constant_constant( + const PODArray & cond, + const Array & a, + const Array & b, + ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_string_offsets, ColumnArray::Offsets_t & c_array_offsets) + { + size_t size = cond.size(); + c_array_offsets.resize(size); + c_string_offsets.reserve(std::max(a.size(), b.size()) * size); + + size_t sum_size_a = 0; + for (const auto & s : a) + sum_size_a += s.get().size() + 1; + + size_t sum_size_b = 0; + for (const auto & s : b) + sum_size_b += s.get().size() + 1; + + c_data.reserve(std::max(sum_size_a, sum_size_b) * size); + + ColumnArray::Offset_t c_array_prev_offset = 0; + ColumnString::Offset_t c_string_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (cond[i]) + copy_from_constant(i, + a, + c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset); + else + copy_from_constant(i, + b, + c_data, c_string_offsets, c_array_offsets, c_array_prev_offset, c_string_prev_offset); + } + } +}; + + template struct DataTypeFromFieldTypeOrError { @@ -806,45 +1008,101 @@ private: bool executeString(const ColumnVector * cond_col, Block & block, const ColumnNumbers & arguments, size_t result) { - const ColumnString * col_then = typeid_cast(&*block.getByPosition(arguments[1]).column); - const ColumnString * col_else = typeid_cast(&*block.getByPosition(arguments[2]).column); - const ColumnConstString * col_then_const = typeid_cast(&*block.getByPosition(arguments[1]).column); - const ColumnConstString * col_else_const = typeid_cast(&*block.getByPosition(arguments[2]).column); + const IColumn * col_then_untyped = block.getByPosition(arguments[1]).column.get(); + const IColumn * col_else_untyped = block.getByPosition(arguments[2]).column.get(); - ColumnString * col_res = new ColumnString; - block.getByPosition(result).column = col_res; + const ColumnString * col_then = typeid_cast(col_then_untyped); + const ColumnString * col_else = typeid_cast(col_else_untyped); + const ColumnConstString * col_then_const = typeid_cast(col_then_untyped); + const ColumnConstString * col_else_const = typeid_cast(col_else_untyped); - ColumnString::Chars_t & res_vec = col_res->getChars(); - ColumnString::Offsets_t & res_offsets = col_res->getOffsets(); + if ((col_then || col_then_const) && (col_else || col_else_const)) + { + ColumnString * col_res = new ColumnString; + block.getByPosition(result).column = col_res; - if (col_then && col_else) - StringIfImpl::vector_vector( - cond_col->getData(), - col_then->getChars(), col_then->getOffsets(), - col_else->getChars(), col_else->getOffsets(), - res_vec, res_offsets); - else if (col_then && col_else_const) - StringIfImpl::vector_constant( - cond_col->getData(), - col_then->getChars(), col_then->getOffsets(), - col_else_const->getData(), - res_vec, res_offsets); - else if (col_then_const && col_else) - 
StringIfImpl::constant_vector( - cond_col->getData(), - col_then_const->getData(), - col_else->getChars(), col_else->getOffsets(), - res_vec, res_offsets); - else if (col_then_const && col_else_const) - StringIfImpl::constant_constant( - cond_col->getData(), - col_then_const->getData(), - col_else_const->getData(), - res_vec, res_offsets); - else - return false; + ColumnString::Chars_t & res_vec = col_res->getChars(); + ColumnString::Offsets_t & res_offsets = col_res->getOffsets(); - return true; + if (col_then && col_else) + StringIfImpl::vector_vector( + cond_col->getData(), + col_then->getChars(), col_then->getOffsets(), + col_else->getChars(), col_else->getOffsets(), + res_vec, res_offsets); + else if (col_then && col_else_const) + StringIfImpl::vector_constant( + cond_col->getData(), + col_then->getChars(), col_then->getOffsets(), + col_else_const->getData(), + res_vec, res_offsets); + else if (col_then_const && col_else) + StringIfImpl::constant_vector( + cond_col->getData(), + col_then_const->getData(), + col_else->getChars(), col_else->getOffsets(), + res_vec, res_offsets); + else if (col_then_const && col_else_const) + StringIfImpl::constant_constant( + cond_col->getData(), + col_then_const->getData(), + col_else_const->getData(), + res_vec, res_offsets); + else + return false; + + return true; + } + + const ColumnArray * col_arr_then = typeid_cast(col_then_untyped); + const ColumnArray * col_arr_else = typeid_cast(col_else_untyped); + const ColumnConstArray * col_arr_then_const = typeid_cast(col_then_untyped); + const ColumnConstArray * col_arr_else_const = typeid_cast(col_else_untyped); + const ColumnString * col_then_elements = col_arr_then ? typeid_cast(&col_arr_then->getData()) : nullptr; + const ColumnString * col_else_elements = col_arr_else ? 
typeid_cast(&col_arr_else->getData()) : nullptr; + + if (((col_arr_then && col_then_elements) || col_arr_then_const) + && ((col_arr_else && col_else_elements) || col_arr_else_const)) + { + ColumnString * col_res_elements = new ColumnString; + ColumnArray * col_res = new ColumnArray(col_res_elements); + block.getByPosition(result).column = col_res; + + ColumnString::Chars_t & res_chars = col_res_elements->getChars(); + ColumnString::Offsets_t & res_string_offsets = col_res_elements->getOffsets(); + ColumnArray::Offsets_t & res_array_offsets = col_res->getOffsets(); + + if (col_then_elements && col_else_elements) + StringArrayIfImpl::vector_vector( + cond_col->getData(), + col_then_elements->getChars(), col_then_elements->getOffsets(), col_arr_then->getOffsets(), + col_else_elements->getChars(), col_else_elements->getOffsets(), col_arr_else->getOffsets(), + res_chars, res_string_offsets, res_array_offsets); + else if (col_then_elements && col_arr_else_const) + StringArrayIfImpl::vector_constant( + cond_col->getData(), + col_then_elements->getChars(), col_then_elements->getOffsets(), col_arr_then->getOffsets(), + col_arr_else_const->getData(), + res_chars, res_string_offsets, res_array_offsets); + else if (col_arr_then_const && col_else_elements) + StringArrayIfImpl::constant_vector( + cond_col->getData(), + col_arr_then_const->getData(), + col_else_elements->getChars(), col_else_elements->getOffsets(), col_arr_else->getOffsets(), + res_chars, res_string_offsets, res_array_offsets); + else if (col_arr_then_const && col_arr_else_const) + StringArrayIfImpl::constant_constant( + cond_col->getData(), + col_arr_then_const->getData(), + col_arr_else_const->getData(), + res_chars, res_string_offsets, res_array_offsets); + else + return false; + + return true; + } + + return false; } public: From fc9bbaa2c11afb9a1bba4e9a53f5d579e68d332a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 00:11:03 +0300 Subject: [PATCH 11/67] dbms: added test [#METR-16700]. 
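This test covers the Array(String) branches of if() added in the previous patch. The reason that code is as involved as its own NOTE admits is the storage layout: an Array(String) column is a flat byte buffer of strings (each stored with a trailing zero byte), plus string offsets into that buffer, plus array offsets into the string offsets. The following self-contained sketch decodes one row from such a layout; it is a simplified stand-in, not the real ColumnArray/ColumnString API:

#include <cstddef>
#include <string>
#include <vector>

/// chars holds all strings back to back (each with a trailing '\0'),
/// string_offsets[k] is the end of string k in chars,
/// array_offsets[i] is the end of row i in string_offsets.
std::vector<std::string> readArrayRow(
    size_t row,
    const std::vector<char> & chars,
    const std::vector<size_t> & string_offsets,
    const std::vector<size_t> & array_offsets)
{
    std::vector<std::string> result;
    size_t string_begin = row == 0 ? 0 : array_offsets[row - 1];
    size_t string_end = array_offsets[row];
    for (size_t k = string_begin; k < string_end; ++k)
    {
        size_t char_begin = k == 0 ? 0 : string_offsets[k - 1];
        size_t char_end = string_offsets[k];    /// includes the trailing '\0'
        result.emplace_back(&chars[char_begin], char_end - char_begin - 1);
    }
    return result;
}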
--- .../00176_if_string_arrays.reference | 100 ++++++++++++++++++ .../0_stateless/00176_if_string_arrays.sql | 12 +++ 2 files changed, 112 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00176_if_string_arrays.reference create mode 100644 dbms/tests/queries/0_stateless/00176_if_string_arrays.sql diff --git a/dbms/tests/queries/0_stateless/00176_if_string_arrays.reference b/dbms/tests/queries/0_stateless/00176_if_string_arrays.reference new file mode 100644 index 00000000000..cccc0352df9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00176_if_string_arrays.reference @@ -0,0 +1,100 @@ +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +['abc'] +['Hello','World'] +[] +['Hello','','World!'] +[] +['Hello','','World!'] +[] +['Hello','','World!'] +[] +['Hello','','World!'] +[] +['Hello','','World!'] +[] +['Hello','','World!'] +[] +['Hello','','World!'] +[] +['Hello','','World!'] +[] +['Hello','','World!'] +[] +['Hello','','World!'] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] +['',''] +[''] diff --git a/dbms/tests/queries/0_stateless/00176_if_string_arrays.sql b/dbms/tests/queries/0_stateless/00176_if_string_arrays.sql new file mode 100644 index 00000000000..4f752b47cad --- /dev/null +++ b/dbms/tests/queries/0_stateless/00176_if_string_arrays.sql @@ -0,0 +1,12 @@ +SELECT number % 2 ? ['Hello', 'World'] : ['abc'] FROM system.numbers LIMIT 10; +SELECT number % 2 ? materialize(['Hello', 'World']) : ['abc'] FROM system.numbers LIMIT 10; +SELECT number % 2 ? ['Hello', 'World'] : materialize(['abc']) FROM system.numbers LIMIT 10; +SELECT number % 2 ? materialize(['Hello', 'World']) : materialize(['abc']) FROM system.numbers LIMIT 10; + +SELECT number % 2 ? ['Hello', '', 'World!'] : emptyArrayString() FROM system.numbers LIMIT 10; +SELECT number % 2 ? materialize(['Hello', '', 'World!']) : emptyArrayString() FROM system.numbers LIMIT 10; + +SELECT number % 2 ? [''] : ['', ''] FROM system.numbers LIMIT 10; +SELECT number % 2 ? materialize(['']) : ['', ''] FROM system.numbers LIMIT 10; +SELECT number % 2 ? [''] : materialize(['', '']) FROM system.numbers LIMIT 10; +SELECT number % 2 ? materialize(['']) : materialize(['', '']) FROM system.numbers LIMIT 10; From 1b61065f50aae883d32d3f312b7a59a665973c8d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 00:34:15 +0300 Subject: [PATCH 12/67] dbms: more uniform logging of query, logging client IP address [#METR-16277]. 
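In effect (an illustrative note, not a hunk from this patch), both executeQuery() overloads now log through one helper, so every query produces a single debug line under the "executeQuery" logger, with the client address prepended and newlines collapsed to spaces:

    /// Illustration only; the address and query text are placeholders.
    /// Logged roughly as: (from 192.0.2.10) SELECT count() FROM system.numbers LIMIT 1
    logQuery(query, context);

The separate "Query ID:", "Query:" and "Requested stage:" debug lines in TCPHandler are dropped below in favour of this single entry.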
--- dbms/src/Interpreters/executeQuery.cpp | 14 +++++++++++--- dbms/src/Server/TCPHandler.cpp | 4 ---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index bd9636fce01..7c6b84d706d 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -24,6 +24,14 @@ static void checkLimits(const IAST & ast, const Limits & limits) } +static void logQuery(const String & query, const Context & context) +{ + String logged_query = query; + std::replace(logged_query.begin(), logged_query.end(), '\n', ' '); + LOG_DEBUG(&Logger::get("executeQuery"), "(from " << context.getIPAddress().toString() << ") " << logged_query); +} + + void executeQuery( ReadBuffer & istr, WriteBuffer & ostr, @@ -73,9 +81,7 @@ void executeQuery( String query(begin, query_size); - String logged_query = query; - std::replace(logged_query.begin(), logged_query.end(), '\n', ' '); - LOG_DEBUG(&Logger::get("executeQuery"), logged_query); + logQuery(query, context); /// Положим запрос в список процессов. Но запрос SHOW PROCESSLIST класть не будем. ProcessList::EntryPtr process_list_entry; @@ -124,6 +130,8 @@ BlockIO executeQuery( ParserQuery parser; ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), ""); + logQuery(query, context); + /// Проверка ограничений. checkLimits(*ast, context.getSettingsRef().limits); diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp index 233918f14a5..395fb769baa 100644 --- a/dbms/src/Server/TCPHandler.cpp +++ b/dbms/src/Server/TCPHandler.cpp @@ -568,10 +568,6 @@ void TCPHandler::receiveQuery() state.compression = Protocol::Compression::Enum(compression); readStringBinary(state.query, *in); - - LOG_DEBUG(log, "Query ID: " << state.query_id); - LOG_DEBUG(log, "Query: " << state.query); - LOG_DEBUG(log, "Requested stage: " << QueryProcessingStage::toString(stage)); } From 9245f053ca93efcfa32576d591a70b6b849e6f19 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 03:27:25 +0300 Subject: [PATCH 13/67] dbms: log query and IP address in case of syntax error [#METR-16277]. --- dbms/src/Interpreters/executeQuery.cpp | 114 ++++++++++++++----------- 1 file changed, 62 insertions(+), 52 deletions(-) diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 7c6b84d706d..94907ee4d42 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -32,6 +32,62 @@ static void logQuery(const String & query, const Context & context) } +/** Распарсить запрос. Записать его в лог вместе с IP адресом клиента. + * Проверить ограничения. Записать запрос в ProcessList. + */ +static std::tuple prepareQuery( + IParser::Pos begin, IParser::Pos end, Context & context, bool internal) +{ + ProfileEvents::increment(ProfileEvents::Query); + + ParserQuery parser; + ASTPtr ast; + size_t query_size; + size_t max_query_size = context.getSettingsRef().max_query_size; + + try + { + ast = parseQuery(parser, begin, end, ""); + + /// Засунем запрос в строку. Она выводится в лог и в processlist. Если запрос INSERT, то не будем включать данные для вставки. + query_size = ast->range.second - ast->range.first; + + if (max_query_size && query_size > max_query_size) + throw Exception("Query is too large (" + toString(query_size) + ")." + " max_query_size = " + toString(max_query_size), ErrorCodes::QUERY_IS_TOO_LARGE); + } + catch (...) + { + /// Всё равно логгируем запрос. 
+ logQuery(String(begin, begin + std::min(end - begin, static_cast(max_query_size))), context); + + throw; + } + + String query(begin, query_size); + + logQuery(query, context); + + /// Проверка ограничений. + checkLimits(*ast, context.getSettingsRef().limits); + + /// Положим запрос в список процессов. Но запрос SHOW PROCESSLIST класть не будем. + ProcessList::EntryPtr process_list_entry; + if (!internal && nullptr == typeid_cast(&*ast)) + { + process_list_entry = context.getProcessList().insert( + query, context.getUser(), context.getCurrentQueryId(), context.getIPAddress(), + context.getSettingsRef().limits.max_memory_usage, + context.getSettingsRef().queue_max_wait_ms.totalMilliseconds(), + context.getSettingsRef().replace_running_query); + + context.setProcessListElement(&process_list_entry->get()); + } + + return std::make_tuple(ast, process_list_entry); +} + + void executeQuery( ReadBuffer & istr, WriteBuffer & ostr, @@ -40,10 +96,6 @@ void executeQuery( bool internal, QueryProcessingStage::Enum stage) { - ProfileEvents::increment(ProfileEvents::Query); - - ParserQuery parser; - PODArray parse_buf; const char * begin; const char * end; @@ -52,7 +104,7 @@ void executeQuery( if (istr.buffer().size() == 0) istr.next(); - size_t max_query_size = context.getSettings().max_query_size; + size_t max_query_size = context.getSettingsRef().max_query_size; if (istr.buffer().end() - istr.position() >= static_cast(max_query_size)) { @@ -70,34 +122,10 @@ void executeQuery( end = begin + parse_buf.size(); } - ASTPtr ast = parseQuery(parser, begin, end, ""); - - /// Засунем запрос в строку. Она выводится в лог и в processlist. Если запрос INSERT, то не будем включать данные для вставки. - size_t query_size = ast->range.second - ast->range.first; - - if (query_size > max_query_size) - throw Exception("Query is too large (" + toString(query_size) + ")." - " max_query_size = " + toString(max_query_size), ErrorCodes::QUERY_IS_TOO_LARGE); - - String query(begin, query_size); - - logQuery(query, context); - - /// Положим запрос в список процессов. Но запрос SHOW PROCESSLIST класть не будем. + ASTPtr ast; ProcessList::EntryPtr process_list_entry; - if (!internal && nullptr == typeid_cast(&*ast)) - { - process_list_entry = context.getProcessList().insert( - query, context.getUser(), context.getCurrentQueryId(), context.getIPAddress(), - context.getSettingsRef().limits.max_memory_usage, - context.getSettingsRef().queue_max_wait_ms.totalMilliseconds(), - context.getSettingsRef().replace_running_query); - context.setProcessListElement(&process_list_entry->get()); - } - - /// Проверка ограничений. - checkLimits(*ast, context.getSettingsRef().limits); + std::tie(ast, process_list_entry) = prepareQuery(begin, end, context, internal); QuotaForIntervals & quota = context.getQuota(); time_t current_time = time(0); @@ -125,15 +153,10 @@ BlockIO executeQuery( bool internal, QueryProcessingStage::Enum stage) { - ProfileEvents::increment(ProfileEvents::Query); + ASTPtr ast; + ProcessList::EntryPtr process_list_entry; - ParserQuery parser; - ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), ""); - - logQuery(query, context); - - /// Проверка ограничений. - checkLimits(*ast, context.getSettingsRef().limits); + std::tie(ast, process_list_entry) = prepareQuery(query.data(), query.data() + query.size(), context, internal); QuotaForIntervals & quota = context.getQuota(); time_t current_time = time(0); @@ -142,19 +165,6 @@ BlockIO executeQuery( BlockIO res; - /// Положим запрос в список процессов. 
Но запрос SHOW PROCESSLIST класть не будем. - ProcessList::EntryPtr process_list_entry; - if (!internal && nullptr == typeid_cast(&*ast)) - { - process_list_entry = context.getProcessList().insert( - query, context.getUser(), context.getCurrentQueryId(), context.getIPAddress(), - context.getSettingsRef().limits.max_memory_usage, - context.getSettingsRef().queue_max_wait_ms.totalMilliseconds(), - context.getSettingsRef().replace_running_query); - - context.setProcessListElement(&process_list_entry->get()); - } - try { InterpreterQuery interpreter(ast, context, stage); From e27323dd2eaf4e0738421dee77860b624e80256e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 03:34:39 +0300 Subject: [PATCH 14/67] dbms: addition to prev. revision [#METR-16277]. --- dbms/src/Interpreters/executeQuery.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 94907ee4d42..2d3809e5edd 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -66,7 +66,8 @@ static std::tuple prepareQuery( String query(begin, query_size); - logQuery(query, context); + if (!internal) + logQuery(query, context); /// Проверка ограничений. checkLimits(*ast, context.getSettingsRef().limits); From 071932075869eace433cd69281111d2ebed5742b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 05:11:05 +0300 Subject: [PATCH 15/67] dbms: improvement [#METR-2944]. --- .../PushingToViewsBlockOutputStream.h | 2 +- dbms/include/DB/Interpreters/IInterpreter.h | 23 ++ .../DB/Interpreters/InterpreterAlterQuery.h | 5 +- .../DB/Interpreters/InterpreterCheckQuery.h | 8 +- .../DB/Interpreters/InterpreterCreateQuery.h | 19 +- .../Interpreters/InterpreterDescribeQuery.h | 19 +- .../DB/Interpreters/InterpreterDropQuery.h | 7 +- .../DB/Interpreters/InterpreterExistsQuery.h | 19 +- .../DB/Interpreters/InterpreterFactory.h | 19 ++ .../DB/Interpreters/InterpreterInsertQuery.h | 11 +- .../Interpreters/InterpreterOptimizeQuery.h | 6 +- .../DB/Interpreters/InterpreterQuery.h | 50 ---- .../DB/Interpreters/InterpreterRenameQuery.h | 5 +- .../DB/Interpreters/InterpreterSelectQuery.h | 10 +- .../DB/Interpreters/InterpreterSetQuery.h | 7 +- .../Interpreters/InterpreterShowCreateQuery.h | 19 +- .../InterpreterShowProcesslistQuery.h | 14 +- .../Interpreters/InterpreterShowTablesQuery.h | 8 +- .../DB/Interpreters/InterpreterUseQuery.h | 6 +- dbms/include/DB/Interpreters/executeQuery.h | 2 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 6 +- .../Interpreters/InterpreterAlterQuery.cpp | 6 +- .../Interpreters/InterpreterCheckQuery.cpp | 19 +- .../Interpreters/InterpreterCreateQuery.cpp | 10 +- .../src/Interpreters/InterpreterDropQuery.cpp | 8 +- dbms/src/Interpreters/InterpreterFactory.cpp | 117 +++++++++ .../Interpreters/InterpreterInsertQuery.cpp | 65 +---- dbms/src/Interpreters/InterpreterQuery.cpp | 227 ------------------ .../Interpreters/InterpreterRenameQuery.cpp | 4 +- .../Interpreters/InterpreterSelectQuery.cpp | 29 +-- .../InterpreterShowTablesQuery.cpp | 10 - dbms/src/Interpreters/executeQuery.cpp | 154 +++++++----- dbms/src/Interpreters/loadMetadata.cpp | 3 +- dbms/src/Storages/StorageBuffer.cpp | 2 +- dbms/src/Storages/StorageDistributed.cpp | 2 +- dbms/src/Storages/StorageMaterializedView.cpp | 23 +- 36 files changed, 376 insertions(+), 568 deletions(-) create mode 100644 dbms/include/DB/Interpreters/IInterpreter.h create mode 100644 
dbms/include/DB/Interpreters/InterpreterFactory.h delete mode 100644 dbms/include/DB/Interpreters/InterpreterQuery.h create mode 100644 dbms/src/Interpreters/InterpreterFactory.cpp delete mode 100644 dbms/src/Interpreters/InterpreterQuery.cpp diff --git a/dbms/include/DB/DataStreams/PushingToViewsBlockOutputStream.h b/dbms/include/DB/DataStreams/PushingToViewsBlockOutputStream.h index 0c1f12bd3b4..ee98d1fa853 100644 --- a/dbms/include/DB/DataStreams/PushingToViewsBlockOutputStream.h +++ b/dbms/include/DB/DataStreams/PushingToViewsBlockOutputStream.h @@ -41,7 +41,7 @@ public: { BlockInputStreamPtr from = new OneBlockInputStream(block); InterpreterSelectQuery select(queries[i], context, QueryProcessingStage::Complete, 0, from); - BlockInputStreamPtr data = new MaterializingBlockInputStream(select.execute()); + BlockInputStreamPtr data = new MaterializingBlockInputStream(select.execute().in); copyData(*data, *children[i]); } diff --git a/dbms/include/DB/Interpreters/IInterpreter.h b/dbms/include/DB/Interpreters/IInterpreter.h new file mode 100644 index 00000000000..a2f2bc92081 --- /dev/null +++ b/dbms/include/DB/Interpreters/IInterpreter.h @@ -0,0 +1,23 @@ +#pragma once + +#include + + +namespace DB +{ + +/** Интерфейс интерпретаторов разных запросов. + */ +class IInterpreter +{ +public: + /** Для запросов, возвращающих результат (SELECT и похожие), устанавливает в BlockIO поток, из которого можно будет читать этот результат. + * Для запросов, принимающих данные (INSERT), устанавливает в BlockIO поток, куда можно писать данные. + * Для запросов, которые не требуют данные и ничего не возвращают, BlockIO будет пустым. + */ + virtual BlockIO execute() = 0; + + virtual ~IInterpreter() {} +}; + +} diff --git a/dbms/include/DB/Interpreters/InterpreterAlterQuery.h b/dbms/include/DB/Interpreters/InterpreterAlterQuery.h index 4ead0b62955..0b22933e7d0 100644 --- a/dbms/include/DB/Interpreters/InterpreterAlterQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterAlterQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace DB @@ -13,12 +14,12 @@ namespace DB /** Позволяет добавить или удалить столбец в таблице. * Также позволяет осуществить манипуляции с партициями таблиц семейства MergeTree. */ -class InterpreterAlterQuery +class InterpreterAlterQuery : public IInterpreter { public: InterpreterAlterQuery(ASTPtr query_ptr_, Context & context_); - void execute(); + BlockIO execute() override; /** Изменяет список столбцов в метаданных таблицы на диске. Нужно вызывать под TableStructureLock соответствующей таблицы. 
*/ diff --git a/dbms/include/DB/Interpreters/InterpreterCheckQuery.h b/dbms/include/DB/Interpreters/InterpreterCheckQuery.h index 48c6092352e..921cef24f81 100644 --- a/dbms/include/DB/Interpreters/InterpreterCheckQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterCheckQuery.h @@ -1,22 +1,22 @@ #pragma once #include +#include #include namespace DB { -class InterpreterCheckQuery +class InterpreterCheckQuery : public IInterpreter { public: InterpreterCheckQuery(ASTPtr query_ptr_, Context & context_); - BlockInputStreamPtr execute(); - DB::Block getSampleBlock(); + BlockIO execute() override; private: ASTPtr query_ptr; Context context; - DB::Block result; + Block result; }; } diff --git a/dbms/include/DB/Interpreters/InterpreterCreateQuery.h b/dbms/include/DB/Interpreters/InterpreterCreateQuery.h index 49a739eedbc..7382b797147 100644 --- a/dbms/include/DB/Interpreters/InterpreterCreateQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterCreateQuery.h @@ -2,6 +2,7 @@ #include #include +#include #include @@ -11,7 +12,7 @@ namespace DB /** Позволяет создать новую таблицу, или создать объект уже существующей таблицы, или создать БД, или создать объект уже существующей БД */ -class InterpreterCreateQuery +class InterpreterCreateQuery : public IInterpreter { public: InterpreterCreateQuery(ASTPtr query_ptr_, Context & context_); @@ -21,7 +22,19 @@ public: * assume_metadata_exists - не проверять наличие файла с метаданными и не создавать его * (для случая выполнения запроса из существующего файла с метаданными). */ - StoragePtr execute(bool assume_metadata_exists = false); + BlockIO execute() override + { + executeImpl(false); + return {}; + } + + /** assume_metadata_exists - не проверять наличие файла с метаданными и не создавать его + * (для случая выполнения запроса из существующего файла с метаданными). + */ + void executeLoadExisting() + { + executeImpl(true); + } /// Список столбцов с типами в AST. static ASTPtr formatColumns(const NamesAndTypesList & columns); @@ -32,6 +45,8 @@ public: const ColumnDefaults & column_defaults); private: + void executeImpl(bool assume_metadata_exists); + /// AST в список столбцов с типами. Столбцы типа Nested развернуты в список настоящих столбцов. using ColumnsAndDefaults = std::pair; ColumnsAndDefaults parseColumns(ASTPtr expression_list); diff --git a/dbms/include/DB/Interpreters/InterpreterDescribeQuery.h b/dbms/include/DB/Interpreters/InterpreterDescribeQuery.h index 4b91eea4cc5..81eb95abb08 100644 --- a/dbms/include/DB/Interpreters/InterpreterDescribeQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterDescribeQuery.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -20,13 +21,13 @@ namespace DB /** Вернуть названия и типы столбцов указанной таблицы. */ -class InterpreterDescribeQuery +class InterpreterDescribeQuery : public IInterpreter { public: InterpreterDescribeQuery(ASTPtr query_ptr_, Context & context_) : query_ptr(query_ptr_), context(context_) {} - BlockIO execute() + BlockIO execute() override { BlockIO res; res.in = executeImpl(); @@ -35,20 +36,6 @@ public: return res; } - BlockInputStreamPtr executeAndFormat(WriteBuffer & buf) - { - Block sample = getSampleBlock(); - ASTPtr format_ast = typeid_cast(*query_ptr).format; - String format_name = format_ast ? 
typeid_cast(*format_ast).name : context.getDefaultFormat(); - - BlockInputStreamPtr in = executeImpl(); - BlockOutputStreamPtr out = context.getFormatFactory().getOutput(format_name, buf, sample); - - copyData(*in, *out); - - return in; - } - private: ASTPtr query_ptr; Context context; diff --git a/dbms/include/DB/Interpreters/InterpreterDropQuery.h b/dbms/include/DB/Interpreters/InterpreterDropQuery.h index 2593fa6a02b..d2edf1b35f1 100644 --- a/dbms/include/DB/Interpreters/InterpreterDropQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterDropQuery.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -10,13 +11,13 @@ namespace DB /** Позволяет удалить таблицу вместе со всеми данными (DROP), или удалить информацию о таблице из сервера (DETACH). */ -class InterpreterDropQuery +class InterpreterDropQuery : public IInterpreter { public: InterpreterDropQuery(ASTPtr query_ptr_, Context & context_); - + /// Удаляет таблицу. - void execute(); + BlockIO execute() override; /// Удаляет таблицу, уже отцепленную от контекста (Context::detach). static void dropDetachedTable(String database_name, StoragePtr table, Context & context); diff --git a/dbms/include/DB/Interpreters/InterpreterExistsQuery.h b/dbms/include/DB/Interpreters/InterpreterExistsQuery.h index 348afc595af..4b7d8247529 100644 --- a/dbms/include/DB/Interpreters/InterpreterExistsQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterExistsQuery.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -17,13 +18,13 @@ namespace DB /** Проверить, существует ли таблица. Вернуть одну строку с одним столбцом result типа UInt8 со значением 0 или 1. */ -class InterpreterExistsQuery +class InterpreterExistsQuery : public IInterpreter { public: InterpreterExistsQuery(ASTPtr query_ptr_, Context & context_) : query_ptr(query_ptr_), context(context_) {} - BlockIO execute() + BlockIO execute() override { BlockIO res; res.in = executeImpl(); @@ -32,20 +33,6 @@ public: return res; } - BlockInputStreamPtr executeAndFormat(WriteBuffer & buf) - { - Block sample = getSampleBlock(); - ASTPtr format_ast = typeid_cast(*query_ptr).format; - String format_name = format_ast ? typeid_cast(*format_ast).name : context.getDefaultFormat(); - - BlockInputStreamPtr in = executeImpl(); - BlockOutputStreamPtr out = context.getFormatFactory().getOutput(format_name, buf, sample); - - copyData(*in, *out); - - return in; - } - private: ASTPtr query_ptr; Context context; diff --git a/dbms/include/DB/Interpreters/InterpreterFactory.h b/dbms/include/DB/Interpreters/InterpreterFactory.h new file mode 100644 index 00000000000..46268817652 --- /dev/null +++ b/dbms/include/DB/Interpreters/InterpreterFactory.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class InterpreterFactory +{ +public: + static SharedPtr get( + ASTPtr & query, + Context & context, + QueryProcessingStage::Enum stage = QueryProcessingStage::Complete); +}; + +} diff --git a/dbms/include/DB/Interpreters/InterpreterInsertQuery.h b/dbms/include/DB/Interpreters/InterpreterInsertQuery.h index daa0d7ad87d..dd90e1df342 100644 --- a/dbms/include/DB/Interpreters/InterpreterInsertQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterInsertQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -11,23 +12,17 @@ namespace DB /** Интерпретирует запрос INSERT. */ -class InterpreterInsertQuery +class InterpreterInsertQuery : public IInterpreter { public: InterpreterInsertQuery(ASTPtr query_ptr_, Context & context_); - /** Выполнить запрос. 
- * remaining_data_istr, если не nullptr, может содержать нераспарсенные данные для вставки. - * (заранее может быть считан в оперативку для парсинга лишь небольшой кусок запроса, который содержит не все данные) - */ - void execute(ReadBuffer * remaining_data_istr); - /** Подготовить запрос к выполнению. Вернуть потоки блоков * - поток, в который можно писать данные для выполнения запроса, если INSERT; * - поток, из которого можно читать результат выполнения запроса, если SELECT и подобные; * Или ничего, если запрос INSERT SELECT (самодостаточный запрос - не принимает входные данные, не отдаёт результат). */ - BlockIO execute(); + BlockIO execute() override; private: StoragePtr getTable(); diff --git a/dbms/include/DB/Interpreters/InterpreterOptimizeQuery.h b/dbms/include/DB/Interpreters/InterpreterOptimizeQuery.h index 03110a3efca..f36ecb1b05e 100644 --- a/dbms/include/DB/Interpreters/InterpreterOptimizeQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterOptimizeQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -11,7 +12,7 @@ namespace DB /** Просто вызвать метод optimize у таблицы. */ -class InterpreterOptimizeQuery +class InterpreterOptimizeQuery : public IInterpreter { public: InterpreterOptimizeQuery(ASTPtr query_ptr_, Context & context_) @@ -19,12 +20,13 @@ public: { } - void execute() + BlockIO execute() override { const ASTOptimizeQuery & ast = typeid_cast(*query_ptr); StoragePtr table = context.getTable(ast.database, ast.table); auto table_lock = table->lockStructure(true); table->optimize(context.getSettings()); + return {}; } private: diff --git a/dbms/include/DB/Interpreters/InterpreterQuery.h b/dbms/include/DB/Interpreters/InterpreterQuery.h deleted file mode 100644 index 0f928bebb73..00000000000 --- a/dbms/include/DB/Interpreters/InterpreterQuery.h +++ /dev/null @@ -1,50 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ - - -/** Интерпретирует произвольный запрос. - */ -class InterpreterQuery -{ -public: - InterpreterQuery(ASTPtr query_ptr_, Context & context_, QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete); - - /** Выполнить запрос. - * - * ostr - куда писать результат выполнения запроса, если он есть. - * - * remaining_data_istr, если не nullptr, может содержать нераспарсенный остаток запроса с данными. - * (заранее может быть считан в оперативку для парсинга лишь небольшой кусок запроса, который содержит не все данные) - * - * В query_plan, - * после выполнения запроса, может быть записан BlockInputStreamPtr, - * использовавшийся при выполнении запроса, - * чтобы можно было получить информацию о том, как выполнялся запрос. - */ - void execute(WriteBuffer & ostr, ReadBuffer * remaining_data_istr, BlockInputStreamPtr & query_plan); - - /** Подготовить запрос к выполнению. Вернуть потоки блоков, используя которые можно выполнить запрос. 
- */ - BlockIO execute(); - -private: - ASTPtr query_ptr; - Context context; - QueryProcessingStage::Enum stage; - - void throwIfReadOnly() - { - if (context.getSettingsRef().limits.readonly) - throw Exception("Cannot execute query in readonly mode", ErrorCodes::READONLY); - } -}; - - -} diff --git a/dbms/include/DB/Interpreters/InterpreterRenameQuery.h b/dbms/include/DB/Interpreters/InterpreterRenameQuery.h index 7013b0015ea..b82b7eb73f0 100644 --- a/dbms/include/DB/Interpreters/InterpreterRenameQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterRenameQuery.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -10,11 +11,11 @@ namespace DB /** Переименовать одну или несколько таблиц. */ -class InterpreterRenameQuery +class InterpreterRenameQuery : public IInterpreter { public: InterpreterRenameQuery(ASTPtr query_ptr_, Context & context_); - void execute(); + BlockIO execute() override; private: ASTPtr query_ptr; diff --git a/dbms/include/DB/Interpreters/InterpreterSelectQuery.h b/dbms/include/DB/Interpreters/InterpreterSelectQuery.h index 9152ff9eb5e..6efbc6f4ceb 100644 --- a/dbms/include/DB/Interpreters/InterpreterSelectQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterSelectQuery.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -15,7 +16,7 @@ class SubqueryForSet; /** Интерпретирует запрос SELECT. Возвращает поток блоков с результатами выполнения запроса до стадии to_stage. */ -class InterpreterSelectQuery +class InterpreterSelectQuery : public IInterpreter { public: /** to_stage @@ -66,17 +67,12 @@ public: /** Выполнить запрос, возможно являющиийся цепочкой UNION ALL. * Получить поток блоков для чтения */ - BlockInputStreamPtr execute(); + BlockIO execute() override; /** Выполнить запрос без объединения потоков, если это возможно. */ const BlockInputStreams & executeWithoutUnion(); - /** Выполнить запрос, записать результат в нужном формате в buf. - * BlockInputStreamPtr возвращается, чтобы можно было потом получить информацию о плане выполнения запроса. - */ - BlockInputStreamPtr executeAndFormat(WriteBuffer & buf); - DataTypes getReturnTypes(); Block getSampleBlock(); diff --git a/dbms/include/DB/Interpreters/InterpreterSetQuery.h b/dbms/include/DB/Interpreters/InterpreterSetQuery.h index ac8474862a0..0899452863a 100644 --- a/dbms/include/DB/Interpreters/InterpreterSetQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterSetQuery.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -10,20 +11,20 @@ namespace DB /** Установить один или несколько параметров, для сессии или глобально... или для текущего запроса. */ -class InterpreterSetQuery +class InterpreterSetQuery : public IInterpreter { public: InterpreterSetQuery(ASTPtr query_ptr_, Context & context_) : query_ptr(query_ptr_), context(context_) {} - /** Обычный запрос SET. Задать настройку на сессию или глобальную (если указано GLOBAL). */ - void execute() + BlockIO execute() override { ASTSetQuery & ast = typeid_cast(*query_ptr); Context & target = ast.global ? context.getGlobalContext() : context.getSessionContext(); executeImpl(ast, target); + return {}; } /** Задать настроку для текущего контекста (контекста запроса). 
diff --git a/dbms/include/DB/Interpreters/InterpreterShowCreateQuery.h b/dbms/include/DB/Interpreters/InterpreterShowCreateQuery.h index da1e381e3d1..a2aebb46d52 100644 --- a/dbms/include/DB/Interpreters/InterpreterShowCreateQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterShowCreateQuery.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -18,13 +19,13 @@ namespace DB /** Вернуть одну строку с одним столбцом statement типа String с текстом запроса, создающего указанную таблицу. */ -class InterpreterShowCreateQuery +class InterpreterShowCreateQuery : public IInterpreter { public: InterpreterShowCreateQuery(ASTPtr query_ptr_, Context & context_) : query_ptr(query_ptr_), context(context_) {} - BlockIO execute() + BlockIO execute() override { BlockIO res; res.in = executeImpl(); @@ -33,20 +34,6 @@ public: return res; } - BlockInputStreamPtr executeAndFormat(WriteBuffer & buf) - { - Block sample = getSampleBlock(); - ASTPtr format_ast = typeid_cast(*query_ptr).format; - String format_name = format_ast ? typeid_cast(*format_ast).name : context.getDefaultFormat(); - - BlockInputStreamPtr in = executeImpl(); - BlockOutputStreamPtr out = context.getFormatFactory().getOutput(format_name, buf, sample); - - copyData(*in, *out); - - return in; - } - private: ASTPtr query_ptr; Context context; diff --git a/dbms/include/DB/Interpreters/InterpreterShowProcesslistQuery.h b/dbms/include/DB/Interpreters/InterpreterShowProcesslistQuery.h index 1133dbe64d0..3f4ea5ce487 100644 --- a/dbms/include/DB/Interpreters/InterpreterShowProcesslistQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterShowProcesslistQuery.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -14,26 +15,17 @@ namespace DB /** Вернуть список запросов, исполняющихся прямо сейчас. */ -class InterpreterShowProcesslistQuery +class InterpreterShowProcesslistQuery : public IInterpreter { public: InterpreterShowProcesslistQuery(ASTPtr query_ptr_, Context & context_) : query_ptr(query_ptr_), context(context_) {} - BlockIO execute() + BlockIO execute() override { return executeQuery(getRewrittenQuery(), context, true); } - BlockInputStreamPtr executeAndFormat(WriteBuffer & buf) - { - String query = getRewrittenQuery(); - ReadBufferFromString in(query); - BlockInputStreamPtr query_plan; - executeQuery(in, buf, context, query_plan, true); - return query_plan; - } - private: ASTPtr query_ptr; Context context; diff --git a/dbms/include/DB/Interpreters/InterpreterShowTablesQuery.h b/dbms/include/DB/Interpreters/InterpreterShowTablesQuery.h index 358b188827a..f93fd497302 100644 --- a/dbms/include/DB/Interpreters/InterpreterShowTablesQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterShowTablesQuery.h @@ -1,8 +1,7 @@ #pragma once -#include - #include +#include namespace DB @@ -12,13 +11,12 @@ namespace DB /** Вывести список имён таблиц/баз данных по некоторым условиям. * Интерпретирует запрос путём замены его на запрос SELECT из таблицы system.tables или system.databases. 
*/ -class InterpreterShowTablesQuery +class InterpreterShowTablesQuery : public IInterpreter { public: InterpreterShowTablesQuery(ASTPtr query_ptr_, Context & context_); - BlockIO execute(); - BlockInputStreamPtr executeAndFormat(WriteBuffer & buf); + BlockIO execute() override; private: ASTPtr query_ptr; diff --git a/dbms/include/DB/Interpreters/InterpreterUseQuery.h b/dbms/include/DB/Interpreters/InterpreterUseQuery.h index 48986bdef08..dbc2bd7b52a 100644 --- a/dbms/include/DB/Interpreters/InterpreterUseQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterUseQuery.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -10,16 +11,17 @@ namespace DB /** Выбрать БД по-умолчанию для сессии. */ -class InterpreterUseQuery +class InterpreterUseQuery : public IInterpreter { public: InterpreterUseQuery(ASTPtr query_ptr_, Context & context_) : query_ptr(query_ptr_), context(context_) {} - void execute() + BlockIO execute() override { const String & new_database = typeid_cast(*query_ptr).database; context.getSessionContext().setCurrentDatabase(new_database); + return {}; } private: diff --git a/dbms/include/DB/Interpreters/executeQuery.h b/dbms/include/DB/Interpreters/executeQuery.h index 92c59677734..9cbdda714c0 100644 --- a/dbms/include/DB/Interpreters/executeQuery.h +++ b/dbms/include/DB/Interpreters/executeQuery.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 5b4d58b65f8..56cfd614b8c 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -768,7 +768,7 @@ void ExpressionAnalyzer::addExternalStorage(ASTPtr & subquery_or_table_name) StoragePtr external_storage = StorageMemory::create(external_table_name, columns); external_tables[external_table_name] = external_storage; - subqueries_for_sets[external_table_name].source = interpreter->execute(); + subqueries_for_sets[external_table_name].source = interpreter->execute().in; subqueries_for_sets[external_table_name].source_sample = interpreter->getSampleBlock(); subqueries_for_sets[external_table_name].table = external_storage; @@ -842,7 +842,7 @@ void ExpressionAnalyzer::makeSet(ASTFunction * node, const Block & sample_block) if (!subquery_for_set.source) { auto interpreter = interpretSubquery(arg, context, subquery_depth); - subquery_for_set.source = new LazyBlockInputStream([interpreter]() mutable { return interpreter->execute(); }); + subquery_for_set.source = new LazyBlockInputStream([interpreter]() mutable { return interpreter->execute().in; }); subquery_for_set.source_sample = interpreter->getSampleBlock(); /** Зачем используется LazyBlockInputStream? 
@@ -1594,7 +1594,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (!subquery_for_set.source) { auto interpreter = interpretSubquery(ast_join.table, context, subquery_depth, required_joined_columns); - subquery_for_set.source = new LazyBlockInputStream([interpreter]() mutable { return interpreter->execute(); }); + subquery_for_set.source = new LazyBlockInputStream([interpreter]() mutable { return interpreter->execute().in; }); subquery_for_set.source_sample = interpreter->getSampleBlock(); } diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index c5e140abe0b..27275435ac8 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -27,7 +27,7 @@ InterpreterAlterQuery::InterpreterAlterQuery(ASTPtr query_ptr_, Context & contex { } -void InterpreterAlterQuery::execute() +BlockIO InterpreterAlterQuery::execute() { auto & alter = typeid_cast(*query_ptr); const String & table_name = alter.table; @@ -64,11 +64,13 @@ void InterpreterAlterQuery::execute() } if (alter_commands.empty()) - return; + return {}; alter_commands.validate(table.get(), context); table->alter(alter_commands, database_name, table_name, context); + + return {}; } void InterpreterAlterQuery::parseAlter( diff --git a/dbms/src/Interpreters/InterpreterCheckQuery.cpp b/dbms/src/Interpreters/InterpreterCheckQuery.cpp index cc85f33c74c..d78542a4963 100644 --- a/dbms/src/Interpreters/InterpreterCheckQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCheckQuery.cpp @@ -4,13 +4,14 @@ #include #include -using namespace DB; +namespace DB +{ InterpreterCheckQuery::InterpreterCheckQuery(DB::ASTPtr query_ptr_, DB::Context& context_) : query_ptr(query_ptr_), context(context_) { } -BlockInputStreamPtr InterpreterCheckQuery::execute() +BlockIO InterpreterCheckQuery::execute() { ASTCheckQuery & alter = typeid_cast(*query_ptr); String & table_name = alter.table; @@ -18,16 +19,14 @@ BlockInputStreamPtr InterpreterCheckQuery::execute() StoragePtr table = context.getTable(database_name, table_name); - result = getSampleBlock(); + result = Block{{ new ColumnUInt8, new DataTypeUInt8, "result" }}; result.getByPosition(0).column->insert(Field(UInt64(table->checkData()))); - return BlockInputStreamPtr(new OneBlockInputStream(result)); + BlockIO res; + res.in = new OneBlockInputStream(result); + res.in_sample = result; + + return res; } -Block InterpreterCheckQuery::getSampleBlock() -{ - DB::Block b; - ColumnPtr column(new ColumnUInt8); - b.insert(ColumnWithNameAndType(column, new DataTypeUInt8, "result")); - return b; } diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 0ba822511b2..21bb0288ca0 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -42,7 +42,7 @@ InterpreterCreateQuery::InterpreterCreateQuery(ASTPtr query_ptr_, Context & cont } -StoragePtr InterpreterCreateQuery::execute(bool assume_metadata_exists) +void InterpreterCreateQuery::executeImpl(bool assume_metadata_exists) { String path = context.getPath(); String current_database = context.getCurrentDatabase(); @@ -80,8 +80,6 @@ StoragePtr InterpreterCreateQuery::execute(bool assume_metadata_exists) if (!create.if_not_exists || !context.isDatabaseExist(database_name)) context.addDatabase(database_name); - - return StoragePtr(); } SharedPtr interpreter_select; @@ -118,7 +116,7 @@ StoragePtr 
InterpreterCreateQuery::execute(bool assume_metadata_exists) if (context.isTableExist(database_name, table_name)) { if (create.if_not_exists) - return context.getTable(database_name, table_name); + return; else throw Exception("Table " + database_name + "." + table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); } @@ -251,11 +249,9 @@ StoragePtr InterpreterCreateQuery::execute(bool assume_metadata_exists) /// Если запрос CREATE SELECT, то вставим в таблицу данные if (create.select && storage_name != "View" && (storage_name != "MaterializedView" || create.is_populate)) { - BlockInputStreamPtr from = new MaterializingBlockInputStream(interpreter_select->execute()); + BlockInputStreamPtr from = new MaterializingBlockInputStream(interpreter_select->execute().in); copyData(*from, *res->write(query_ptr)); } - - return res; } InterpreterCreateQuery::ColumnsAndDefaults InterpreterCreateQuery::parseColumns(ASTPtr expression_list) diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index 1fa7faabdce..47571e67dde 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -16,7 +16,7 @@ InterpreterDropQuery::InterpreterDropQuery(ASTPtr query_ptr_, Context & context_ } -void InterpreterDropQuery::execute() +BlockIO InterpreterDropQuery::execute() { String path = context.getPath(); String current_database = context.getCurrentDatabase(); @@ -43,7 +43,7 @@ void InterpreterDropQuery::execute() if (table) tables_to_drop.push_back(table); else - return; + return {}; } else { @@ -52,7 +52,7 @@ void InterpreterDropQuery::execute() if (!drop.if_exists) context.assertDatabaseExists(database_name); else if (!context.isDatabaseExist(database_name)) - return; + return {}; Tables tables = context.getDatabases()[database_name]; @@ -111,6 +111,8 @@ void InterpreterDropQuery::execute() Poco::File(data_path).remove(false); Poco::File(metadata_path).remove(false); } + + return {}; } void InterpreterDropQuery::dropDetachedTable(String database_name, StoragePtr table, Context & context) diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp new file mode 100644 index 00000000000..1af0d9c94a9 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterFactory.cpp @@ -0,0 +1,117 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + +static void throwIfReadOnly(Context & context) +{ + if (context.getSettingsRef().limits.readonly) + throw Exception("Cannot execute query in readonly mode", ErrorCodes::READONLY); +} + + +SharedPtr InterpreterFactory::get(ASTPtr & query, Context & context, QueryProcessingStage::Enum stage) +{ + if (typeid_cast(query.get())) + { + return new InterpreterSelectQuery(query, context, stage); + } + else if (typeid_cast(query.get())) + { + throwIfReadOnly(context); + return new InterpreterInsertQuery(query, context); + } + else if (typeid_cast(query.get())) + { + throwIfReadOnly(context); + return new InterpreterCreateQuery(query, context); + } + else if (typeid_cast(query.get())) + { + throwIfReadOnly(context); + return new InterpreterDropQuery(query, context); + } + else if (typeid_cast(query.get())) + { + throwIfReadOnly(context); + return new 
InterpreterRenameQuery(query, context); + } + else if (typeid_cast(query.get())) + { + return new InterpreterShowTablesQuery(query, context); + } + else if (typeid_cast(query.get())) + { + return new InterpreterUseQuery(query, context); + } + else if (typeid_cast(query.get())) + { + /// readonly проверяется внутри InterpreterSetQuery + return new InterpreterSetQuery(query, context); + } + else if (typeid_cast(query.get())) + { + throwIfReadOnly(context); + return new InterpreterOptimizeQuery(query, context); + } + else if (typeid_cast(query.get())) + { + return new InterpreterExistsQuery(query, context); + } + else if (typeid_cast(query.get())) + { + return new InterpreterShowCreateQuery(query, context); + } + else if (typeid_cast(query.get())) + { + return new InterpreterDescribeQuery(query, context); + } + else if (typeid_cast(query.get())) + { + return new InterpreterShowProcesslistQuery(query, context); + } + else if (typeid_cast(query.get())) + { + throwIfReadOnly(context); + return new InterpreterAlterQuery(query, context); + } + else if (typeid_cast(query.get())) + { + return new InterpreterCheckQuery(query, context); + } + else + throw Exception("Unknown type of query: " + query->getID(), ErrorCodes::UNKNOWN_TYPE_OF_QUERY); +} + +} diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 04ab185b338..e328218a77a 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -64,69 +64,6 @@ Block InterpreterInsertQuery::getSampleBlock() return res; } -void InterpreterInsertQuery::execute(ReadBuffer * remaining_data_istr) -{ - ASTInsertQuery & query = typeid_cast(*query_ptr); - StoragePtr table = getTable(); - - auto table_lock = table->lockStructure(true); - - /** @note looks suspicious, first we ask to create block from NamesAndTypesList (internally in ITableDeclaration), - * then we compose the same list from the resulting block */ - NamesAndTypesListPtr required_columns = new NamesAndTypesList(table->getColumnsList()); - - /// Создаем кортеж из нескольких стримов, в которые будем писать данные. - BlockOutputStreamPtr out{ - new ProhibitColumnsBlockOutputStream{ - new AddingDefaultBlockOutputStream{ - new MaterializingBlockOutputStream{ - new PushingToViewsBlockOutputStream{query.database, query.table, context, query_ptr} - }, - required_columns, table->column_defaults, context, context.getSettingsRef().strict_insert_defaults - }, - table->materialized_columns - } - }; - - /// Какой тип запроса: INSERT VALUES | INSERT FORMAT | INSERT SELECT? - if (!query.select) - { - - String format = query.format; - if (format.empty()) - format = "Values"; - - /// Данные могут содержаться в распарсенной (query.data) и ещё не распарсенной (remaining_data_istr) части запроса. - - ConcatReadBuffer::ReadBuffers buffers; - ReadBuffer buf1(const_cast(query.data), query.data ? query.end - query.data : 0, 0); - - if (query.data) - buffers.push_back(&buf1); - buffers.push_back(remaining_data_istr); - - /** NOTE Нельзя читать из remaining_data_istr до того, как прочтём всё между query.data и query.end. - * - потому что query.data может ссылаться на кусок памяти, использующийся в качестве буфера в remaining_data_istr. 
- */ - - ConcatReadBuffer istr(buffers); - Block sample = getSampleBlock(); - - BlockInputStreamPtr in{ - context.getFormatFactory().getInput( - format, istr, sample, context.getSettings().max_insert_block_size)}; - - copyData(*in, *out); - } - else - { - InterpreterSelectQuery interpreter_select(query.select, context); - BlockInputStreamPtr in{interpreter_select.execute()}; - - copyData(*in, *out); - } -} - BlockIO InterpreterInsertQuery::execute() { @@ -161,7 +98,7 @@ BlockIO InterpreterInsertQuery::execute() else { InterpreterSelectQuery interpreter_select{query.select, context}; - BlockInputStreamPtr in{interpreter_select.execute()}; + BlockInputStreamPtr in{interpreter_select.execute().in}; res.in = new NullAndDoCopyBlockInputStream{in, out}; } diff --git a/dbms/src/Interpreters/InterpreterQuery.cpp b/dbms/src/Interpreters/InterpreterQuery.cpp deleted file mode 100644 index 4fb1f748266..00000000000 --- a/dbms/src/Interpreters/InterpreterQuery.cpp +++ /dev/null @@ -1,227 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - - -InterpreterQuery::InterpreterQuery(ASTPtr query_ptr_, Context & context_, QueryProcessingStage::Enum stage_) - : query_ptr(query_ptr_), context(context_), stage(stage_) -{ -} - - -void InterpreterQuery::execute(WriteBuffer & ostr, ReadBuffer * remaining_data_istr, BlockInputStreamPtr & query_plan) -{ - if (typeid_cast(&*query_ptr)) - { - InterpreterSelectQuery interpreter(query_ptr, context, stage); - query_plan = interpreter.executeAndFormat(ostr); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterInsertQuery interpreter(query_ptr, context); - interpreter.execute(remaining_data_istr); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterCreateQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterDropQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterRenameQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterShowTablesQuery interpreter(query_ptr, context); - query_plan = interpreter.executeAndFormat(ostr); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterUseQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - /// readonly проверяется внутри InterpreterSetQuery - InterpreterSetQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterOptimizeQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterExistsQuery interpreter(query_ptr, context); - query_plan = interpreter.executeAndFormat(ostr); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterShowCreateQuery interpreter(query_ptr, context); - query_plan = interpreter.executeAndFormat(ostr); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterDescribeQuery interpreter(query_ptr, context); - query_plan = interpreter.executeAndFormat(ostr); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterShowProcesslistQuery 
interpreter(query_ptr, context); - query_plan = interpreter.executeAndFormat(ostr); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterAlterQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterCheckQuery interpreter(query_ptr, context); - query_plan = interpreter.execute(); - } - else - throw Exception("Unknown type of query: " + query_ptr->getID(), ErrorCodes::UNKNOWN_TYPE_OF_QUERY); -} - - -BlockIO InterpreterQuery::execute() -{ - BlockIO res; - - if (typeid_cast(&*query_ptr)) - { - InterpreterSelectQuery interpreter(query_ptr, context, stage); - res.in = interpreter.execute(); - res.in_sample = interpreter.getSampleBlock(); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterInsertQuery interpreter(query_ptr, context); - res = interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterCreateQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterDropQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterRenameQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterShowTablesQuery interpreter(query_ptr, context); - res = interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterUseQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - /// readonly проверяется внутри InterpreterSetQuery - InterpreterSetQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterOptimizeQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterExistsQuery interpreter(query_ptr, context); - res = interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterShowCreateQuery interpreter(query_ptr, context); - res = interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterDescribeQuery interpreter(query_ptr, context); - res = interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterShowProcesslistQuery interpreter(query_ptr, context); - res = interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - throwIfReadOnly(); - InterpreterAlterQuery interpreter(query_ptr, context); - interpreter.execute(); - } - else if (typeid_cast(&*query_ptr)) - { - InterpreterCheckQuery interpreter(query_ptr, context); - res.in = interpreter.execute(); - res.in_sample = interpreter.getSampleBlock(); - } - else - throw Exception("Unknown type of query: " + query_ptr->getID(), ErrorCodes::UNKNOWN_TYPE_OF_QUERY); - - return res; -} - - -} diff --git a/dbms/src/Interpreters/InterpreterRenameQuery.cpp b/dbms/src/Interpreters/InterpreterRenameQuery.cpp index 31cc72f995f..388c95b5820 100644 --- a/dbms/src/Interpreters/InterpreterRenameQuery.cpp +++ b/dbms/src/Interpreters/InterpreterRenameQuery.cpp @@ -58,7 +58,7 @@ struct RenameDescription }; -void InterpreterRenameQuery::execute() +BlockIO InterpreterRenameQuery::execute() { String path = context.getPath(); String current_database = context.getCurrentDatabase(); @@ -151,6 +151,8 @@ void InterpreterRenameQuery::execute() /// Удаляем старый файл с метаданными. 
Poco::File(elem.from_metadata_path).remove(); } + + return {}; } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index a1dd0cfc74d..7c77564b9aa 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -297,12 +297,17 @@ Block InterpreterSelectQuery::getSampleBlock() } -BlockInputStreamPtr InterpreterSelectQuery::execute() +BlockIO InterpreterSelectQuery::execute() { (void) executeWithoutUnion(); if (streams.empty()) - return new NullBlockInputStream; + { + BlockIO res; + res.in = new NullBlockInputStream; + res.in_sample = getSampleBlock(); + return res; + } executeUnion(streams); @@ -326,7 +331,11 @@ BlockInputStreamPtr InterpreterSelectQuery::execute() } } - return streams[0]; + BlockIO res; + res.in = streams[0]; + res.in_sample = getSampleBlock(); + + return res; } const BlockInputStreams & InterpreterSelectQuery::executeWithoutUnion() @@ -1011,20 +1020,6 @@ void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(BlockInputStreams & } -BlockInputStreamPtr InterpreterSelectQuery::executeAndFormat(WriteBuffer & buf) -{ - Block sample = getSampleBlock(); - String format_name = query.format ? typeid_cast(*query.format).name : context.getDefaultFormat(); - - BlockInputStreamPtr in = execute(); - BlockOutputStreamPtr out = context.getFormatFactory().getOutput(format_name, buf, sample); - - copyData(*in, *out); - - return in; -} - - void InterpreterSelectQuery::ignoreWithTotals() { query.group_by_with_totals = false; diff --git a/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp b/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp index 2e699a8122a..c9652c73e7c 100644 --- a/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -52,14 +52,4 @@ BlockIO InterpreterShowTablesQuery::execute() } -BlockInputStreamPtr InterpreterShowTablesQuery::executeAndFormat(WriteBuffer & buf) -{ - String query = getRewrittenQuery(); - ReadBufferFromString in(query); - BlockInputStreamPtr query_plan; - executeQuery(in, buf, context, query_plan, true); - return query_plan; -} - - } diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 2d3809e5edd..16ef92c4a4f 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -1,13 +1,19 @@ #include -#include +#include #include +#include +#include + #include #include +#include #include #include + #include +#include #include @@ -32,11 +38,12 @@ static void logQuery(const String & query, const Context & context) } -/** Распарсить запрос. Записать его в лог вместе с IP адресом клиента. - * Проверить ограничения. Записать запрос в ProcessList. - */ -static std::tuple prepareQuery( - IParser::Pos begin, IParser::Pos end, Context & context, bool internal) +static std::tuple executeQueryImpl( + IParser::Pos begin, + IParser::Pos end, + Context & context, + bool internal, + QueryProcessingStage::Enum stage) { ProfileEvents::increment(ProfileEvents::Query); @@ -72,6 +79,11 @@ static std::tuple prepareQuery( /// Проверка ограничений. checkLimits(*ast, context.getSettingsRef().limits); + QuotaForIntervals & quota = context.getQuota(); + time_t current_time = time(0); + + quota.checkExceeded(current_time); + /// Положим запрос в список процессов. Но запрос SHOW PROCESSLIST класть не будем. 
ProcessList::EntryPtr process_list_entry; if (!internal && nullptr == typeid_cast(&*ast)) @@ -85,7 +97,37 @@ static std::tuple prepareQuery( context.setProcessListElement(&process_list_entry->get()); } - return std::make_tuple(ast, process_list_entry); + BlockIO res; + + try + { + auto interpreter = InterpreterFactory::get(ast, context, stage); + res = interpreter->execute(); + + /// Держим элемент списка процессов до конца обработки запроса. + res.process_list_entry = process_list_entry; + } + catch (...) + { + quota.addError(current_time); + throw; + } + + quota.addQuery(current_time); + + return std::make_tuple(ast, res); +} + + +BlockIO executeQuery( + const String & query, + Context & context, + bool internal, + QueryProcessingStage::Enum stage) +{ + BlockIO streams; + std::tie(std::ignore, streams) = executeQueryImpl(query.data(), query.data() + query.size(), context, internal, stage); + return streams; } @@ -124,65 +166,55 @@ void executeQuery( } ASTPtr ast; - ProcessList::EntryPtr process_list_entry; + BlockIO streams; - std::tie(ast, process_list_entry) = prepareQuery(begin, end, context, internal); + std::tie(ast, streams) = executeQueryImpl(begin, end, context, internal, stage); - QuotaForIntervals & quota = context.getQuota(); - time_t current_time = time(0); - - quota.checkExceeded(current_time); - - try + if (streams.out) { - InterpreterQuery interpreter(ast, context, stage); - interpreter.execute(ostr, &istr, query_plan); - } - catch (...) - { - quota.addError(current_time); - throw; + const ASTInsertQuery * ast_insert_query = dynamic_cast(ast.get()); + + if (!ast_insert_query) + throw Exception("Logical error: query requires data to insert, but it is not INSERT query", ErrorCodes::LOGICAL_ERROR); + + String format = ast_insert_query->format; + if (format.empty()) + format = "Values"; + + /// Данные могут содержаться в распарсенной (query.data) и ещё не распарсенной (remaining_data_istr) части запроса. + + ConcatReadBuffer::ReadBuffers buffers; + ReadBuffer buf1(const_cast(ast_insert_query->data), ast_insert_query->data ? ast_insert_query->end - ast_insert_query->data : 0, 0); + + if (ast_insert_query->data) + buffers.push_back(&buf1); + buffers.push_back(&istr); + + /** NOTE Нельзя читать из istr до того, как прочтём всё между query.data и query.end. + * - потому что query.data может ссылаться на кусок памяти, использующийся в качестве буфера в istr. + */ + + ConcatReadBuffer data_istr(buffers); + + BlockInputStreamPtr in{ + context.getFormatFactory().getInput( + format, data_istr, streams.out_sample, context.getSettings().max_insert_block_size)}; + + copyData(*in, *streams.out); } - quota.addQuery(current_time); + if (streams.in) + { + const ASTQueryWithOutput * ast_query_with_output = dynamic_cast(ast.get()); + + String format_name = ast_query_with_output && ast_query_with_output->format + ? 
typeid_cast(*ast_query_with_output->format).name + : context.getDefaultFormat(); + + BlockOutputStreamPtr out = context.getFormatFactory().getOutput(format_name, ostr, streams.in_sample); + + copyData(*streams.in, *out); + } } - -BlockIO executeQuery( - const String & query, - Context & context, - bool internal, - QueryProcessingStage::Enum stage) -{ - ASTPtr ast; - ProcessList::EntryPtr process_list_entry; - - std::tie(ast, process_list_entry) = prepareQuery(query.data(), query.data() + query.size(), context, internal); - - QuotaForIntervals & quota = context.getQuota(); - time_t current_time = time(0); - - quota.checkExceeded(current_time); - - BlockIO res; - - try - { - InterpreterQuery interpreter(ast, context, stage); - res = interpreter.execute(); - - /// Держим элемент списка процессов до конца обработки запроса. - res.process_list_entry = process_list_entry; - } - catch (...) - { - quota.addError(current_time); - throw; - } - - quota.addQuery(current_time); - return res; -} - - } diff --git a/dbms/src/Interpreters/loadMetadata.cpp b/dbms/src/Interpreters/loadMetadata.cpp index 5ffbb905535..3e1de61040f 100644 --- a/dbms/src/Interpreters/loadMetadata.cpp +++ b/dbms/src/Interpreters/loadMetadata.cpp @@ -30,8 +30,7 @@ static void executeCreateQuery(const String & query, Context & context, const St ast_create_query.attach = true; ast_create_query.database = database; - InterpreterCreateQuery interpreter(ast, context); - interpreter.execute(true); + InterpreterCreateQuery(ast, context).executeLoadExisting(); } diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 2ed9176f2e2..ce17582f163 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -130,7 +130,7 @@ BlockInputStreams StorageBuffer::read( */ if (processed_stage > QueryProcessingStage::FetchColumns) for (auto & stream : streams_from_buffers) - stream = InterpreterSelectQuery(query, context, processed_stage, 0, stream).execute(); + stream = InterpreterSelectQuery(query, context, processed_stage, 0, stream).execute().in; streams_from_dst.insert(streams_from_dst.end(), streams_from_buffers.begin(), streams_from_buffers.end()); return streams_from_dst; diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index fad8e7cdf52..a807f8d1f69 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -190,7 +190,7 @@ BlockInputStreams StorageDistributed::read( * Если этого не делать, то в разных потоках будут получаться разные типы (Const и не-Const) столбцов, * а это не разрешено, так как весь код исходит из допущения, что в потоке блоков все типы одинаковые. */ - res.emplace_back(new MaterializingBlockInputStream(interpreter.execute())); + res.emplace_back(new MaterializingBlockInputStream(interpreter.execute().in)); } } diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index 910bedfc1e1..b55e36b3d8f 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -43,22 +43,23 @@ StorageMaterializedView::StorageMaterializedView( { ASTCreateQuery & create = typeid_cast(*query_); + auto inner_table_name = getInnerTableName(); + /// Если запрос ATTACH, то к этому моменту внутренняя таблица уже должна быть подключена. if (attach_) { - if (!context.isTableExist(database_name, getInnerTableName())) + if (!data) throw Exception("Inner table is not attached yet." 
" Materialized view: " + database_name + "." + table_name + "." - " Inner table: " + database_name + "." + getInnerTableName() + ".", + " Inner table: " + database_name + "." + inner_table_name + ".", DB::ErrorCodes::LOGICAL_ERROR); - data = context.getTable(database_name, getInnerTableName()); } else { /// Составим запрос для создания внутреннего хранилища. ASTCreateQuery * manual_create_query = new ASTCreateQuery(); manual_create_query->database = database_name; - manual_create_query->table = getInnerTableName(); + manual_create_query->table = inner_table_name; manual_create_query->columns = create.columns; manual_create_query->children.push_back(manual_create_query->columns); ASTPtr ast_create_query = manual_create_query; @@ -78,7 +79,9 @@ StorageMaterializedView::StorageMaterializedView( /// Выполним запрос. InterpreterCreateQuery create_interpreter(ast_create_query, context); - data = create_interpreter.execute(); + create_interpreter.execute(); + + data = context.getTable(database_name, inner_table_name); } } @@ -115,14 +118,18 @@ BlockOutputStreamPtr StorageMaterializedView::write(ASTPtr query) void StorageMaterializedView::drop() { - context.getGlobalContext().removeDependency(DatabaseAndTableName(select_database_name, select_table_name), DatabaseAndTableName(database_name, table_name)); + context.getGlobalContext().removeDependency( + DatabaseAndTableName(select_database_name, select_table_name), + DatabaseAndTableName(database_name, table_name)); - if (context.tryGetTable(database_name, getInnerTableName())) + auto inner_table_name = getInnerTableName(); + + if (context.tryGetTable(database_name, inner_table_name)) { /// Состваляем и выполняем запрос drop для внутреннего хранилища. ASTDropQuery *drop_query = new ASTDropQuery; drop_query->database = database_name; - drop_query->table = getInnerTableName(); + drop_query->table = inner_table_name; ASTPtr ast_drop_query = drop_query; InterpreterDropQuery drop_interpreter(ast_drop_query, context); drop_interpreter.execute(); From 29bb4c0f2e65da7f2051676382c58d7f5a228af1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 05:15:26 +0300 Subject: [PATCH 16/67] dbms: addition to prev. revision [#METR-2944]. --- dbms/src/Interpreters/InterpreterFactory.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp index 1af0d9c94a9..2674e09d743 100644 --- a/dbms/src/Interpreters/InterpreterFactory.cpp +++ b/dbms/src/Interpreters/InterpreterFactory.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include From c757ab96c0a22e1efb14726ee020a2e361bd0995 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 05:25:50 +0300 Subject: [PATCH 17/67] dbms: addition to prev. revision [#METR-2944]. 
--- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 21bb0288ca0..23f4b96bd79 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -80,6 +80,8 @@ void InterpreterCreateQuery::executeImpl(bool assume_metadata_exists) if (!create.if_not_exists || !context.isDatabaseExist(database_name)) context.addDatabase(database_name); + + return; } SharedPtr interpreter_select; From 580b77c1528bfc7f8546417cedbf00e88e09895a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 05:30:50 +0300 Subject: [PATCH 18/67] dbms: addition to prev. revision [#METR-2944]. --- dbms/src/Interpreters/InterpreterCheckQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/InterpreterCheckQuery.cpp b/dbms/src/Interpreters/InterpreterCheckQuery.cpp index d78542a4963..c165ec72bae 100644 --- a/dbms/src/Interpreters/InterpreterCheckQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCheckQuery.cpp @@ -24,7 +24,7 @@ BlockIO InterpreterCheckQuery::execute() BlockIO res; res.in = new OneBlockInputStream(result); - res.in_sample = result; + res.in_sample = result.cloneEmpty(); return res; } From 09010e81f15950d89ee797e905fa0f636d8c9bd6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 05:38:38 +0300 Subject: [PATCH 19/67] dbms: added test [#METR-2944]. --- .../00177_inserts_through_http_parts.reference | 10 ++++++++++ .../0_stateless/00177_inserts_through_http_parts.sh | 11 +++++++++++ 2 files changed, 21 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.reference create mode 100755 dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh diff --git a/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.reference b/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.reference new file mode 100644 index 00000000000..f00c965d830 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.reference @@ -0,0 +1,10 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 diff --git a/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh b/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh new file mode 100755 index 00000000000..4c7834d605b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +curl 'http://localhost:8123/?query=DROP+TABLE' -d 'IF EXISTS test.insert' +curl 'http://localhost:8123/?query=CREATE' -d 'TABLE test.insert (x UInt8) ENGINE = Memory' +curl 'http://localhost:8123/' -d 'INSERT INTO test.insert VALUES (1),(2)' +curl 'http://localhost:8123/?query=INSERT+INTO+test.insert+VALUES' -d '(3),(4)' +curl 'http://localhost:8123/?query=INSERT+INTO+test.insert' -d 'VALUES (5),(6)' +curl 'http://localhost:8123/?query=INSERT+INTO+test.insert+VALUES+(7)' -d ',(8)' +curl 'http://localhost:8123/?query=INSERT+INTO+test.insert+VALUES+(9),(10)' -d ' ' +curl 'http://localhost:8123/' -d 'SELECT x FROM test.insert ORDER BY x' +curl 'http://localhost:8123/?query=DROP+TABLE' -d 'test.insert' From ba29065d300119de79e5fc55824b6b18a0c59202 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 05:41:35 +0300 Subject: [PATCH 20/67] dbms: fixed comment [#METR-2944]. 
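The comment fix below concerns the INSERT data path introduced with the executeQuery() refactoring above: the data for an INSERT may sit partly in the already-parsed query text (ast_insert_query->data .. end) and partly in the still-unread input stream (istr), and the two are glued together with a ConcatReadBuffer. A reduced sketch of that path with illustrative variable names (the real code is in executeQuery() above); the key property is that ConcatReadBuffer drains its sources strictly in order, which keeps this safe even though ast_insert_query->data may point into the very buffer that istr overwrites on its next refill:

    ReadBuffer inline_part(const_cast<char *>(ast_insert_query->data),
                           ast_insert_query->end - ast_insert_query->data, 0);

    ConcatReadBuffer::ReadBuffers sources;
    sources.push_back(&inline_part);   /// bytes already parsed out of the query text, e.g. "(1),(2)"
    sources.push_back(&istr);          /// the not-yet-read remainder of the HTTP body or TCP stream

    ConcatReadBuffer data_in(sources); /// inline_part is fully consumed before istr is touched

This is also what test 00177 above exercises: the VALUES part of an INSERT may be split arbitrarily between the URL query parameter and the POST body.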
--- dbms/src/Interpreters/executeQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 16ef92c4a4f..b8d79f5751e 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -181,7 +181,7 @@ void executeQuery( if (format.empty()) format = "Values"; - /// Данные могут содержаться в распарсенной (query.data) и ещё не распарсенной (remaining_data_istr) части запроса. + /// Данные могут содержаться в распарсенной (ast_insert_query.data) и ещё не распарсенной (istr) части запроса. ConcatReadBuffer::ReadBuffers buffers; ReadBuffer buf1(const_cast(ast_insert_query->data), ast_insert_query->data ? ast_insert_query->end - ast_insert_query->data : 0, 0); @@ -190,7 +190,7 @@ void executeQuery( buffers.push_back(&buf1); buffers.push_back(&istr); - /** NOTE Нельзя читать из istr до того, как прочтём всё между query.data и query.end. + /** NOTE Нельзя читать из istr до того, как прочтём всё между ast_insert_query.data и ast_insert_query.end. * - потому что query.data может ссылаться на кусок памяти, использующийся в качестве буфера в istr. */ From 0180cda4efdf70d2ae6403f39ae0e8e0991a479c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 18 Jun 2015 16:38:50 +0300 Subject: [PATCH 21/67] dbms: modified test [#METR-2944]. --- .../00177_inserts_through_http_parts.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh b/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh index 4c7834d605b..788f2bda147 100755 --- a/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh +++ b/dbms/tests/queries/0_stateless/00177_inserts_through_http_parts.sh @@ -1,11 +1,11 @@ #!/bin/bash -curl 'http://localhost:8123/?query=DROP+TABLE' -d 'IF EXISTS test.insert' -curl 'http://localhost:8123/?query=CREATE' -d 'TABLE test.insert (x UInt8) ENGINE = Memory' -curl 'http://localhost:8123/' -d 'INSERT INTO test.insert VALUES (1),(2)' -curl 'http://localhost:8123/?query=INSERT+INTO+test.insert+VALUES' -d '(3),(4)' -curl 'http://localhost:8123/?query=INSERT+INTO+test.insert' -d 'VALUES (5),(6)' -curl 'http://localhost:8123/?query=INSERT+INTO+test.insert+VALUES+(7)' -d ',(8)' -curl 'http://localhost:8123/?query=INSERT+INTO+test.insert+VALUES+(9),(10)' -d ' ' -curl 'http://localhost:8123/' -d 'SELECT x FROM test.insert ORDER BY x' -curl 'http://localhost:8123/?query=DROP+TABLE' -d 'test.insert' +curl -sS 'http://localhost:8123/?query=DROP+TABLE' -d 'IF EXISTS test.insert' +curl -sS 'http://localhost:8123/?query=CREATE' -d 'TABLE test.insert (x UInt8) ENGINE = Memory' +curl -sS 'http://localhost:8123/' -d 'INSERT INTO test.insert VALUES (1),(2)' +curl -sS 'http://localhost:8123/?query=INSERT+INTO+test.insert+VALUES' -d '(3),(4)' +curl -sS 'http://localhost:8123/?query=INSERT+INTO+test.insert' -d 'VALUES (5),(6)' +curl -sS 'http://localhost:8123/?query=INSERT+INTO+test.insert+VALUES+(7)' -d ',(8)' +curl -sS 'http://localhost:8123/?query=INSERT+INTO+test.insert+VALUES+(9),(10)' -d ' ' +curl -sS 'http://localhost:8123/' -d 'SELECT x FROM test.insert ORDER BY x' +curl -sS 'http://localhost:8123/?query=DROP+TABLE' -d 'test.insert' From 001ffdfa18e33b44ebe446b7606e9f16dce3d259 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 19 Jun 2015 05:34:47 +0300 Subject: [PATCH 22/67] dbms: fixed error with higher order functions [#METR-16894]. 
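The fix below removes the ColumnReplicated dummy: replicate(x, arr) now produces an ordinary ColumnArray whose size matches the block, so the intermediate column used by higher-order functions survives filtering and can be combined with other expressions in the same block (the scenario covered by the new test 00179). The effect on one block, sketched with assumed variable names (the actual construction is in FunctionReplicate in the diff):

    ColumnPtr replicated = x_column->replicate(array_column->getOffsets());
    ColumnPtr result = new ColumnArray(replicated, array_column->getOffsetsColumn());
    /// result holds one array per row of the block, e.g. replicate(1, ['a','b','c']) -> [1,1,1],
    /// unlike the old ColumnReplicated, which only pretended to have the block's size.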
--- dbms/include/DB/Columns/ColumnReplicated.h | 25 ------------------- dbms/include/DB/Columns/IColumnDummy.h | 7 +----- .../DB/Functions/FunctionsHigherOrder.h | 3 +-- .../DB/Functions/FunctionsMiscellaneous.h | 19 ++++++-------- dbms/src/Functions/FunctionsMiscellaneous.cpp | 1 + .../00178_function_replicate.reference | 10 ++++++++ .../0_stateless/00178_function_replicate.sql | 9 +++++++ ...th_common_expressions_and_filter.reference | 5 ++++ ...das_with_common_expressions_and_filter.sql | 3 +++ 9 files changed, 38 insertions(+), 44 deletions(-) delete mode 100644 dbms/include/DB/Columns/ColumnReplicated.h create mode 100644 dbms/tests/queries/0_stateless/00178_function_replicate.reference create mode 100644 dbms/tests/queries/0_stateless/00178_function_replicate.sql create mode 100644 dbms/tests/queries/0_stateless/00179_lambdas_with_common_expressions_and_filter.reference create mode 100644 dbms/tests/queries/0_stateless/00179_lambdas_with_common_expressions_and_filter.sql diff --git a/dbms/include/DB/Columns/ColumnReplicated.h b/dbms/include/DB/Columns/ColumnReplicated.h deleted file mode 100644 index 32abf953c67..00000000000 --- a/dbms/include/DB/Columns/ColumnReplicated.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - -/** Содержит промежуточные данные для вычисления выражений в функциях высшего порядка. - * Это - вложенный столбец произвольного размера. - * Сам ColumnReplicated притворяется, как столбец указанного в конструкторе размера. - */ -class ColumnReplicated final : public IColumnDummy -{ -public: - ColumnReplicated(size_t s_, ColumnPtr nested_) : IColumnDummy(s_), nested(nested_) {} - std::string getName() const override { return "ColumnReplicated"; } - ColumnPtr cloneDummy(size_t s_) const override { return new ColumnReplicated(s_, nested); } - - ColumnPtr & getData() { return nested; } -private: - ColumnPtr nested; -}; - -} diff --git a/dbms/include/DB/Columns/IColumnDummy.h b/dbms/include/DB/Columns/IColumnDummy.h index 509c56b14a8..d9c559f68f8 100644 --- a/dbms/include/DB/Columns/IColumnDummy.h +++ b/dbms/include/DB/Columns/IColumnDummy.h @@ -41,12 +41,7 @@ public: ColumnPtr filter(const Filter & filt) const override { - size_t new_size = 0; - for (Filter::const_iterator it = filt.begin(); it != filt.end(); ++it) - if (*it) - ++new_size; - - return cloneDummy(new_size); + return cloneDummy(countBytesInFilter(filt)); } ColumnPtr permute(const Permutation & perm, size_t limit) const override diff --git a/dbms/include/DB/Functions/FunctionsHigherOrder.h b/dbms/include/DB/Functions/FunctionsHigherOrder.h index 1194a849855..d04bf89620f 100644 --- a/dbms/include/DB/Functions/FunctionsHigherOrder.h +++ b/dbms/include/DB/Functions/FunctionsHigherOrder.h @@ -5,7 +5,6 @@ #include #include -#include #include #include @@ -580,7 +579,7 @@ public: ColumnWithNameAndType replicated_column = block.getByPosition(prerequisites[prerequisite_index]); replicated_column.name = name; - replicated_column.column = typeid_cast(*replicated_column.column).getData(); + replicated_column.column = typeid_cast(*replicated_column.column).getDataPtr(); temp_block.insert(replicated_column); ++prerequisite_index; diff --git a/dbms/include/DB/Functions/FunctionsMiscellaneous.h b/dbms/include/DB/Functions/FunctionsMiscellaneous.h index de7d94274dd..06320362501 100644 --- a/dbms/include/DB/Functions/FunctionsMiscellaneous.h +++ b/dbms/include/DB/Functions/FunctionsMiscellaneous.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -51,9 +50,8 
@@ namespace DB * arrayJoin(arr) - особая функция - выполнить её напрямую нельзя; * используется только чтобы получить тип результата соответствующего выражения. * - * replicate(x, arr) - копирует x столько раз, сколько элементов в массиве arr; - * например: replicate(1, ['a', 'b', 'c']) = 1, 1, 1. - * не предназначена для пользователя, а используется только как prerequisites для функций высшего порядка. + * replicate(x, arr) - создаёт массив такого же размера как arr, все элементы которого равны x; + * например: replicate(1, ['a', 'b', 'c']) = [1, 1, 1]. * * sleep(n) - спит n секунд каждый блок. * @@ -570,18 +568,15 @@ public: }; -/** Размножает столбец (первый аргумент) по количеству элементов в массиве (втором аргументе). - * Не предназначена для внешнего использования. - * Так как возвращаемый столбец будет иметь несовпадающий размер с исходными, - * то результат не может быть потом использован в том же блоке, что и аргументы. +/** Создаёт массив, размножая столбец (первый аргумент) по количеству элементов в массиве (втором аргументе). * Используется только в качестве prerequisites для функций высшего порядка. */ class FunctionReplicate : public IFunction { +public: static constexpr auto name = "replicate"; static IFunction * create(const Context & context) { return new FunctionReplicate; } - /// Получить имя функции. String getName() const { @@ -600,7 +595,7 @@ class FunctionReplicate : public IFunction if (!array_type) throw Exception("Second argument for function " + getName() + " must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return arguments[0]->clone(); + return new DataTypeArray(arguments[0]->clone()); } /// Выполнить функцию над блоком. @@ -620,7 +615,9 @@ class FunctionReplicate : public IFunction array_column = typeid_cast(&*temp_column); } - block.getByPosition(result).column = new ColumnReplicated(first_column->size(), first_column->replicate(array_column->getOffsets())); + block.getByPosition(result).column = new ColumnArray( + first_column->replicate(array_column->getOffsets()), + array_column->getOffsetsColumn()); } }; diff --git a/dbms/src/Functions/FunctionsMiscellaneous.cpp b/dbms/src/Functions/FunctionsMiscellaneous.cpp index 59dd89481f1..e2a478933c1 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/FunctionsMiscellaneous.cpp @@ -326,6 +326,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/dbms/tests/queries/0_stateless/00178_function_replicate.reference b/dbms/tests/queries/0_stateless/00178_function_replicate.reference new file mode 100644 index 00000000000..4fdec92dcf3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00178_function_replicate.reference @@ -0,0 +1,10 @@ +0 [] [] [] [] [] +1 [0] [1] ['1'] [[0]] [['0']] +2 [0,1] [2,2] ['2','2'] [[0,1],[0,1]] [['0','1'],['0','1']] +3 [0,1,2] [3,3,3] ['3','3','3'] [[0,1,2],[0,1,2],[0,1,2]] [['0','1','2'],['0','1','2'],['0','1','2']] +4 [0,1,2,3] [4,4,4,4] ['4','4','4','4'] [[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3]] [['0','1','2','3'],['0','1','2','3'],['0','1','2','3'],['0','1','2','3']] +5 [0,1,2,3,4] [5,5,5,5,5] ['5','5','5','5','5'] [[0,1,2,3,4],[0,1,2,3,4],[0,1,2,3,4],[0,1,2,3,4],[0,1,2,3,4]] [['0','1','2','3','4'],['0','1','2','3','4'],['0','1','2','3','4'],['0','1','2','3','4'],['0','1','2','3','4']] +6 [0,1,2,3,4,5] [6,6,6,6,6,6] ['6','6','6','6','6','6'] 
[[0,1,2,3,4,5],[0,1,2,3,4,5],[0,1,2,3,4,5],[0,1,2,3,4,5],[0,1,2,3,4,5],[0,1,2,3,4,5]] [['0','1','2','3','4','5'],['0','1','2','3','4','5'],['0','1','2','3','4','5'],['0','1','2','3','4','5'],['0','1','2','3','4','5'],['0','1','2','3','4','5']] +7 [0,1,2,3,4,5,6] [7,7,7,7,7,7,7] ['7','7','7','7','7','7','7'] [[0,1,2,3,4,5,6],[0,1,2,3,4,5,6],[0,1,2,3,4,5,6],[0,1,2,3,4,5,6],[0,1,2,3,4,5,6],[0,1,2,3,4,5,6],[0,1,2,3,4,5,6]] [['0','1','2','3','4','5','6'],['0','1','2','3','4','5','6'],['0','1','2','3','4','5','6'],['0','1','2','3','4','5','6'],['0','1','2','3','4','5','6'],['0','1','2','3','4','5','6'],['0','1','2','3','4','5','6']] +8 [0,1,2,3,4,5,6,7] [8,8,8,8,8,8,8,8] ['8','8','8','8','8','8','8','8'] [[0,1,2,3,4,5,6,7],[0,1,2,3,4,5,6,7],[0,1,2,3,4,5,6,7],[0,1,2,3,4,5,6,7],[0,1,2,3,4,5,6,7],[0,1,2,3,4,5,6,7],[0,1,2,3,4,5,6,7],[0,1,2,3,4,5,6,7]] [['0','1','2','3','4','5','6','7'],['0','1','2','3','4','5','6','7'],['0','1','2','3','4','5','6','7'],['0','1','2','3','4','5','6','7'],['0','1','2','3','4','5','6','7'],['0','1','2','3','4','5','6','7'],['0','1','2','3','4','5','6','7'],['0','1','2','3','4','5','6','7']] +9 [0,1,2,3,4,5,6,7,8] [9,9,9,9,9,9,9,9,9] ['9','9','9','9','9','9','9','9','9'] [[0,1,2,3,4,5,6,7,8],[0,1,2,3,4,5,6,7,8],[0,1,2,3,4,5,6,7,8],[0,1,2,3,4,5,6,7,8],[0,1,2,3,4,5,6,7,8],[0,1,2,3,4,5,6,7,8],[0,1,2,3,4,5,6,7,8],[0,1,2,3,4,5,6,7,8],[0,1,2,3,4,5,6,7,8]] [['0','1','2','3','4','5','6','7','8'],['0','1','2','3','4','5','6','7','8'],['0','1','2','3','4','5','6','7','8'],['0','1','2','3','4','5','6','7','8'],['0','1','2','3','4','5','6','7','8'],['0','1','2','3','4','5','6','7','8'],['0','1','2','3','4','5','6','7','8'],['0','1','2','3','4','5','6','7','8'],['0','1','2','3','4','5','6','7','8']] diff --git a/dbms/tests/queries/0_stateless/00178_function_replicate.sql b/dbms/tests/queries/0_stateless/00178_function_replicate.sql new file mode 100644 index 00000000000..13ce1c24364 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00178_function_replicate.sql @@ -0,0 +1,9 @@ +SELECT + number, + range(number) AS arr, + replicate(number, arr), + replicate(toString(number), arr), + replicate(range(number), arr), + replicate(arrayMap(x -> toString(x), range(number)), arr) +FROM system.numbers +LIMIT 10; diff --git a/dbms/tests/queries/0_stateless/00179_lambdas_with_common_expressions_and_filter.reference b/dbms/tests/queries/0_stateless/00179_lambdas_with_common_expressions_and_filter.reference new file mode 100644 index 00000000000..eb4ff6138fd --- /dev/null +++ b/dbms/tests/queries/0_stateless/00179_lambdas_with_common_expressions_and_filter.reference @@ -0,0 +1,5 @@ +[0] +[0,1,2] +[0,1,2,3,4] +[0,1,2,3,4,5,6] +[0,1,2,3,4,5,6,7,8] diff --git a/dbms/tests/queries/0_stateless/00179_lambdas_with_common_expressions_and_filter.sql b/dbms/tests/queries/0_stateless/00179_lambdas_with_common_expressions_and_filter.sql new file mode 100644 index 00000000000..b5eefa57a9f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00179_lambdas_with_common_expressions_and_filter.sql @@ -0,0 +1,3 @@ +SELECT arrayMap(x -> number != -1 ? x : 0, arr) +FROM (SELECT number, range(number) AS arr FROM system.numbers LIMIT 10) +WHERE number % 2 = 1 AND arrayExists(x -> number != -1, arr); From 6f3d8f0efc465e734918f974ba9f1ce49f226436 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Jun 2015 09:06:04 +0300 Subject: [PATCH 23/67] dbms: added simple priorities system [#METR-16911]. 
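The patch below adds a simple priority mechanism for queries: a smaller number means a higher priority, 0 disables the mechanism, and it is exposed through the new `priority` setting. Every registered query holds a handle, and on each progress update a lower-priority query calls waitIfNeed() and sleeps, in one-second slices, while any higher-priority query is running. A usage sketch built only from the interfaces added below; the calling context is assumed:

    QueryPriorities priorities;

    auto high = priorities.insert(1);   /// a priority-1 query starts (priority 0 would return an empty handle)
    auto low  = priorities.insert(2);   /// a priority-2 query starts

    /// What ProcessListElement::update() does on each progress packet of the lower-priority query:
    /// returns false if the full second elapsed while a higher-priority query was still running.
    bool no_more_important_queries = low->waitIfNeed(std::chrono::seconds(1));

    high.reset();                       /// the priority-1 query finishes; subsequent waits return immediately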
--- dbms/include/DB/DataStreams/BlockIO.h | 7 +- .../DataStreams/IProfilingBlockInputStream.h | 6 +- dbms/include/DB/Interpreters/ProcessList.h | 133 ++++++------------ .../include/DB/Interpreters/QueryPriorities.h | 117 +++++++++++++++ dbms/include/DB/Interpreters/Settings.h | 3 + dbms/src/DataStreams/BlockIO.cpp | 9 ++ .../IProfilingBlockInputStream.cpp | 3 +- dbms/src/Interpreters/ProcessList.cpp | 72 ++++++++++ dbms/src/Interpreters/executeQuery.cpp | 10 +- dbms/src/Server/Server.cpp | 1 + dbms/src/Storages/StorageSystemProcesses.cpp | 1 + 11 files changed, 266 insertions(+), 96 deletions(-) create mode 100644 dbms/include/DB/Interpreters/QueryPriorities.h create mode 100644 dbms/src/DataStreams/BlockIO.cpp create mode 100644 dbms/src/Interpreters/ProcessList.cpp diff --git a/dbms/include/DB/DataStreams/BlockIO.h b/dbms/include/DB/DataStreams/BlockIO.h index 6c3c83a63bd..b0e69bbb27d 100644 --- a/dbms/include/DB/DataStreams/BlockIO.h +++ b/dbms/include/DB/DataStreams/BlockIO.h @@ -2,12 +2,13 @@ #include #include -#include namespace DB { +class ProcessListEntry; + struct BlockIO { /** process_list_entry должен уничтожаться позже, чем in и out, @@ -15,7 +16,7 @@ struct BlockIO * (MemoryTracker * current_memory_tracker), * которая может использоваться до уничтожения in и out. */ - ProcessList::EntryPtr process_list_entry; + std::shared_ptr process_list_entry; BlockInputStreamPtr in; BlockOutputStreamPtr out; @@ -38,6 +39,8 @@ struct BlockIO return *this; } + + ~BlockIO(); }; } diff --git a/dbms/include/DB/DataStreams/IProfilingBlockInputStream.h b/dbms/include/DB/DataStreams/IProfilingBlockInputStream.h index 0b8fbe16a62..95f16118c3c 100644 --- a/dbms/include/DB/DataStreams/IProfilingBlockInputStream.h +++ b/dbms/include/DB/DataStreams/IProfilingBlockInputStream.h @@ -3,7 +3,6 @@ #include #include -#include #include #include @@ -14,6 +13,7 @@ namespace DB { class QuotaForIntervals; +class ProcessListElement; /** Смотрит за тем, как работает источник блоков. @@ -82,7 +82,7 @@ public: * На основе этой информации будет проверяться квота, и некоторые ограничения. * Также эта информация будет доступна в запросе SHOW PROCESSLIST. */ - void setProcessListElement(ProcessList::Element * elem); + void setProcessListElement(ProcessListElement * elem); /** Установить информацию о приблизительном общем количестве строк, которых нужно прочитать. 
*/ @@ -154,7 +154,7 @@ protected: BlockStreamProfileInfo info; std::atomic is_cancelled{false}; ProgressCallback progress_callback; - ProcessList::Element * process_list_elem = nullptr; + ProcessListElement * process_list_elem = nullptr; bool enabled_extremes = false; diff --git a/dbms/include/DB/Interpreters/ProcessList.h b/dbms/include/DB/Interpreters/ProcessList.h index 40aba52e069..7bdbb4e3e95 100644 --- a/dbms/include/DB/Interpreters/ProcessList.h +++ b/dbms/include/DB/Interpreters/ProcessList.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB @@ -36,13 +37,16 @@ struct ProcessListElement MemoryTracker memory_tracker; + QueryPriorities::Handle priority_handle; + bool is_cancelled = false; ProcessListElement(const String & query_, const String & user_, const String & query_id_, const Poco::Net::IPAddress & ip_address_, - size_t max_memory_usage) - : query(query_), user(user_), query_id(query_id_), ip_address(ip_address_), memory_tracker(max_memory_usage) + size_t max_memory_usage, QueryPriorities::Handle && priority_handle_) + : query(query_), user(user_), query_id(query_id_), ip_address(ip_address_), memory_tracker(max_memory_usage), + priority_handle(std::move(priority_handle_)) { current_memory_tracker = &memory_tracker; } @@ -55,126 +59,81 @@ struct ProcessListElement bool update(const Progress & value) { progress.incrementPiecewiseAtomically(value); + + if (priority_handle) + priority_handle->waitIfNeed(std::chrono::seconds(1)); /// NOTE Можно сделать настраиваемым таймаут. + return !is_cancelled; } }; +class ProcessList; + + +/// Держит итератор на список, и удаляет элемент из списка в деструкторе. +class ProcessListEntry +{ +private: + using Container = std::list; + + ProcessList & parent; + Container::iterator it; +public: + ProcessListEntry(ProcessList & parent_, Container::iterator it_) + : parent(parent_), it(it_) {} + + ~ProcessListEntry(); + + ProcessListElement * operator->() { return &*it; } + const ProcessListElement * operator->() const { return &*it; } + + ProcessListElement & get() { return *it; } + const ProcessListElement & get() const { return *it; } +}; + + class ProcessList { - friend class Entry; + friend class ProcessListEntry; public: using Element = ProcessListElement; + using Entry = ProcessListEntry; /// list, чтобы итераторы не инвалидировались. NOTE: можно заменить на cyclic buffer, но почти незачем. - typedef std::list Containter; + using Container = std::list; /// Query_id -> Element * - typedef std::unordered_map QueryToElement; + using QueryToElement = std::unordered_map; /// User -> Query_id -> Element * - typedef std::unordered_map UserToQueries; + using UserToQueries = std::unordered_map; private: mutable Poco::FastMutex mutex; mutable Poco::Condition have_space; /// Количество одновременно выполняющихся запросов стало меньше максимального. - Containter cont; + Container cont; size_t cur_size; /// В C++03 std::list::size не O(1). size_t max_size; /// Если 0 - не ограничено. Иначе, если пытаемся добавить больше - кидается исключение. UserToQueries user_to_queries; - - /// Держит итератор на список, и удаляет элемент из списка в деструкторе. 
- class Entry - { - private: - ProcessList & parent; - Containter::iterator it; - public: - Entry(ProcessList & parent_, Containter::iterator it_) - : parent(parent_), it(it_) {} - - ~Entry() - { - Poco::ScopedLock lock(parent.mutex); - - /// В случае, если запрос отменяется, данные о нем удаляются из мапа в момент отмены. - if (!it->is_cancelled && !it->query_id.empty()) - { - UserToQueries::iterator queries = parent.user_to_queries.find(it->user); - if (queries != parent.user_to_queries.end()) - { - QueryToElement::iterator element = queries->second.find(it->query_id); - if (element != queries->second.end()) - queries->second.erase(element); - } - } - - parent.cont.erase(it); - --parent.cur_size; - parent.have_space.signal(); - } - - Element * operator->() { return &*it; } - const Element * operator->() const { return &*it; } - - Element & get() { return *it; } - const Element & get() const { return *it; } - }; + QueryPriorities priorities; public: ProcessList(size_t max_size_ = 0) : cur_size(0), max_size(max_size_) {} - typedef Poco::SharedPtr EntryPtr; + typedef std::shared_ptr EntryPtr; /** Зарегистрировать выполняющийся запрос. Возвращает refcounted объект, который удаляет запрос из списка при уничтожении. * Если выполняющихся запросов сейчас слишком много - ждать не более указанного времени. * Если времени не хватило - кинуть исключение. */ EntryPtr insert(const String & query_, const String & user_, const String & query_id_, const Poco::Net::IPAddress & ip_address_, - size_t max_memory_usage = 0, size_t max_wait_milliseconds = DEFAULT_QUERIES_QUEUE_WAIT_TIME_MS, bool replace_running_query = false) - { - EntryPtr res; - - { - Poco::ScopedLock lock(mutex); - - if (max_size && cur_size >= max_size && (!max_wait_milliseconds || !have_space.tryWait(mutex, max_wait_milliseconds))) - throw Exception("Too much simultaneous queries. Maximum: " + toString(max_size), ErrorCodes::TOO_MUCH_SIMULTANEOUS_QUERIES); - - if (!query_id_.empty()) - { - UserToQueries::iterator queries = user_to_queries.find(user_); - - if (queries != user_to_queries.end()) - { - QueryToElement::iterator element = queries->second.find(query_id_); - if (element != queries->second.end()) - { - if (!replace_running_query) - throw Exception("Query with id = " + query_id_ + " is already running.", - ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING); - element->second->is_cancelled = true; - /// В случае если запрос отменяется, данные о нем удаляются из мапа в момент отмены. - queries->second.erase(element); - } - } - } - - ++cur_size; - - res = new Entry(*this, cont.emplace(cont.end(), query_, user_, query_id_, ip_address_, max_memory_usage)); - - if (!query_id_.empty()) - user_to_queries[user_][query_id_] = &res->get(); - } - - return res; - } + size_t max_memory_usage, size_t max_wait_milliseconds, bool replace_running_query, QueryPriorities::Priority priority); /// Количество одновременно выполняющихся запросов. size_t size() const { return cur_size; } /// Получить текущее состояние (копию) списка запросов. - Containter get() const + Container get() const { Poco::ScopedLock lock(mutex); return cont; diff --git a/dbms/include/DB/Interpreters/QueryPriorities.h b/dbms/include/DB/Interpreters/QueryPriorities.h new file mode 100644 index 00000000000..5239fe69631 --- /dev/null +++ b/dbms/include/DB/Interpreters/QueryPriorities.h @@ -0,0 +1,117 @@ +#pragma once + +#include +#include +#include +#include +#include + + +/** Реализует приоритеты запросов. 
+ * Позволяет приостанавливать выполнение запроса, если выполняется хотя бы один более приоритетный запрос. + * + * Величина приоритета - целое число, чем меньше - тем больше приоритет. + * + * Приоритет 0 считается особенным - запросы с таким приоритетом выполняются всегда, + * не зависят от других запросов и не влияют на другие запросы. + * То есть 0 означает - не использовать приоритеты. + * + * NOTE Возможности сделать лучше: + * - реализовать ограничение на максимальное количество запросов с таким приоритетом. + */ +class QueryPriorities +{ +public: + using Priority = int; + +private: + friend struct Handle; + + using Count = int; + + /// Количество выполняющихся сейчас запросов с заданным приоритетом. + using Container = std::map; + + std::mutex mutex; + std::condition_variable condvar; + Container container; + + + /** Если есть более приоритетные запросы - спать, пока они не перестанут быть или не истечёт таймаут. + * Возвращает true, если более приоритетные запросы исчезли на момент возврата из функции, false, если истёк таймаут. + */ + template + bool waitIfNeed(Priority priority, Duration timeout) + { + if (0 == priority) + return true; + + std::unique_lock lock(mutex); + + while (true) + { + /// Если ли хотя бы один более приоритетный запрос? + bool found = false; + for (const auto & value : container) + { + if (value.first >= priority) + break; + + if (value.second > 0) + { + found = true; + break; + } + } + + if (!found) + return true; + + if (std::cv_status::timeout == condvar.wait_for(lock, timeout)) + return false; + } + } + +public: + struct HandleImpl + { + private: + QueryPriorities & parent; + QueryPriorities::Container::value_type & value; + + public: + HandleImpl(QueryPriorities & parent_, QueryPriorities::Container::value_type & value_) + : parent(parent_), value(value_) {} + + ~HandleImpl() + { + { + std::lock_guard lock(parent.mutex); + --value.second; + } + parent.condvar.notify_all(); + } + + template + bool waitIfNeed(Duration timeout) + { + return parent.waitIfNeed(value.first, timeout); + } + }; + + using Handle = std::shared_ptr; + + /** Зарегистрировать, что запрос с заданным приоритетом выполняется. + * Возвращается объект, в деструкторе которого, запись о запросе удаляется. + */ + Handle insert(Priority priority) + { + if (0 == priority) + return {}; + + std::lock_guard lock(mutex); + auto it = container.emplace(priority, 0).first; + ++it->second; + return std::make_shared(*this, *it); + } +}; diff --git a/dbms/include/DB/Interpreters/Settings.h b/dbms/include/DB/Interpreters/Settings.h index e178e784936..8d799249f71 100644 --- a/dbms/include/DB/Interpreters/Settings.h +++ b/dbms/include/DB/Interpreters/Settings.h @@ -132,6 +132,9 @@ struct Settings \ /** Позволяет выбирать метод сжатия данных при записи */\ M(SettingCompressionMethod, network_compression_method, CompressionMethod::LZ4) \ + \ + /** Приоритет запроса. 1 - самый высокий, больше - ниже; 0 - не использовать приоритеты. */ \ + M(SettingUInt64, priority, 0) \ /// Всевозможные ограничения на выполнение запроса. 
Limits limits; diff --git a/dbms/src/DataStreams/BlockIO.cpp b/dbms/src/DataStreams/BlockIO.cpp new file mode 100644 index 00000000000..83b0fac54e2 --- /dev/null +++ b/dbms/src/DataStreams/BlockIO.cpp @@ -0,0 +1,9 @@ +#include +#include + +namespace DB +{ + +BlockIO::~BlockIO() = default; + +} diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp index e3a14875e2b..93216ea73f4 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp @@ -5,6 +5,7 @@ #include #include +#include #include @@ -320,7 +321,7 @@ void IProfilingBlockInputStream::setProgressCallback(ProgressCallback callback) } -void IProfilingBlockInputStream::setProcessListElement(ProcessList::Element * elem) +void IProfilingBlockInputStream::setProcessListElement(ProcessListElement * elem) { process_list_elem = elem; diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp new file mode 100644 index 00000000000..e15bef4002f --- /dev/null +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -0,0 +1,72 @@ +#include + +namespace DB +{ + + +ProcessList::EntryPtr ProcessList::insert( + const String & query_, const String & user_, const String & query_id_, const Poco::Net::IPAddress & ip_address_, + size_t max_memory_usage, size_t max_wait_milliseconds, bool replace_running_query, QueryPriorities::Priority priority) +{ + EntryPtr res; + + { + Poco::ScopedLock lock(mutex); + + if (max_size && cur_size >= max_size && (!max_wait_milliseconds || !have_space.tryWait(mutex, max_wait_milliseconds))) + throw Exception("Too much simultaneous queries. Maximum: " + toString(max_size), ErrorCodes::TOO_MUCH_SIMULTANEOUS_QUERIES); + + if (!query_id_.empty()) + { + UserToQueries::iterator queries = user_to_queries.find(user_); + + if (queries != user_to_queries.end()) + { + QueryToElement::iterator element = queries->second.find(query_id_); + if (element != queries->second.end()) + { + if (!replace_running_query) + throw Exception("Query with id = " + query_id_ + " is already running.", + ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING); + element->second->is_cancelled = true; + /// В случае если запрос отменяется, данные о нем удаляются из мапа в момент отмены. + queries->second.erase(element); + } + } + } + + ++cur_size; + + res.reset(new Entry(*this, cont.emplace(cont.end(), + query_, user_, query_id_, ip_address_, max_memory_usage, priorities.insert(priority)))); + + if (!query_id_.empty()) + user_to_queries[user_][query_id_] = &res->get(); + } + + return res; +} + + +ProcessListEntry::~ProcessListEntry() +{ + Poco::ScopedLock lock(parent.mutex); + + /// В случае, если запрос отменяется, данные о нем удаляются из мапа в момент отмены. 
+ if (!it->is_cancelled && !it->query_id.empty()) + { + ProcessList::UserToQueries::iterator queries = parent.user_to_queries.find(it->user); + if (queries != parent.user_to_queries.end()) + { + ProcessList::QueryToElement::iterator element = queries->second.find(it->query_id); + if (element != queries->second.end()) + queries->second.erase(element); + } + } + + parent.cont.erase(it); + --parent.cur_size; + parent.have_space.signal(); +} + +} diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index b8d79f5751e..91fdca2b2ad 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -14,6 +14,7 @@ #include #include +#include #include @@ -88,11 +89,14 @@ static std::tuple executeQueryImpl( ProcessList::EntryPtr process_list_entry; if (!internal && nullptr == typeid_cast(&*ast)) { + const Settings & settings = context.getSettingsRef(); + process_list_entry = context.getProcessList().insert( query, context.getUser(), context.getCurrentQueryId(), context.getIPAddress(), - context.getSettingsRef().limits.max_memory_usage, - context.getSettingsRef().queue_max_wait_ms.totalMilliseconds(), - context.getSettingsRef().replace_running_query); + settings.limits.max_memory_usage, + settings.queue_max_wait_ms.totalMilliseconds(), + settings.replace_running_query, + settings.priority); context.setProcessListElement(&process_list_entry->get()); } diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 09124ad946f..2188ae8a15c 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/Storages/StorageSystemProcesses.cpp b/dbms/src/Storages/StorageSystemProcesses.cpp index 4baa62436a2..ed083c6d9da 100644 --- a/dbms/src/Storages/StorageSystemProcesses.cpp +++ b/dbms/src/Storages/StorageSystemProcesses.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include From 978aba0f4caa8d985aac0994d9bfb27034a2f830 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 11 Jun 2015 16:04:45 +0300 Subject: [PATCH 24/67] Merge --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 88e9dbd49c5..5bb155a3b06 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -340,8 +340,10 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongThreads( const Names & virt_columns, const Settings & settings) { - size_t min_marks_for_concurrent_read = (settings.merge_tree_min_rows_for_concurrent_read + data.index_granularity - 1) / data.index_granularity; - size_t max_marks_to_use_cache = (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; + const size_t min_marks_for_concurrent_read = + (settings.merge_tree_min_rows_for_concurrent_read + data.index_granularity - 1) / data.index_granularity; + const size_t max_marks_to_use_cache = + (settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity; /// На всякий случай перемешаем куски. 
std::random_shuffle(parts.begin(), parts.end()); @@ -354,12 +356,9 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongThreads( /// Пусть отрезки будут перечислены справа налево, чтобы можно было выбрасывать самый левый отрезок с помощью pop_back(). std::reverse(parts[i].ranges.begin(), parts[i].ranges.end()); - sum_marks_in_parts[i] = 0; - for (size_t j = 0; j < parts[i].ranges.size(); ++j) - { - MarkRange & range = parts[i].ranges[j]; + for (const auto & range : parts[i].ranges) sum_marks_in_parts[i] += range.end - range.begin; - } + sum_marks += sum_marks_in_parts[i]; } @@ -370,7 +369,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongThreads( if (sum_marks > 0) { - size_t min_marks_per_thread = (sum_marks - 1) / threads + 1; + const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1; for (size_t i = 0; i < threads && !parts.empty(); ++i) { @@ -415,10 +414,11 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongThreads( throw Exception("Unexpected end of ranges while spreading marks among threads", ErrorCodes::LOGICAL_ERROR); MarkRange & range = part.ranges.back(); - size_t marks_in_range = range.end - range.begin; - size_t marks_to_get_from_range = std::min(marks_in_range, need_marks); - ranges_to_get_from_part.push_back(MarkRange(range.begin, range.begin + marks_to_get_from_range)); + const size_t marks_in_range = range.end - range.begin; + const size_t marks_to_get_from_range = std::min(marks_in_range, need_marks); + + ranges_to_get_from_part.emplace_back(range.begin, range.begin + marks_to_get_from_range); range.begin += marks_to_get_from_range; marks_in_part -= marks_to_get_from_range; need_marks -= marks_to_get_from_range; From be68461fc6f53a5eb896c37a8aed4b33b496fd00 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Mon, 22 Jun 2015 18:45:30 +0300 Subject: [PATCH 25/67] dbms: correctly determine mark to end of previous block [#MTRSADMIN-1093] --- .../MergeTree/MergeTreePartChecker.cpp | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp b/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp index c8e8a84f3fe..3bd49d17231 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB @@ -115,6 +116,12 @@ struct Stream readIntBinary(mrk_mark.offset_in_compressed_file, mrk_hashing_buf); readIntBinary(mrk_mark.offset_in_decompressed_block, mrk_hashing_buf); + /// На всякий случай, сохраним смещение в файле и размер предыдущего блока. 
+ SCOPE_EXIT( + prev_offset_in_compressed_file = mrk_mark.offset_in_compressed_file; + prev_buffer_size = uncompressed_hashing_buf.buffer().size(); + ); + bool has_alternative_mark = false; MarkInCompressedFile alternative_data_mark; MarkInCompressedFile data_mark; @@ -138,6 +145,18 @@ struct Stream if (uncompressed_hashing_buf.eof()) return; } + else if (uncompressed_hashing_buf.offset() == 0) + { + /// Восстановим засечку на конец предыдущего блока по сохраненным данным + has_alternative_mark = true; + alternative_data_mark.offset_in_compressed_file = prev_offset_in_compressed_file; + alternative_data_mark.offset_in_decompressed_block = prev_buffer_size; + + if (mrk_mark == alternative_data_mark) + return; + } + + std::cout << "mrk_mark " << mrk_mark.offset_in_compressed_file << ' ' << mrk_mark.offset_in_decompressed_block << std::endl; data_mark.offset_in_compressed_file = compressed_hashing_buf.count() - uncompressing_buf.getSizeCompressed(); data_mark.offset_in_decompressed_block = uncompressed_hashing_buf.offset(); @@ -161,6 +180,10 @@ struct Stream checksums.files[name + ".mrk"] = MergeTreeData::DataPart::Checksums::Checksum( mrk_hashing_buf.count(), mrk_hashing_buf.getHash()); } + +private: + size_t prev_offset_in_compressed_file{}; + size_t prev_buffer_size{}; }; /// Возвращает количество строк. Добавляет в checksums чексуммы всех файлов столбца. From 2ba22a4a8a430f299bcd40565c51516ffcce9d39 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Mon, 22 Jun 2015 21:02:45 +0300 Subject: [PATCH 26/67] dbms: ActiveDataPartSet::level no longer participates in ::contains and ::operator<. --- dbms/include/DB/Storages/MergeTree/ActiveDataPartSet.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dbms/include/DB/Storages/MergeTree/ActiveDataPartSet.h b/dbms/include/DB/Storages/MergeTree/ActiveDataPartSet.h index ec4509298b7..3cfdc8d8942 100644 --- a/dbms/include/DB/Storages/MergeTree/ActiveDataPartSet.h +++ b/dbms/include/DB/Storages/MergeTree/ActiveDataPartSet.h @@ -42,9 +42,6 @@ public: if (right != rhs.right) return right < rhs.right; - if (level != rhs.level) - return level < rhs.level; - return false; } @@ -53,7 +50,6 @@ public: { return left_month == rhs.left_month /// Куски за разные месяцы не объединяются && right_month == rhs.right_month - && level > rhs.level && left_date <= rhs.left_date && right_date >= rhs.right_date && left <= rhs.left From f012d95b7a07cab1714e646ff7a9de4e5b65e239 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Mon, 22 Jun 2015 21:23:33 +0300 Subject: [PATCH 27/67] dbms: implement alter table detach unreplicated partition [#MTRSADMIN-1093] --- .../DB/Storages/StorageReplicatedMergeTree.h | 2 +- dbms/src/Parsers/ParserAlterQuery.cpp | 6 ++++++ .../src/Storages/StorageReplicatedMergeTree.cpp | 17 +++++++---------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/dbms/include/DB/Storages/StorageReplicatedMergeTree.h b/dbms/include/DB/Storages/StorageReplicatedMergeTree.h index 43b59d70627..57a55b9566c 100644 --- a/dbms/include/DB/Storages/StorageReplicatedMergeTree.h +++ b/dbms/include/DB/Storages/StorageReplicatedMergeTree.h @@ -134,7 +134,7 @@ public: void getStatus(Status & res, bool with_zk_fields = true); private: - void dropUnreplicatedPartition(const Field & partition, const Settings & settings); + void dropUnreplicatedPartition(const Field & partition, bool detach, const Settings & settings); friend class ReplicatedMergeTreeBlockOutputStream; friend class ReplicatedMergeTreeRestartingThread; diff --git 
a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index f3dd0c02d9b..cf933805184 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -150,6 +150,12 @@ bool ParserAlterQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pa { ws.ignore(pos, end); + if (s_unreplicated.ignore(pos, end, max_parsed_pos, expected)) + { + params.unreplicated = true; + ws.ignore(pos, end); + } + if (!s_partition.ignore(pos, end, max_parsed_pos, expected)) return false; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 1cd57a70216..b9fb09b4e42 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2224,7 +2224,7 @@ static String getFakePartNameForDrop(const String & month_name, UInt64 left, UIn } -void StorageReplicatedMergeTree::dropUnreplicatedPartition(const Field & partition, const Settings & settings) +void StorageReplicatedMergeTree::dropUnreplicatedPartition(const Field & partition, const bool detach, const Settings & settings) { if (!unreplicated_data) return; @@ -2247,10 +2247,13 @@ void StorageReplicatedMergeTree::dropUnreplicatedPartition(const Field & partiti LOG_DEBUG(log, "Removing unreplicated part " << part->name); ++removed_parts; - unreplicated_data->replaceParts({part}, {}, false); + if (detach) + unreplicated_data->renameAndDetachPart(part, ""); + else + unreplicated_data->replaceParts({part}, {}, false); } - LOG_INFO(log, "Removed " << removed_parts << " unreplicated parts inside " << apply_visitor(FieldVisitorToString(), partition) << "."); + LOG_INFO(log, (detach ? "Detached " : "Removed ") << removed_parts << " unreplicated parts inside " << apply_visitor(FieldVisitorToString(), partition) << "."); } @@ -2258,13 +2261,7 @@ void StorageReplicatedMergeTree::dropPartition(const Field & field, bool detach, { if (unreplicated) { - if (detach) - throw Exception{ - "DETACH UNREPLICATED PATITION not supported", - ErrorCodes::LOGICAL_ERROR - }; - - dropUnreplicatedPartition(field, settings); + dropUnreplicatedPartition(field, detach, settings); return; } From 8ab76848d7c72c53b4d5c5ccd0bfc7566724a275 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 23 Jun 2015 00:26:03 +0300 Subject: [PATCH 28/67] dbms: QueryLog: development [#METR-16946]. --- dbms/include/DB/Interpreters/QueryLog.h | 245 ++++++++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 dbms/include/DB/Interpreters/QueryLog.h diff --git a/dbms/include/DB/Interpreters/QueryLog.h b/dbms/include/DB/Interpreters/QueryLog.h new file mode 100644 index 00000000000..50f21c31b34 --- /dev/null +++ b/dbms/include/DB/Interpreters/QueryLog.h @@ -0,0 +1,245 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + + +/** Позволяет логгировать информацию о выполнении запросов: + * - о начале выполнения запроса; + * - метрики производительности, после выполнения запроса; + * - об ошибках при выполнении запроса. + * + * Логгирование производится асинхронно. Данные передаются в очередь, откуда их читает отдельный поток. + * Этот поток записывает лог в предназначенную для этого таблицу не чаще, чем с заданной периодичностью. + */ + +/** Что логгировать. + * Структура может меняться при изменении версии сервера. 
+ * Если при первой записи обнаруживается, что имеющаяся таблица с логами имеет неподходящую стрктуру, + * то эта таблица переименовывается (откладывается в сторону) и создаётся новая таблица. + */ +struct QueryLogElement +{ + enum Type + { + SHUTDOWN = 0, /// Эта запись имеет служебное значение. + QUERY_START = 1, + QUERY_FINISH = 2, + }; + + enum Interface + { + TCP = 1, + HTTP = 2, + OLAP_HTTP = 3, + }; + + enum HTTPMethod + { + UNKNOWN = 0, + GET = 1, + POST = 2, + }; + + Type type; + + /// В зависимости от типа, не все поля могут быть заполнены. + + time_t event_time; + time_t query_start_time; + UInt64 query_duration_ms; + + UInt64 read_rows; + UInt64 read_bytes; + + UInt64 result_rows; + UInt64 result_bytes; + + String query; + + Interface interface; + HTTPMethod http_method; + Poco::Net::IPAddress ip_address; + String user; + String query_id; +}; + + +#define DBMS_QUERY_LOG_QUEUE_SIZE 1024 + + +class QueryLog : private boost::noncopyable +{ +public: + + /** Передаётся имя таблицы, в которую писать лог. + * Если таблица не существует, то она создаётся с движком MergeTree, с ключём по event_time. + * Если таблица существует, то проверяется, подходящая ли у неё структура. + * Если структура подходящая, то будет использоваться эта таблица. + * Если нет - то существующая таблица переименовывается в такую же, но с добавлением суффикса _N на конце, + * где N - минимальное число, начиная с 1 такое, что таблицы с таким именем ещё нет; + * и создаётся новая таблица, как будто существующей таблицы не было. + */ + QueryLog(Context & context_, const String & database_name_, const String & table_name_, size_t flush_interval_milliseconds_) + : context(context_), database_name(database_name_), table_name(table_name_), flush_interval_milliseconds(flush_interval_milliseconds_) + { + data.reserve(DBMS_QUERY_LOG_QUEUE_SIZE); + + // TODO + + saving_thread = std::thread([this] { threadFunction(); }); + } + + ~QueryLog() + { + /// Говорим потоку, что надо завершиться. + QueryLogElement elem; + elem.type = QueryLogElement::SHUTDOWN; + queue.push(elem); + + saving_thread.join(); + } + + /** Добавить запись в лог. + * Сохранение в таблицу делается асинхронно, и в случае сбоя, запись может никуда не попасть. + */ + void add(const QueryLogElement & element) + { + /// Здесь может быть блокировка. Возможно, в случае переполнения очереди, лучше сразу кидать эксепшен. Или даже отказаться от логгирования запроса. + queue.push(element); + } + +private: + Context & context; + const String database_name; + const String table_name; + StoragePtr table; + const size_t flush_interval_milliseconds; + + /// Очередь всё-таки ограничена. Но размер достаточно большой, чтобы не блокироваться во всех нормальных ситуациях. + ConcurrentBoundedQueue queue {DBMS_QUERY_LOG_QUEUE_SIZE}; + + /** Данные, которые были вынуты из очереди. Здесь данные накапливаются, пока не пройдёт достаточное количество времени. + * Можно было бы использовать двойную буферизацию, но предполагается, + * что запись в таблицу с логом будет быстрее, чем обработка большой пачки запросов. + */ + std::vector data; + + /** В этом потоке данные вынимаются из queue, складываются в data, а затем вставляются в таблицу. 
+ */ + std::thread saving_thread; + + + void threadFunction() + { + Stopwatch time_after_last_write; + bool first = true; + + while (true) + { + try + { + if (first) + { + time_after_last_write.restart(); + first = false; + } + + QueryLogElement element; + bool has_element = false; + + if (data.empty()) + { + element = queue.pop(); + has_element = true; + } + else + { + size_t milliseconds_elapsed = time_after_last_write.elapsed() / 1000000; + if (milliseconds_elapsed < flush_interval_milliseconds) + has_element = queue.tryPop(element, flush_interval_milliseconds - milliseconds_elapsed); + } + + if (has_element) + { + if (element.type = QueryLogElement::SHUTDOWN) + { + flush(); + break; + } + else + data.push_back(element); + } + + size_t milliseconds_elapsed = time_after_last_write.elapsed() / 1000000; + if (milliseconds_elapsed >= flush_interval_milliseconds) + { + /// Записываем данные в таблицу. + flush(); + time_after_last_write.restart(); + } + } + catch (...) + { + /// В случае ошибки теряем накопленные записи, чтобы не блокироваться. + data.clear(); + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + } + + Block createBlock() + { + return { + {new ColumnUInt8, new DataTypeUInt8, "type"}, + {new ColumnUInt32, new DataTypeDateTime, "event_time"}, + {new ColumnUInt32, new DataTypeDateTime, "query_start_time"}, + }; + + /* time_t event_time; + time_t query_start_time; + UInt64 query_duration_ms; + + UInt64 read_rows; + UInt64 read_bytes; + + UInt64 result_rows; + UInt64 result_bytes; + + String query; + + Interface interface; + HTTPMethod http_method; + Poco::Net::IPAddress ip_address; + String user; + String query_id;*/ + } + + void flush() + { + try + { + Block block = createBlock(); + + // TODO Формирование блока и запись. + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + data.clear(); + } +}; + + +} From 7f2fddeed21591c3356c016d24d7200c23b7dd25 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 23 Jun 2015 02:17:49 +0300 Subject: [PATCH 29/67] dbms: fixed error in materialized views after refactoring [#METR-16945]. --- dbms/src/Storages/StorageMaterializedView.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index b55e36b3d8f..b8f14a4e8e3 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -48,6 +48,7 @@ StorageMaterializedView::StorageMaterializedView( /// Если запрос ATTACH, то к этому моменту внутренняя таблица уже должна быть подключена. if (attach_) { + data = context.tryGetTable(database_name, inner_table_name); if (!data) throw Exception("Inner table is not attached yet." " Materialized view: " + database_name + "." + table_name + "." From f3bcb40a45f32d8eaa2e7e4fa9138edf068671a3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 23 Jun 2015 02:54:25 +0300 Subject: [PATCH 30/67] dbms: added test to prev. revision [#METR-16945]. 
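A note on the QueryLog draft added above (PATCH 28): in threadFunction(), the line `if (element.type = QueryLogElement::SHUTDOWN)` assigns instead of comparing. Since SHUTDOWN is 0, the condition is always false, so the shutdown sentinel pushed by the destructor is never recognised (the saving thread would not exit on destruction) and every buffered element has its type clobbered to SHUTDOWN. The intended branch, judging from the surrounding code, is:

    if (has_element)
    {
        if (element.type == QueryLogElement::SHUTDOWN)   /// compare, don't assign
        {
            flush();
            break;
        }
        else
            data.push_back(element);
    }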
--- .../00180_attach_materialized_view.reference | 0 .../0_stateless/00180_attach_materialized_view.sql | 12 ++++++++++++ 2 files changed, 12 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00180_attach_materialized_view.reference create mode 100644 dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql diff --git a/dbms/tests/queries/0_stateless/00180_attach_materialized_view.reference b/dbms/tests/queries/0_stateless/00180_attach_materialized_view.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql b/dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql new file mode 100644 index 00000000000..81b162ee4dd --- /dev/null +++ b/dbms/tests/queries/0_stateless/00180_attach_materialized_view.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS test.t; +DROP TABLE IF EXISTS test.mv; +DROP TABLE IF EXISTS test.`.inner.mv`; + +CREATE TABLE test.t (x UInt8) ENGINE = Null; +CREATE MATERIALIZED VIEW test.mv AS SELECT * FROM test.t; + +DETACH TABLE test.mv; +ATTACH MATERIALIZED VIEW test.mv AS SELECT * FROM test.t; + +DROP TABLE test.t; +DROP TABLE test.mv; From 3874bef1fcf95413e4f0674aea9d2c7b610c1f66 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 23 Jun 2015 19:35:04 +0300 Subject: [PATCH 31/67] dbms: Server: Added unit test. [#METR-16188] --- ...1_aggregate_functions_statistics.reference | 14 ++ .../00181_aggregate_functions_statistics.sql | 143 ++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00181_aggregate_functions_statistics.reference create mode 100644 dbms/tests/queries/0_stateless/00181_aggregate_functions_statistics.sql diff --git a/dbms/tests/queries/0_stateless/00181_aggregate_functions_statistics.reference b/dbms/tests/queries/0_stateless/00181_aggregate_functions_statistics.reference new file mode 100644 index 00000000000..ce706690f1a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00181_aggregate_functions_statistics.reference @@ -0,0 +1,14 @@ +inf +0 +inf +0 +0 +0 +0 +0 +inf +0 +0 +0 +inf +0 diff --git a/dbms/tests/queries/0_stateless/00181_aggregate_functions_statistics.sql b/dbms/tests/queries/0_stateless/00181_aggregate_functions_statistics.sql new file mode 100644 index 00000000000..344b02b2d98 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00181_aggregate_functions_statistics.sql @@ -0,0 +1,143 @@ +DROP TABLE IF EXISTS series; + +CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory; + +/* Тестовые данные */ + +INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3); + +/* varSamp */ + +SELECT varSamp(x_value) FROM (SELECT x_value FROM series LIMIT 0); +SELECT varSamp(x_value) FROM (SELECT x_value FROM series LIMIT 1); + +SELECT round(abs(res1 - res2), 6) FROM +( +SELECT + varSamp(x_value) AS res1, + (sum(x_value * x_value) - ((sum(x_value) * sum(x_value)) / count())) / (count() - 1) AS res2 +FROM series +); + +/* stddevSamp */ + +SELECT stddevSamp(x_value) FROM (SELECT x_value FROM series LIMIT 0); +SELECT stddevSamp(x_value) FROM (SELECT x_value FROM series LIMIT 1); + +SELECT round(abs(res1 - res2), 6) FROM +( +SELECT + stddevSamp(x_value) AS res1, + sqrt((sum(x_value * x_value) - ((sum(x_value) * sum(x_value)) / count())) / (count() - 1)) AS res2 +FROM series +); + +/* varPop */ + +SELECT varPop(x_value) FROM (SELECT x_value FROM series 
LIMIT 0); +SELECT varPop(x_value) FROM (SELECT x_value FROM series LIMIT 1); + +SELECT round(abs(res1 - res2), 6) FROM +( +SELECT + varPop(x_value) AS res1, + (sum(x_value * x_value) - ((sum(x_value) * sum(x_value)) / count())) / count() AS res2 +FROM series +); + +/* stddevPop */ + +SELECT stddevPop(x_value) FROM (SELECT x_value FROM series LIMIT 0); +SELECT stddevPop(x_value) FROM (SELECT x_value FROM series LIMIT 1); + +SELECT round(abs(res1 - res2), 6) FROM +( +SELECT + stddevPop(x_value) AS res1, + sqrt((sum(x_value * x_value) - ((sum(x_value) * sum(x_value)) / count())) / count()) AS res2 +FROM series +); + +/* covarSamp */ + +SELECT covarSamp(x_value, y_value) FROM (SELECT x_value, y_value FROM series LIMIT 0); +SELECT covarSamp(x_value, y_value) FROM (SELECT x_value, y_value FROM series LIMIT 1); + +SELECT round(abs(COVAR1 - COVAR2), 6) +FROM +( + SELECT + arrayJoin([1]) AS ID2, + covarSamp(x_value, y_value) AS COVAR1 + FROM series +) ANY INNER JOIN +( + SELECT + arrayJoin([1]) AS ID2, + sum(VAL) / (count() - 1) AS COVAR2 + FROM + ( + SELECT (X - AVG_X) * (Y - AVG_Y) AS VAL + FROM + ( + SELECT + toUInt32(arrayJoin([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])) AS ID, + avg(x_value) AS AVG_X, + avg(y_value) AS AVG_Y + FROM series + ) ANY INNER JOIN + ( + SELECT + i AS ID, + x_value AS X, + y_value AS Y + FROM series + ) USING ID + ) +) USING ID2; + +/* covarPop */ + +SELECT covarPop(x_value, y_value) FROM (SELECT x_value, y_value FROM series LIMIT 0); +SELECT covarPop(x_value, y_value) FROM (SELECT x_value, y_value FROM series LIMIT 1); + +SELECT round(abs(COVAR1 - COVAR2), 6) +FROM +( + SELECT + arrayJoin([1]) AS ID2, + covarPop(x_value, y_value) AS COVAR1 + FROM series +) ANY INNER JOIN +( + SELECT + arrayJoin([1]) AS ID2, + sum(VAL) / count() AS COVAR2 + FROM + ( + SELECT (X - AVG_X) * (Y - AVG_Y) AS VAL + FROM + ( + SELECT + toUInt32(arrayJoin([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])) AS ID, + avg(x_value) AS AVG_X, + avg(y_value) AS AVG_Y + FROM series + ) ANY INNER JOIN + ( + SELECT + i AS ID, + x_value AS X, + y_value AS Y + FROM series + ) USING ID + ) +) USING ID2; + +/* corr */ + +SELECT corr(x_value, y_value) FROM (SELECT x_value, y_value FROM series LIMIT 0); +SELECT corr(x_value, y_value) FROM (SELECT x_value, y_value FROM series LIMIT 1); + +SELECT round(abs(corr(x_value, y_value) - covarPop(x_value, y_value) / (stddevPop(x_value) * stddevPop(y_value))), 6) FROM series; + From 4ecc523ea5cd0eba844b6256b72d35203d9cf7fd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jun 2015 00:24:06 +0300 Subject: [PATCH 32/67] dbms: fixed error with using comparison of Date or DateTime with constant string in index [#METR-2944]. --- .../DB/Storages/MergeTree/PKCondition.h | 106 +------------- dbms/src/Storages/MergeTree/PKCondition.cpp | 130 ++++++++++++++++++ 2 files changed, 132 insertions(+), 104 deletions(-) diff --git a/dbms/include/DB/Storages/MergeTree/PKCondition.h b/dbms/include/DB/Storages/MergeTree/PKCondition.h index 4e33612fc9f..47d34413db3 100644 --- a/dbms/include/DB/Storages/MergeTree/PKCondition.h +++ b/dbms/include/DB/Storages/MergeTree/PKCondition.h @@ -15,115 +15,13 @@ namespace DB { -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wsign-compare" - -/** Более точное сравнение. - * Отличается от Field::operator< и Field::operator== тем, что сравнивает значения разных числовых типов между собой. - * Правила сравнения - такие же, что и в FunctionsComparison. - * В том числе, сравнение знаковых и беззнаковых оставляем UB. 
- */ -class FieldVisitorAccurateEquals : public StaticVisitor -{ -public: - bool operator() (const Null & l, const Null & r) const { return true; } - bool operator() (const Null & l, const UInt64 & r) const { return false; } - bool operator() (const Null & l, const Int64 & r) const { return false; } - bool operator() (const Null & l, const Float64 & r) const { return false; } - bool operator() (const Null & l, const String & r) const { return false; } - bool operator() (const Null & l, const Array & r) const { return false; } - - bool operator() (const UInt64 & l, const Null & r) const { return false; } - bool operator() (const UInt64 & l, const UInt64 & r) const { return l == r; } - bool operator() (const UInt64 & l, const Int64 & r) const { return l == r; } - bool operator() (const UInt64 & l, const Float64 & r) const { return l == r; } - bool operator() (const UInt64 & l, const String & r) const { return false; } - bool operator() (const UInt64 & l, const Array & r) const { return false; } - - bool operator() (const Int64 & l, const Null & r) const { return false; } - bool operator() (const Int64 & l, const UInt64 & r) const { return l == r; } - bool operator() (const Int64 & l, const Int64 & r) const { return l == r; } - bool operator() (const Int64 & l, const Float64 & r) const { return l == r; } - bool operator() (const Int64 & l, const String & r) const { return false; } - bool operator() (const Int64 & l, const Array & r) const { return false; } - - bool operator() (const Float64 & l, const Null & r) const { return false; } - bool operator() (const Float64 & l, const UInt64 & r) const { return l == r; } - bool operator() (const Float64 & l, const Int64 & r) const { return l == r; } - bool operator() (const Float64 & l, const Float64 & r) const { return l == r; } - bool operator() (const Float64 & l, const String & r) const { return false; } - bool operator() (const Float64 & l, const Array & r) const { return false; } - - bool operator() (const String & l, const Null & r) const { return false; } - bool operator() (const String & l, const UInt64 & r) const { return false; } - bool operator() (const String & l, const Int64 & r) const { return false; } - bool operator() (const String & l, const Float64 & r) const { return false; } - bool operator() (const String & l, const String & r) const { return l == r; } - bool operator() (const String & l, const Array & r) const { return false; } - - bool operator() (const Array & l, const Null & r) const { return false; } - bool operator() (const Array & l, const UInt64 & r) const { return false; } - bool operator() (const Array & l, const Int64 & r) const { return false; } - bool operator() (const Array & l, const Float64 & r) const { return false; } - bool operator() (const Array & l, const String & r) const { return false; } - bool operator() (const Array & l, const Array & r) const { return l == r; } -}; - -class FieldVisitorAccurateLess : public StaticVisitor -{ -public: - bool operator() (const Null & l, const Null & r) const { return false; } - bool operator() (const Null & l, const UInt64 & r) const { return true; } - bool operator() (const Null & l, const Int64 & r) const { return true; } - bool operator() (const Null & l, const Float64 & r) const { return true; } - bool operator() (const Null & l, const String & r) const { return true; } - bool operator() (const Null & l, const Array & r) const { return true; } - - bool operator() (const UInt64 & l, const Null & r) const { return false; } - bool operator() (const UInt64 & l, const 
UInt64 & r) const { return l < r; } - bool operator() (const UInt64 & l, const Int64 & r) const { return l < r; } - bool operator() (const UInt64 & l, const Float64 & r) const { return l < r; } - bool operator() (const UInt64 & l, const String & r) const { return true; } - bool operator() (const UInt64 & l, const Array & r) const { return true; } - - bool operator() (const Int64 & l, const Null & r) const { return false; } - bool operator() (const Int64 & l, const UInt64 & r) const { return l < r; } - bool operator() (const Int64 & l, const Int64 & r) const { return l < r; } - bool operator() (const Int64 & l, const Float64 & r) const { return l < r; } - bool operator() (const Int64 & l, const String & r) const { return true; } - bool operator() (const Int64 & l, const Array & r) const { return true; } - - bool operator() (const Float64 & l, const Null & r) const { return false; } - bool operator() (const Float64 & l, const UInt64 & r) const { return l < r; } - bool operator() (const Float64 & l, const Int64 & r) const { return l < r; } - bool operator() (const Float64 & l, const Float64 & r) const { return l < r; } - bool operator() (const Float64 & l, const String & r) const { return true; } - bool operator() (const Float64 & l, const Array & r) const { return true; } - - bool operator() (const String & l, const Null & r) const { return false; } - bool operator() (const String & l, const UInt64 & r) const { return false; } - bool operator() (const String & l, const Int64 & r) const { return false; } - bool operator() (const String & l, const Float64 & r) const { return false; } - bool operator() (const String & l, const String & r) const { return l < r; } - bool operator() (const String & l, const Array & r) const { return true; } - - bool operator() (const Array & l, const Null & r) const { return false; } - bool operator() (const Array & l, const UInt64 & r) const { return false; } - bool operator() (const Array & l, const Int64 & r) const { return false; } - bool operator() (const Array & l, const Float64 & r) const { return false; } - bool operator() (const Array & l, const String & r) const { return false; } - bool operator() (const Array & l, const Array & r) const { return l < r; } -}; - -#pragma GCC diagnostic pop - /** Диапазон с открытыми или закрытыми концами; возможно, неограниченный. */ struct Range { private: - static bool equals(const Field & lhs, const Field & rhs) { return apply_visitor(FieldVisitorAccurateEquals(), lhs, rhs); } - static bool less(const Field & lhs, const Field & rhs) { return apply_visitor(FieldVisitorAccurateLess(), lhs, rhs); } + static bool equals(const Field & lhs, const Field & rhs); + static bool less(const Field & lhs, const Field & rhs); public: Field left; /// левая граница, если есть diff --git a/dbms/src/Storages/MergeTree/PKCondition.cpp b/dbms/src/Storages/MergeTree/PKCondition.cpp index ab680230d44..445a47b3f81 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.cpp +++ b/dbms/src/Storages/MergeTree/PKCondition.cpp @@ -8,6 +8,136 @@ namespace DB { + +/// Преобразование строки с датой или датой-с-временем в UInt64, содержащим числовое значение даты или даты-с-временем. 
+UInt64 stringToDateOrDateTime(const String & s) +{ + ReadBufferFromString in(s); + + if (s.size() == strlen("YYYY-MM-DD")) + { + DayNum_t date{}; + readDateText(date, in); + return UInt64(date); + } + else + { + time_t date_time{}; + readDateTimeText(date_time, in); + if (!in.eof()) + throw Exception("String is too long for DateTime: " + s); + return UInt64(date_time); + } +} + + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-compare" + +/** Более точное сравнение. + * Отличается от Field::operator< и Field::operator== тем, что сравнивает значения разных числовых типов между собой. + * Правила сравнения - такие же, что и в FunctionsComparison. + * В том числе, сравнение знаковых и беззнаковых оставляем UB. + */ +class FieldVisitorAccurateEquals : public StaticVisitor +{ +public: + bool operator() (const Null & l, const Null & r) const { return true; } + bool operator() (const Null & l, const UInt64 & r) const { return false; } + bool operator() (const Null & l, const Int64 & r) const { return false; } + bool operator() (const Null & l, const Float64 & r) const { return false; } + bool operator() (const Null & l, const String & r) const { return false; } + bool operator() (const Null & l, const Array & r) const { return false; } + + bool operator() (const UInt64 & l, const Null & r) const { return false; } + bool operator() (const UInt64 & l, const UInt64 & r) const { return l == r; } + bool operator() (const UInt64 & l, const Int64 & r) const { return l == r; } + bool operator() (const UInt64 & l, const Float64 & r) const { return l == r; } + bool operator() (const UInt64 & l, const String & r) const { return l == stringToDateOrDateTime(r); } + bool operator() (const UInt64 & l, const Array & r) const { return false; } + + bool operator() (const Int64 & l, const Null & r) const { return false; } + bool operator() (const Int64 & l, const UInt64 & r) const { return l == r; } + bool operator() (const Int64 & l, const Int64 & r) const { return l == r; } + bool operator() (const Int64 & l, const Float64 & r) const { return l == r; } + bool operator() (const Int64 & l, const String & r) const { return false; } + bool operator() (const Int64 & l, const Array & r) const { return false; } + + bool operator() (const Float64 & l, const Null & r) const { return false; } + bool operator() (const Float64 & l, const UInt64 & r) const { return l == r; } + bool operator() (const Float64 & l, const Int64 & r) const { return l == r; } + bool operator() (const Float64 & l, const Float64 & r) const { return l == r; } + bool operator() (const Float64 & l, const String & r) const { return false; } + bool operator() (const Float64 & l, const Array & r) const { return false; } + + bool operator() (const String & l, const Null & r) const { return false; } + bool operator() (const String & l, const UInt64 & r) const { return stringToDateOrDateTime(l) == r; } + bool operator() (const String & l, const Int64 & r) const { return false; } + bool operator() (const String & l, const Float64 & r) const { return false; } + bool operator() (const String & l, const String & r) const { return l == r; } + bool operator() (const String & l, const Array & r) const { return false; } + + bool operator() (const Array & l, const Null & r) const { return false; } + bool operator() (const Array & l, const UInt64 & r) const { return false; } + bool operator() (const Array & l, const Int64 & r) const { return false; } + bool operator() (const Array & l, const Float64 & r) const { return false; } + bool operator() 
(const Array & l, const String & r) const { return false; } + bool operator() (const Array & l, const Array & r) const { return l == r; } +}; + +class FieldVisitorAccurateLess : public StaticVisitor +{ +public: + bool operator() (const Null & l, const Null & r) const { return false; } + bool operator() (const Null & l, const UInt64 & r) const { return true; } + bool operator() (const Null & l, const Int64 & r) const { return true; } + bool operator() (const Null & l, const Float64 & r) const { return true; } + bool operator() (const Null & l, const String & r) const { return true; } + bool operator() (const Null & l, const Array & r) const { return true; } + + bool operator() (const UInt64 & l, const Null & r) const { return false; } + bool operator() (const UInt64 & l, const UInt64 & r) const { return l < r; } + bool operator() (const UInt64 & l, const Int64 & r) const { return l < r; } + bool operator() (const UInt64 & l, const Float64 & r) const { return l < r; } + bool operator() (const UInt64 & l, const String & r) const { return l < stringToDateOrDateTime(r); } + bool operator() (const UInt64 & l, const Array & r) const { return true; } + + bool operator() (const Int64 & l, const Null & r) const { return false; } + bool operator() (const Int64 & l, const UInt64 & r) const { return l < r; } + bool operator() (const Int64 & l, const Int64 & r) const { return l < r; } + bool operator() (const Int64 & l, const Float64 & r) const { return l < r; } + bool operator() (const Int64 & l, const String & r) const { return true; } + bool operator() (const Int64 & l, const Array & r) const { return true; } + + bool operator() (const Float64 & l, const Null & r) const { return false; } + bool operator() (const Float64 & l, const UInt64 & r) const { return l < r; } + bool operator() (const Float64 & l, const Int64 & r) const { return l < r; } + bool operator() (const Float64 & l, const Float64 & r) const { return l < r; } + bool operator() (const Float64 & l, const String & r) const { return true; } + bool operator() (const Float64 & l, const Array & r) const { return true; } + + bool operator() (const String & l, const Null & r) const { return false; } + bool operator() (const String & l, const UInt64 & r) const { return stringToDateOrDateTime(l) < r; } + bool operator() (const String & l, const Int64 & r) const { return false; } + bool operator() (const String & l, const Float64 & r) const { return false; } + bool operator() (const String & l, const String & r) const { return l < r; } + bool operator() (const String & l, const Array & r) const { return true; } + + bool operator() (const Array & l, const Null & r) const { return false; } + bool operator() (const Array & l, const UInt64 & r) const { return false; } + bool operator() (const Array & l, const Int64 & r) const { return false; } + bool operator() (const Array & l, const Float64 & r) const { return false; } + bool operator() (const Array & l, const String & r) const { return false; } + bool operator() (const Array & l, const Array & r) const { return l < r; } +}; + +#pragma GCC diagnostic pop + + +inline bool Range::equals(const Field & lhs, const Field & rhs) { return apply_visitor(FieldVisitorAccurateEquals(), lhs, rhs); } +inline bool Range::less(const Field & lhs, const Field & rhs) { return apply_visitor(FieldVisitorAccurateLess(), lhs, rhs); } + + PKCondition::PKCondition(ASTPtr query, const Context & context_, const NamesAndTypesList & all_columns, const SortDescription & sort_descr_) : sort_descr(sort_descr_) { From 
9dd999708c02d4501fec7c5e28d9aab29432288e Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 25 Jun 2015 16:03:46 +0300 Subject: [PATCH 33/67] Merge --- dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp b/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp index 3bd49d17231..a6c10f8d05a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp @@ -156,8 +156,6 @@ struct Stream return; } - std::cout << "mrk_mark " << mrk_mark.offset_in_compressed_file << ' ' << mrk_mark.offset_in_decompressed_block << std::endl; - data_mark.offset_in_compressed_file = compressed_hashing_buf.count() - uncompressing_buf.getSizeCompressed(); data_mark.offset_in_decompressed_block = uncompressed_hashing_buf.offset(); From a522013939ce24e4f475bee8a68e7b4a1f38af9b Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 25 Jun 2015 20:38:54 +0300 Subject: [PATCH 34/67] Merge --- dbms/include/DB/Parsers/ASTQueryWithOutput.h | 10 +++++++--- dbms/include/DB/Parsers/ASTSelectQuery.h | 3 +++ dbms/src/Client/Client.cpp | 4 ++-- dbms/src/Interpreters/executeQuery.cpp | 4 ++-- dbms/src/Parsers/ASTSelectQuery.cpp | 9 +++++++++ dbms/src/Parsers/ParserSelectQuery.cpp | 11 ++++++++++- .../0_stateless/00098_k_union_all.reference | 18 ++++++++++++++++++ .../queries/0_stateless/00098_k_union_all.sql | 3 +++ 8 files changed, 54 insertions(+), 8 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00098_k_union_all.reference create mode 100644 dbms/tests/queries/0_stateless/00098_k_union_all.sql diff --git a/dbms/include/DB/Parsers/ASTQueryWithOutput.h b/dbms/include/DB/Parsers/ASTQueryWithOutput.h index 2b5723249f2..92cba621eab 100644 --- a/dbms/include/DB/Parsers/ASTQueryWithOutput.h +++ b/dbms/include/DB/Parsers/ASTQueryWithOutput.h @@ -5,10 +5,9 @@ namespace DB { - - + /** Запрос с секцией FORMAT. - */ + */ class ASTQueryWithOutput : public IAST { public: @@ -16,6 +15,11 @@ public: ASTQueryWithOutput() = default; ASTQueryWithOutput(const StringRange range_) : IAST(range_) {} + + /** Возвращает указатель на формат. Если типом объекта является ASTSelectQuery, + * то эта функция возвращает указатель на формат из последнего SELECT'а цепочки UNION ALL. + */ + virtual const IAST * getFormat() const { return format.get(); } }; diff --git a/dbms/include/DB/Parsers/ASTSelectQuery.h b/dbms/include/DB/Parsers/ASTSelectQuery.h index 2d5f9cba3a2..095e9436238 100644 --- a/dbms/include/DB/Parsers/ASTSelectQuery.h +++ b/dbms/include/DB/Parsers/ASTSelectQuery.h @@ -34,6 +34,9 @@ public: ASTPtr clone() const override; + /// Возвращает указатель на формат из последнего SELECT'а цепочки UNION ALL. + const IAST * getFormat() const override; + public: bool distinct = false; ASTPtr select_expression_list; diff --git a/dbms/src/Client/Client.cpp b/dbms/src/Client/Client.cpp index 103ba626a4c..136e9aa4325 100644 --- a/dbms/src/Client/Client.cpp +++ b/dbms/src/Client/Client.cpp @@ -850,11 +850,11 @@ private: /// Формат может быть указан в запросе. 
if (ASTQueryWithOutput * query_with_output = dynamic_cast(&*parsed_query)) { - if (query_with_output->format) + if (query_with_output->getFormat() != nullptr) { if (has_vertical_output_suffix) throw Exception("Output format already specified", ErrorCodes::CLIENT_OUTPUT_FORMAT_SPECIFIED); - if (ASTIdentifier * id = typeid_cast(&*query_with_output->format)) + if (const ASTIdentifier * id = typeid_cast(query_with_output->getFormat())) current_format = id->name; } } diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 91fdca2b2ad..b122fc24eea 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -211,8 +211,8 @@ void executeQuery( { const ASTQueryWithOutput * ast_query_with_output = dynamic_cast(ast.get()); - String format_name = ast_query_with_output && ast_query_with_output->format - ? typeid_cast(*ast_query_with_output->format).name + String format_name = ast_query_with_output && (ast_query_with_output->getFormat() != nullptr) + ? typeid_cast(*ast_query_with_output->getFormat()).name : context.getDefaultFormat(); BlockOutputStreamPtr out = context.getFormatFactory().getOutput(format_name, ostr, streams.in_sample); diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index f98ec0fa39b..8e8cc668c50 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -172,5 +172,14 @@ ASTPtr ASTSelectQuery::clone() const return ptr; } + +const IAST * ASTSelectQuery::getFormat() const +{ + const ASTSelectQuery * query = this; + while (!query->next_union_all.isNull()) + query = static_cast(query->next_union_all.get()); + return query->format.get(); +} + }; diff --git a/dbms/src/Parsers/ParserSelectQuery.cpp b/dbms/src/Parsers/ParserSelectQuery.cpp index 91e806ba436..8a70bdc4a53 100644 --- a/dbms/src/Parsers/ParserSelectQuery.cpp +++ b/dbms/src/Parsers/ParserSelectQuery.cpp @@ -8,7 +8,6 @@ #include #include - namespace DB { @@ -296,6 +295,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p ws.ignore(pos, end); } + bool has_format = false; + /// FORMAT format_name if (s_format.ignore(pos, end, max_parsed_pos, expected)) { @@ -308,6 +309,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p typeid_cast(*select_query->format).kind = ASTIdentifier::Format; ws.ignore(pos, end); + has_format = true; } // UNION ALL select query @@ -317,6 +319,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p if (s_all.ignore(pos, end, max_parsed_pos, expected)) { + if (has_format) + { + /// FORMAT может быть задан только в последнем запросе цепочки UNION ALL. 
+ expected = "FORMAT only in the last SELECT of the UNION ALL chain"; + return false; + } + ParserSelectQuery select_p; if (!select_p.parse(pos, end, select_query->next_union_all, max_parsed_pos, expected)) return false; diff --git a/dbms/tests/queries/0_stateless/00098_k_union_all.reference b/dbms/tests/queries/0_stateless/00098_k_union_all.reference new file mode 100644 index 00000000000..7cd63dac99e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00098_k_union_all.reference @@ -0,0 +1,18 @@ +1 + +1 +1 + +1 +1 + +2 +1 + +1 +1 + +2 +1 + +3 diff --git a/dbms/tests/queries/0_stateless/00098_k_union_all.sql b/dbms/tests/queries/0_stateless/00098_k_union_all.sql new file mode 100644 index 00000000000..cd3a7706c72 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00098_k_union_all.sql @@ -0,0 +1,3 @@ +SELECT 1 FORMAT PrettySpace; +SELECT 1 UNION ALL SELECT 2 FORMAT PrettySpace; +SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 FORMAT PrettySpace; From a13e6f6a56ff81a2892302b26e9917d5f397fa54 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 25 Jun 2015 21:03:09 +0300 Subject: [PATCH 35/67] dbms: Server: Removed test because UNION ALL and FORMAT cannot be reliably tested together with our current functional test framework: 1) in the result of a UNION ALL the rows may appear in any order; 2) If put UNION ALL in a subrequest and use ORDER BY, we cannot test FORMAT. [#METR-16716] --- .../0_stateless/00098_k_union_all.reference | 18 ------------------ .../queries/0_stateless/00098_k_union_all.sql | 3 --- 2 files changed, 21 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/00098_k_union_all.reference delete mode 100644 dbms/tests/queries/0_stateless/00098_k_union_all.sql diff --git a/dbms/tests/queries/0_stateless/00098_k_union_all.reference b/dbms/tests/queries/0_stateless/00098_k_union_all.reference deleted file mode 100644 index 7cd63dac99e..00000000000 --- a/dbms/tests/queries/0_stateless/00098_k_union_all.reference +++ /dev/null @@ -1,18 +0,0 @@ -1 - -1 -1 - -1 -1 - -2 -1 - -1 -1 - -2 -1 - -3 diff --git a/dbms/tests/queries/0_stateless/00098_k_union_all.sql b/dbms/tests/queries/0_stateless/00098_k_union_all.sql deleted file mode 100644 index cd3a7706c72..00000000000 --- a/dbms/tests/queries/0_stateless/00098_k_union_all.sql +++ /dev/null @@ -1,3 +0,0 @@ -SELECT 1 FORMAT PrettySpace; -SELECT 1 UNION ALL SELECT 2 FORMAT PrettySpace; -SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 FORMAT PrettySpace; From 3e25e37feb22853b93c5c8945e17db3cc4fa012d Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 26 Jun 2015 11:05:59 +0300 Subject: [PATCH 36/67] dbms: Server: Re-added functional test [#METR-16716] --- .../0_stateless/00098_k_union_all.reference | 18 ++++++++++++++++++ .../queries/0_stateless/00098_k_union_all.sql | 3 +++ 2 files changed, 21 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00098_k_union_all.reference create mode 100644 dbms/tests/queries/0_stateless/00098_k_union_all.sql diff --git a/dbms/tests/queries/0_stateless/00098_k_union_all.reference b/dbms/tests/queries/0_stateless/00098_k_union_all.reference new file mode 100644 index 00000000000..1b21a751afc --- /dev/null +++ b/dbms/tests/queries/0_stateless/00098_k_union_all.reference @@ -0,0 +1,18 @@ +1 + +1 +1 + +1 +1 + +1 +1 + +1 +1 + +1 +1 + +1 diff --git a/dbms/tests/queries/0_stateless/00098_k_union_all.sql b/dbms/tests/queries/0_stateless/00098_k_union_all.sql new file mode 100644 index 00000000000..311e5bb19c4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00098_k_union_all.sql @@ -0,0 
+1,3 @@ +SELECT 1 FORMAT PrettySpace; +SELECT 1 UNION ALL SELECT 1 FORMAT PrettySpace; +SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 FORMAT PrettySpace; From f7ec7dbf9e4917741c776dafee7654eb10052622 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 26 Jun 2015 13:37:58 +0300 Subject: [PATCH 37/67] dbms: ReadBuffer::ignore does not call eof when n == 0 [#MTRSADMIN-1093] --- dbms/include/DB/IO/ReadBuffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/IO/ReadBuffer.h b/dbms/include/DB/IO/ReadBuffer.h index 6366bbe6b01..97442d69717 100644 --- a/dbms/include/DB/IO/ReadBuffer.h +++ b/dbms/include/DB/IO/ReadBuffer.h @@ -84,7 +84,7 @@ public: void ignore(size_t n) { - while (!eof() && n != 0) + while (n != 0 && !eof()) { size_t bytes_to_ignore = std::min(static_cast(working_buffer.end() - pos), n); pos += bytes_to_ignore; From a9d16d8a9fc09663dd8d2561fbe98251f3e5e956 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 26 Jun 2015 13:52:20 +0300 Subject: [PATCH 38/67] Merge --- .../MergeTree/MergeTreePartChecker.cpp | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp b/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp index a6c10f8d05a..c8e8a84f3fe 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartChecker.cpp @@ -11,7 +11,6 @@ #include #include #include -#include namespace DB @@ -116,12 +115,6 @@ struct Stream readIntBinary(mrk_mark.offset_in_compressed_file, mrk_hashing_buf); readIntBinary(mrk_mark.offset_in_decompressed_block, mrk_hashing_buf); - /// На всякий случай, сохраним смещение в файле и размер предыдущего блока. - SCOPE_EXIT( - prev_offset_in_compressed_file = mrk_mark.offset_in_compressed_file; - prev_buffer_size = uncompressed_hashing_buf.buffer().size(); - ); - bool has_alternative_mark = false; MarkInCompressedFile alternative_data_mark; MarkInCompressedFile data_mark; @@ -145,16 +138,6 @@ struct Stream if (uncompressed_hashing_buf.eof()) return; } - else if (uncompressed_hashing_buf.offset() == 0) - { - /// Восстановим засечку на конец предыдущего блока по сохраненным данным - has_alternative_mark = true; - alternative_data_mark.offset_in_compressed_file = prev_offset_in_compressed_file; - alternative_data_mark.offset_in_decompressed_block = prev_buffer_size; - - if (mrk_mark == alternative_data_mark) - return; - } data_mark.offset_in_compressed_file = compressed_hashing_buf.count() - uncompressing_buf.getSizeCompressed(); data_mark.offset_in_decompressed_block = uncompressed_hashing_buf.offset(); @@ -178,10 +161,6 @@ struct Stream checksums.files[name + ".mrk"] = MergeTreeData::DataPart::Checksums::Checksum( mrk_hashing_buf.count(), mrk_hashing_buf.getHash()); } - -private: - size_t prev_offset_in_compressed_file{}; - size_t prev_buffer_size{}; }; /// Возвращает количество строк. Добавляет в checksums чексуммы всех файлов столбца. 
From f2f8bf86cc42261dbca55c2484cae9a80672d78b Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 26 Jun 2015 18:45:08 +0300 Subject: [PATCH 39/67] dbms: external dictionaries: fix null pointer dereference after calling unique_ptr::release() --- dbms/src/Interpreters/ExternalDictionaries.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/ExternalDictionaries.cpp b/dbms/src/Interpreters/ExternalDictionaries.cpp index 2f2d8e2cb32..247c5e99a76 100644 --- a/dbms/src/Interpreters/ExternalDictionaries.cpp +++ b/dbms/src/Interpreters/ExternalDictionaries.cpp @@ -81,6 +81,10 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) { const std::lock_guard lock{dictionaries_mutex}; + const auto & lifetime = dict_ptr->getLifetime(); + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + update_times[name] = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; + const auto dict_it = dictionaries.find(name); if (dict_it->second.dict) dict_it->second.dict->set(dict_ptr.release()); @@ -90,10 +94,6 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) /// erase stored exception on success dict_it->second.exception = std::exception_ptr{}; - const auto & lifetime = dict_ptr->getLifetime(); - std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; - update_times[name] = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}; - recreated_failed_dictionaries.push_back(name); } } From c9e225be12176d5edefde030245e8e3e92387971 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jun 2015 22:23:25 +0300 Subject: [PATCH 40/67] dbms: QueryLog: development [#METR-16946]. --- dbms/include/DB/Interpreters/QueryLog.h | 132 +----------- dbms/src/Interpreters/QueryLog.cpp | 269 ++++++++++++++++++++++++ 2 files changed, 279 insertions(+), 122 deletions(-) create mode 100644 dbms/src/Interpreters/QueryLog.cpp diff --git a/dbms/include/DB/Interpreters/QueryLog.h b/dbms/include/DB/Interpreters/QueryLog.h index 50f21c31b34..77eab7e6439 100644 --- a/dbms/include/DB/Interpreters/QueryLog.h +++ b/dbms/include/DB/Interpreters/QueryLog.h @@ -5,8 +5,7 @@ #include #include #include -#include -#include +#include namespace DB @@ -76,6 +75,8 @@ struct QueryLogElement #define DBMS_QUERY_LOG_QUEUE_SIZE 1024 +class Context; + class QueryLog : private boost::noncopyable { @@ -89,25 +90,8 @@ public: * где N - минимальное число, начиная с 1 такое, что таблицы с таким именем ещё нет; * и создаётся новая таблица, как будто существующей таблицы не было. */ - QueryLog(Context & context_, const String & database_name_, const String & table_name_, size_t flush_interval_milliseconds_) - : context(context_), database_name(database_name_), table_name(table_name_), flush_interval_milliseconds(flush_interval_milliseconds_) - { - data.reserve(DBMS_QUERY_LOG_QUEUE_SIZE); - - // TODO - - saving_thread = std::thread([this] { threadFunction(); }); - } - - ~QueryLog() - { - /// Говорим потоку, что надо завершиться. - QueryLogElement elem; - elem.type = QueryLogElement::SHUTDOWN; - queue.push(elem); - - saving_thread.join(); - } + QueryLog(Context & context_, const String & database_name_, const String & table_name_, size_t flush_interval_milliseconds_); + ~QueryLog(); /** Добавить запись в лог. * Сохранение в таблицу делается асинхронно, и в случае сбоя, запись может никуда не попасть. 
@@ -134,111 +118,15 @@ private: */ std::vector data; + Logger * log {&Logger::get("QueryLog")}; + /** В этом потоке данные вынимаются из queue, складываются в data, а затем вставляются в таблицу. */ std::thread saving_thread; - - void threadFunction() - { - Stopwatch time_after_last_write; - bool first = true; - - while (true) - { - try - { - if (first) - { - time_after_last_write.restart(); - first = false; - } - - QueryLogElement element; - bool has_element = false; - - if (data.empty()) - { - element = queue.pop(); - has_element = true; - } - else - { - size_t milliseconds_elapsed = time_after_last_write.elapsed() / 1000000; - if (milliseconds_elapsed < flush_interval_milliseconds) - has_element = queue.tryPop(element, flush_interval_milliseconds - milliseconds_elapsed); - } - - if (has_element) - { - if (element.type = QueryLogElement::SHUTDOWN) - { - flush(); - break; - } - else - data.push_back(element); - } - - size_t milliseconds_elapsed = time_after_last_write.elapsed() / 1000000; - if (milliseconds_elapsed >= flush_interval_milliseconds) - { - /// Записываем данные в таблицу. - flush(); - time_after_last_write.restart(); - } - } - catch (...) - { - /// В случае ошибки теряем накопленные записи, чтобы не блокироваться. - data.clear(); - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - } - - Block createBlock() - { - return { - {new ColumnUInt8, new DataTypeUInt8, "type"}, - {new ColumnUInt32, new DataTypeDateTime, "event_time"}, - {new ColumnUInt32, new DataTypeDateTime, "query_start_time"}, - }; - - /* time_t event_time; - time_t query_start_time; - UInt64 query_duration_ms; - - UInt64 read_rows; - UInt64 read_bytes; - - UInt64 result_rows; - UInt64 result_bytes; - - String query; - - Interface interface; - HTTPMethod http_method; - Poco::Net::IPAddress ip_address; - String user; - String query_id;*/ - } - - void flush() - { - try - { - Block block = createBlock(); - - // TODO Формирование блока и запись. - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - - data.clear(); - } + void threadFunction(); + static Block createBlock(); + void flush(); }; diff --git a/dbms/src/Interpreters/QueryLog.cpp b/dbms/src/Interpreters/QueryLog.cpp new file mode 100644 index 00000000000..6c823712e37 --- /dev/null +++ b/dbms/src/Interpreters/QueryLog.cpp @@ -0,0 +1,269 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + + +QueryLog::QueryLog(Context & context_, const String & database_name_, const String & table_name_, size_t flush_interval_milliseconds_) + : context(context_), database_name(database_name_), table_name(table_name_), flush_interval_milliseconds(flush_interval_milliseconds_) +{ + data.reserve(DBMS_QUERY_LOG_QUEUE_SIZE); + + { + String description = backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name); + + Poco::ScopedLock lock(context.getMutex()); + + table = context.tryGetTable(database_name, table_name); + + if (table) + { + const Block expected = createBlock(); + const Block actual = table->getSampleBlockNonMaterialized(); + + if (!blocksHaveEqualStructure(actual, expected)) + { + /// Переименовываем существующую таблицу. 
+ int suffix = 0; + while (context.isTableExist(database_name, table_name + "_" + toString(suffix))) + ++suffix; + + ASTRenameQuery * rename = new ASTRenameQuery; + ASTPtr holder = rename; + + ASTRenameQuery::Table from; + from.database = database_name; + from.table = table_name; + + ASTRenameQuery::Table to; + to.database = database_name; + to.table = table_name + "_" + toString(suffix); + + ASTRenameQuery::Element elem; + elem.from = from; + elem.to = to; + + rename->elements.emplace_back(elem); + + LOG_DEBUG(log, "Existing table " << description << " for query log has obsolete or different structure." + " Renaming it to " << backQuoteIfNeed(to.table)); + + InterpreterRenameQuery(holder, context).execute(); + + /// Нужная таблица будет создана. + table = nullptr; + } + else + LOG_DEBUG(log, "Will use existing table " << description << " for query log."); + } + + if (!table) + { + /// Создаём таблицу. + LOG_DEBUG(log, "Creating new table " << description << " for query log."); + + ASTCreateQuery * create = new ASTCreateQuery; + ASTPtr holder = create; + + create->database = database_name; + create->table = table_name; + + Block sample = createBlock(); + create->columns = InterpreterCreateQuery::formatColumns(sample.getColumnsList()); + + String engine = "MergeTree(event_date, event_time, 8192)"; + ParserFunction engine_parser; + + create->storage = parseQuery(engine_parser, engine.data(), engine.data() + engine.size(), "ENGINE to create table for query log"); + + InterpreterCreateQuery(holder, context).execute(); + + table = context.getTable(database_name, table_name); + } + } + + saving_thread = std::thread([this] { threadFunction(); }); +} + + +QueryLog::~QueryLog() +{ + /// Говорим потоку, что надо завершиться. + QueryLogElement elem; + elem.type = QueryLogElement::SHUTDOWN; + queue.push(elem); + + saving_thread.join(); +} + + +void QueryLog::threadFunction() +{ + Stopwatch time_after_last_write; + bool first = true; + + while (true) + { + try + { + if (first) + { + time_after_last_write.restart(); + first = false; + } + + QueryLogElement element; + bool has_element = false; + + if (data.empty()) + { + queue.pop(element); + has_element = true; + } + else + { + size_t milliseconds_elapsed = time_after_last_write.elapsed() / 1000000; + if (milliseconds_elapsed < flush_interval_milliseconds) + has_element = queue.tryPop(element, flush_interval_milliseconds - milliseconds_elapsed); + } + + if (has_element) + { + if (element.type == QueryLogElement::SHUTDOWN) + { + flush(); + break; + } + else + data.push_back(element); + } + + size_t milliseconds_elapsed = time_after_last_write.elapsed() / 1000000; + if (milliseconds_elapsed >= flush_interval_milliseconds) + { + /// Записываем данные в таблицу. + flush(); + time_after_last_write.restart(); + } + } + catch (...) + { + /// В случае ошибки теряем накопленные записи, чтобы не блокироваться. 
+ data.clear(); + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + + +Block QueryLog::createBlock() +{ + return + { + {new ColumnUInt8, new DataTypeUInt8, "type"}, + {new ColumnUInt16, new DataTypeDate, "event_date"}, + {new ColumnUInt32, new DataTypeDateTime, "event_time"}, + {new ColumnUInt32, new DataTypeDateTime, "query_start_time"}, + {new ColumnUInt64, new DataTypeUInt64, "query_duration_ms"}, + + {new ColumnUInt64, new DataTypeUInt64, "read_rows"}, + {new ColumnUInt64, new DataTypeUInt64, "read_bytes"}, + + {new ColumnUInt64, new DataTypeUInt64, "result_rows"}, + {new ColumnUInt64, new DataTypeUInt64, "result_bytes"}, + + {new ColumnString, new DataTypeString, "query"}, + + {new ColumnUInt8, new DataTypeUInt8, "interface"}, + {new ColumnUInt8, new DataTypeUInt8, "http_method"}, + {new ColumnFixedString(16), new DataTypeFixedString(16), "ip_address"}, + {new ColumnString, new DataTypeString, "user"}, + {new ColumnString, new DataTypeString, "query_id"}, + }; +} + + +void QueryLog::flush() +{ + try + { + LOG_TRACE(log, "Flushing query log"); + + DateLUT & date_lut = DateLUT::instance(); + + Block block = createBlock(); + + for (const QueryLogElement & elem : data) + { + block.unsafeGetByPosition(0).column.get()->insert(static_cast(elem.type)); + block.unsafeGetByPosition(1).column.get()->insert(static_cast(date_lut.toDayNum(elem.event_time))); + block.unsafeGetByPosition(2).column.get()->insert(static_cast(elem.event_time)); + block.unsafeGetByPosition(3).column.get()->insert(static_cast(elem.query_start_time)); + block.unsafeGetByPosition(4).column.get()->insert(static_cast(elem.query_duration_ms)); + + block.unsafeGetByPosition(5).column.get()->insert(static_cast(elem.read_rows)); + block.unsafeGetByPosition(6).column.get()->insert(static_cast(elem.read_bytes)); + + block.unsafeGetByPosition(7).column.get()->insert(static_cast(elem.result_rows)); + block.unsafeGetByPosition(8).column.get()->insert(static_cast(elem.result_bytes)); + + block.unsafeGetByPosition(9).column.get()->insertData(elem.query.data(), elem.query.size()); + + block.unsafeGetByPosition(10).column.get()->insert(static_cast(elem.interface)); + block.unsafeGetByPosition(11).column.get()->insert(static_cast(elem.http_method)); + + char ipv6_binary[16]; + if (Poco::Net::IPAddress::IPv6 == elem.ip_address.family()) + { + memcpy(ipv6_binary, elem.ip_address.addr(), 16); + } + else if (Poco::Net::IPAddress::IPv4 == elem.ip_address.family()) + { + /// Преобразуем в IPv6-mapped адрес. + memset(ipv6_binary, 0, 10); + ipv6_binary[10] = '\xFF'; + ipv6_binary[11] = '\xFF'; + memcpy(&ipv6_binary[12], elem.ip_address.addr(), 4); + } + else + memset(ipv6_binary, 0, 16); + + block.unsafeGetByPosition(12).column.get()->insertData(ipv6_binary, 16); + + block.unsafeGetByPosition(13).column.get()->insertData(elem.user.data(), elem.user.size()); + block.unsafeGetByPosition(14).column.get()->insertData(elem.query_id.data(), elem.query_id.size()); + } + + BlockOutputStreamPtr stream = table->write(nullptr); + + stream->writePrefix(); + stream->write(block); + stream->writeSuffix(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + data.clear(); +} + +} From c36fdf0e5665232d0dbca62f5f8798169cedb8d7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jun 2015 23:48:10 +0300 Subject: [PATCH 41/67] dbms: initial implementation of QueryLog [#METR-16946]. 
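This change wires the log into executeQuery(): when the new log_queries setting
is enabled, a QUERY_START record is written with the query text, interface
(TCP/HTTP/OLAP_HTTP), HTTP method, client address, user and query_id. The target
table defaults to system.query_log (configurable via query_log.database,
query_log.table and query_log.flush_interval_milliseconds) and is created lazily
with the MergeTree(event_date, event_time, 8192) engine from the previous commit.

Usage sketch, not part of the patch itself; it assumes log_queries can be turned
on per session with SET like any other setting, and that only QUERY_START
(type = 1) records exist at this point, so the duration and row/byte counters
are still zero:

    SET log_queries = 1;
    SELECT 1;
    -- after the next flush (query_log.flush_interval_milliseconds):
    SELECT type, event_time, query, user, query_id
    FROM system.query_log
    ORDER BY event_time DESC
    LIMIT 10;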
--- dbms/include/DB/Core/Defines.h | 2 ++ dbms/include/DB/Interpreters/Context.h | 27 +++++++++++++++++++ dbms/include/DB/Interpreters/QueryLog.h | 35 ++++++++----------------- dbms/include/DB/Interpreters/Settings.h | 3 +++ dbms/src/Interpreters/Context.cpp | 22 ++++++++++++++++ dbms/src/Interpreters/QueryLog.cpp | 2 +- dbms/src/Interpreters/executeQuery.cpp | 27 +++++++++++++++++-- dbms/src/Server/HTTPHandler.cpp | 10 +++++++ dbms/src/Server/OLAPHTTPHandler.cpp | 3 +++ dbms/src/Server/Server.cpp | 2 ++ dbms/src/Server/TCPHandler.cpp | 1 + 11 files changed, 107 insertions(+), 27 deletions(-) diff --git a/dbms/include/DB/Core/Defines.h b/dbms/include/DB/Core/Defines.h index ba89beee564..f677967fa31 100644 --- a/dbms/include/DB/Core/Defines.h +++ b/dbms/include/DB/Core/Defines.h @@ -78,5 +78,7 @@ /// Граница, на которых должны быть выровнены блоки для асинхронных файловых операций. #define DEFAULT_AIO_FILE_BLOCK_SIZE 4096 +#define DEFAULT_QUERY_LOG_FLUSH_INTERVAL_MILLISECONDS_STR "10000" + #define ALWAYS_INLINE __attribute__((__always_inline__)) #define NO_INLINE __attribute__((__noinline__)) diff --git a/dbms/include/DB/Interpreters/Context.h b/dbms/include/DB/Interpreters/Context.h index f646c627ecb..2f62242ae8a 100644 --- a/dbms/include/DB/Interpreters/Context.h +++ b/dbms/include/DB/Interpreters/Context.h @@ -40,6 +40,7 @@ class ProcessListElement; class Macros; class Progress; class Clusters; +class QueryLog; /// имя таблицы -> таблица @@ -64,12 +65,30 @@ typedef std::vector Dependencies; */ class Context { +public: + enum class Interface + { + TCP = 1, + HTTP = 2, + OLAP_HTTP = 3, + }; + + enum class HTTPMethod + { + UNKNOWN = 0, + GET = 1, + POST = 2, + }; + private: typedef std::shared_ptr Shared; Shared shared; String user; /// Текущий пользователь. Poco::Net::IPAddress ip_address; /// IP-адрес, с которого задан запрос. + Interface interface = Interface::TCP; + HTTPMethod http_method = HTTPMethod::UNKNOWN; /// NOTE Возможно, перенести это в отдельный struct ClientInfo. + std::shared_ptr quota; /// Текущая квота. По-умолчанию - пустая квота, которая ничего не ограничивает. String current_database; /// Текущая БД. String current_query_id; /// Id текущего запроса. @@ -108,6 +127,12 @@ public: String getUser() const { return user; } Poco::Net::IPAddress getIPAddress() const { return ip_address; } + Interface getInterface() const { return interface; } + void setInterface(Interface interface_) { interface = interface_; } + + HTTPMethod getHTTPMethod() const { return http_method; } + void setHTTPMethod(HTTPMethod http_method_) { http_method = http_method_; } + void setQuota(const String & name, const String & quota_key, const String & user_name, const Poco::Net::IPAddress & address); QuotaForIntervals & getQuota(); @@ -242,6 +267,8 @@ public: Compiler & getCompiler(); + QueryLog & getQueryLog(); + /// Позволяет выбрать метод сжатия по условиям, описанным в конфигурационном файле. 
CompressionMethod chooseCompressionMethod(size_t part_size, double part_size_ratio) const; diff --git a/dbms/include/DB/Interpreters/QueryLog.h b/dbms/include/DB/Interpreters/QueryLog.h index 77eab7e6439..33cc4bd4b5f 100644 --- a/dbms/include/DB/Interpreters/QueryLog.h +++ b/dbms/include/DB/Interpreters/QueryLog.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -35,38 +36,24 @@ struct QueryLogElement QUERY_FINISH = 2, }; - enum Interface - { - TCP = 1, - HTTP = 2, - OLAP_HTTP = 3, - }; - - enum HTTPMethod - { - UNKNOWN = 0, - GET = 1, - POST = 2, - }; - - Type type; + Type type = QUERY_START; /// В зависимости от типа, не все поля могут быть заполнены. - time_t event_time; - time_t query_start_time; - UInt64 query_duration_ms; + time_t event_time{}; + time_t query_start_time{}; + UInt64 query_duration_ms{}; - UInt64 read_rows; - UInt64 read_bytes; + UInt64 read_rows{}; + UInt64 read_bytes{}; - UInt64 result_rows; - UInt64 result_bytes; + UInt64 result_rows{}; + UInt64 result_bytes{}; String query; - Interface interface; - HTTPMethod http_method; + Context::Interface interface = Context::Interface::TCP; + Context::HTTPMethod http_method = Context::HTTPMethod::UNKNOWN; Poco::Net::IPAddress ip_address; String user; String query_id; diff --git a/dbms/include/DB/Interpreters/Settings.h b/dbms/include/DB/Interpreters/Settings.h index 8d799249f71..2111ede1ee1 100644 --- a/dbms/include/DB/Interpreters/Settings.h +++ b/dbms/include/DB/Interpreters/Settings.h @@ -135,6 +135,9 @@ struct Settings \ /** Приоритет запроса. 1 - самый высокий, больше - ниже; 0 - не использовать приоритеты. */ \ M(SettingUInt64, priority, 0) \ + \ + /** Логгировать запросы и писать лог в системную таблицу. */ \ + M(SettingBool, log_queries, 0) \ /// Всевозможные ограничения на выполнение запроса. Limits limits; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index c67c36e2cd0..fbde5ad7de6 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -86,6 +87,7 @@ struct ContextShared BackgroundProcessingPoolPtr background_pool; /// Пул потоков для фоновой работы, выполняемой таблицами. Macros macros; /// Подстановки из конфига. std::unique_ptr compiler; /// Для динамической компиляции частей запроса, при необходимости. + std::unique_ptr query_log; /// Для логгирования запросов. mutable std::unique_ptr compression_method_selector; /// Правила для выбора метода сжатия в зависимости от размера куска. 
/// Кластеры для distributed таблиц @@ -830,6 +832,26 @@ Compiler & Context::getCompiler() } +QueryLog & Context::getQueryLog() +{ + Poco::ScopedLock lock(shared->mutex); + + if (!shared->query_log) + { + auto & config = Poco::Util::Application::instance().config(); + + String database = config.getString("query_log.database", "system"); + String table = config.getString("query_log.table", "query_log"); + size_t flush_interval_milliseconds = parse( + config.getString("query_log.flush_interval_milliseconds", DEFAULT_QUERY_LOG_FLUSH_INTERVAL_MILLISECONDS_STR)); + + shared->query_log.reset(new QueryLog{ *this, database, table, flush_interval_milliseconds }); + } + + return *shared->query_log; +} + + CompressionMethod Context::chooseCompressionMethod(size_t part_size, double part_size_ratio) const { Poco::ScopedLock lock(shared->mutex); diff --git a/dbms/src/Interpreters/QueryLog.cpp b/dbms/src/Interpreters/QueryLog.cpp index 6c823712e37..77ba058de38 100644 --- a/dbms/src/Interpreters/QueryLog.cpp +++ b/dbms/src/Interpreters/QueryLog.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -263,6 +262,7 @@ void QueryLog::flush() tryLogCurrentException(__PRETTY_FUNCTION__); } + /// В случае ошибки тоже очищаем накопленные записи, чтобы не блокироваться. data.clear(); } diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index b122fc24eea..91ae751eea2 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -31,6 +32,7 @@ static void checkLimits(const IAST & ast, const Limits & limits) } +/// Логгировать запрос в обычный лог (не в таблицу). static void logQuery(const String & query, const Context & context) { String logged_query = query; @@ -85,12 +87,12 @@ static std::tuple executeQueryImpl( quota.checkExceeded(current_time); + const Settings & settings = context.getSettingsRef(); + /// Положим запрос в список процессов. Но запрос SHOW PROCESSLIST класть не будем. ProcessList::EntryPtr process_list_entry; if (!internal && nullptr == typeid_cast(&*ast)) { - const Settings & settings = context.getSettingsRef(); - process_list_entry = context.getProcessList().insert( query, context.getUser(), context.getCurrentQueryId(), context.getIPAddress(), settings.limits.max_memory_usage, @@ -101,6 +103,27 @@ static std::tuple executeQueryImpl( context.setProcessListElement(&process_list_entry->get()); } + /// Логгируем в таблицу начало выполнения запроса, если нужно. 
+ if (settings.log_queries) + { + QueryLogElement elem; + + elem.type = QueryLogElement::QUERY_START; + + elem.event_time = current_time; + elem.query_start_time = current_time; + + elem.query = query; + + elem.interface = context.getInterface(); + elem.http_method = context.getHTTPMethod(); + elem.ip_address = context.getIPAddress(); + elem.user = context.getUser(); + elem.query_id = context.getCurrentQueryId(); + + context.getQueryLog().add(elem); + } + BlockIO res; try diff --git a/dbms/src/Server/HTTPHandler.cpp b/dbms/src/Server/HTTPHandler.cpp index cd13d00c57c..7ed2c31da24 100644 --- a/dbms/src/Server/HTTPHandler.cpp +++ b/dbms/src/Server/HTTPHandler.cpp @@ -136,6 +136,16 @@ void HTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net if (readonly) context.getSettingsRef().limits.readonly = true; + context.setInterface(Context::Interface::HTTP); + + Context::HTTPMethod http_method = Context::HTTPMethod::UNKNOWN; + if (request.getMethod() == Poco::Net::HTTPServerRequest::HTTP_GET) + http_method = Context::HTTPMethod::GET; + else if (request.getMethod() == Poco::Net::HTTPServerRequest::HTTP_POST) + http_method = Context::HTTPMethod::POST; + + context.setHTTPMethod(http_method); + Stopwatch watch; executeQuery(*in, *used_output.out_maybe_compressed, context, query_plan); watch.stop(); diff --git a/dbms/src/Server/OLAPHTTPHandler.cpp b/dbms/src/Server/OLAPHTTPHandler.cpp index 7a2bbe24485..462557784c1 100644 --- a/dbms/src/Server/OLAPHTTPHandler.cpp +++ b/dbms/src/Server/OLAPHTTPHandler.cpp @@ -52,6 +52,9 @@ void OLAPHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco: context.setUser(user, password, request.clientAddress().host(), quota_key); + context.setInterface(Context::Interface::HTTP); + context.setHTTPMethod(Context::HTTPMethod::POST); + OLAP::QueryParseResult olap_query = server.olap_parser->parse(request_istream); std::string clickhouse_query; diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 2188ae8a15c..eb804978331 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -533,6 +533,8 @@ int Server::main(const std::vector & args) LOG_DEBUG(log, "Loaded metadata."); /// Создаём системные таблицы. + Poco::File(path + "data/system").createDirectories(); + Poco::File(path + "metadata/system").createDirectories(); global_context->addDatabase("system"); global_context->addTable("system", "one", StorageSystemOne::create("one")); diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp index 395fb769baa..2e9ad9e7425 100644 --- a/dbms/src/Server/TCPHandler.cpp +++ b/dbms/src/Server/TCPHandler.cpp @@ -111,6 +111,7 @@ void TCPHandler::runImpl() { /// Восстанавливаем контекст запроса. query_context = connection_context; + query_context.setInterface(Context::Interface::TCP); /** Если Query - обрабатываем. Если Ping или Cancel - возвращаемся в начало. * Могут прийти настройки на отдельный запрос, которые модифицируют query_context. From d9fd662c125c10faf30cb713bdda2ea91e3d3f73 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Jun 2015 00:20:33 +0300 Subject: [PATCH 42/67] dbms: addition to prev. revision [#METR-16946]. 
--- dbms/src/Server/Server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index eb804978331..9639849a497 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -548,7 +548,7 @@ int Server::main(const std::vector & args) global_context->addTable("system", "events", StorageSystemEvents::create("events")); global_context->addTable("system", "merges", StorageSystemMerges::create("merges")); global_context->addTable("system", "replicas", StorageSystemReplicas::create("replicas")); - global_context->addTable("system", "dictionaries", StorageSystemDictionaries::create("dictionaries")); + global_context->addTable("system", "dictionaries", StorageSystemDictionaries::create("dictionaries")); global_context->addTable("system", "columns", StorageSystemColumns::create("columns")); global_context->addTable("system", "functions", StorageSystemFunctions::create("functions")); global_context->addTable("system", "clusters", StorageSystemClusters::create("clusters", *global_context)); From 6b82289af47de3c59e8d2eb45d97c5a12eed3e9b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Jun 2015 01:37:55 +0300 Subject: [PATCH 43/67] dbms: addition to prev. revision [#METR-16946]. --- dbms/src/Server/Server.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 9639849a497..3c2e42a5ccb 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -533,9 +533,12 @@ int Server::main(const std::vector & args) LOG_DEBUG(log, "Loaded metadata."); /// Создаём системные таблицы. - Poco::File(path + "data/system").createDirectories(); - Poco::File(path + "metadata/system").createDirectories(); - global_context->addDatabase("system"); + if (!global_context->isDatabaseExist("system")) + { + Poco::File(path + "data/system").createDirectories(); + Poco::File(path + "metadata/system").createDirectories(); + global_context->addDatabase("system"); + } global_context->addTable("system", "one", StorageSystemOne::create("one")); global_context->addTable("system", "numbers", StorageSystemNumbers::create("numbers")); From cdf90e9510edd9a8ec89ea4b975b3067a4a205ed Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jun 2015 09:25:12 +0300 Subject: [PATCH 44/67] dbms: higher order functions: added support for constant expressions [#METR-17014]. --- .../DB/Functions/FunctionsHigherOrder.h | 193 +++- ...unctions_higher_order_and_consts.reference | 914 ++++++++++++++++++ ...0182_functions_higher_order_and_consts.sql | 235 +++++ 3 files changed, 1328 insertions(+), 14 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00182_functions_higher_order_and_consts.reference create mode 100644 dbms/tests/queries/0_stateless/00182_functions_higher_order_and_consts.sql diff --git a/dbms/include/DB/Functions/FunctionsHigherOrder.h b/dbms/include/DB/Functions/FunctionsHigherOrder.h index d04bf89620f..8e955410e18 100644 --- a/dbms/include/DB/Functions/FunctionsHigherOrder.h +++ b/dbms/include/DB/Functions/FunctionsHigherOrder.h @@ -41,7 +41,9 @@ struct ArrayMapImpl static ColumnPtr execute(const ColumnArray * array, ColumnPtr mapped) { - return new ColumnArray(mapped, array->getOffsetsColumn()); + return mapped->isConst() + ? 
new ColumnArray(dynamic_cast(*mapped).convertToFullColumn(), array->getOffsetsColumn()) + : new ColumnArray(mapped, array->getOffsetsColumn()); } }; @@ -59,9 +61,20 @@ struct ArrayFilterImpl /// Если массивов несколько, сюда передается первый. static ColumnPtr execute(const ColumnArray * array, ColumnPtr mapped) { - ColumnVector * column_filter = typeid_cast *>(&*mapped); + const ColumnVector * column_filter = typeid_cast *>(&*mapped); + if (!column_filter) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + { + const ColumnConstUInt8 * column_filter_const = typeid_cast(&*mapped); + + if (!column_filter_const) + throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + + if (column_filter_const->getData()) + return array->clone(); + else + return new ColumnArray(array->getDataPtr()->cloneEmpty(), new ColumnArray::ColumnOffsets_t(array->size(), 0)); + } const IColumn::Filter & filter = column_filter->getData(); ColumnPtr filtered = array->getData().filter(filter); @@ -100,9 +113,34 @@ struct ArrayCountImpl static ColumnPtr execute(const ColumnArray * array, ColumnPtr mapped) { - ColumnVector * column_filter = typeid_cast *>(&*mapped); + const ColumnVector * column_filter = typeid_cast *>(&*mapped); + if (!column_filter) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + { + const ColumnConstUInt8 * column_filter_const = typeid_cast(&*mapped); + + if (!column_filter_const) + throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + + if (column_filter_const->getData()) + { + const IColumn::Offsets_t & offsets = array->getOffsets(); + ColumnVector * out_column = new ColumnVector(offsets.size()); + ColumnPtr out_column_ptr = out_column; + ColumnVector::Container_t & out_counts = out_column->getData(); + + size_t pos = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + out_counts[i] = offsets[i] - pos; + pos = offsets[i]; + } + + return out_column_ptr; + } + else + return new ColumnConstUInt32(array->size(), 0); + } const IColumn::Filter & filter = column_filter->getData(); const IColumn::Offsets_t & offsets = array->getOffsets(); @@ -139,9 +177,34 @@ struct ArrayExistsImpl static ColumnPtr execute(const ColumnArray * array, ColumnPtr mapped) { - ColumnVector * column_filter = typeid_cast *>(&*mapped); + const ColumnVector * column_filter = typeid_cast *>(&*mapped); + if (!column_filter) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + { + const ColumnConstUInt8 * column_filter_const = typeid_cast(&*mapped); + + if (!column_filter_const) + throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + + if (column_filter_const->getData()) + { + const IColumn::Offsets_t & offsets = array->getOffsets(); + ColumnVector * out_column = new ColumnVector(offsets.size()); + ColumnPtr out_column_ptr = out_column; + ColumnVector::Container_t & out_exists = out_column->getData(); + + size_t pos = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + out_exists[i] = offsets[i] - pos > 0; + pos = offsets[i]; + } + + return out_column_ptr; + } + else + return new ColumnConstUInt8(array->size(), 0); + } const IColumn::Filter & filter = column_filter->getData(); const IColumn::Offsets_t & offsets = array->getOffsets(); @@ -182,9 +245,34 @@ struct ArrayAllImpl static ColumnPtr execute(const ColumnArray * array, ColumnPtr mapped) { - ColumnVector * column_filter = typeid_cast *>(&*mapped); + const ColumnVector * column_filter = 
typeid_cast *>(&*mapped); + if (!column_filter) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + { + const ColumnConstUInt8 * column_filter_const = typeid_cast(&*mapped); + + if (!column_filter_const) + throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + + if (column_filter_const->getData()) + return new ColumnConstUInt8(array->size(), 1); + else + { + const IColumn::Offsets_t & offsets = array->getOffsets(); + ColumnVector * out_column = new ColumnVector(offsets.size()); + ColumnPtr out_column_ptr = out_column; + ColumnVector::Container_t & out_all = out_column->getData(); + + size_t pos = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + out_all[i] = offsets[i] == pos; + pos = offsets[i]; + } + + return out_column_ptr; + } + } const IColumn::Filter & filter = column_filter->getData(); const IColumn::Offsets_t & offsets = array->getOffsets(); @@ -245,7 +333,27 @@ struct ArraySumImpl const ColumnVector * column = typeid_cast *>(&*mapped); if (!column) - return false; + { + const ColumnConst * column_const = typeid_cast *>(&*mapped); + + if (!column_const) + return false; + + const Element x = column_const->getData(); + + ColumnVector * res_column = new ColumnVector(offsets.size()); + res_ptr = res_column; + typename ColumnVector::Container_t & res = res_column->getData(); + + size_t pos = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + res[i] = x * (offsets[i] - pos); + pos = offsets[i]; + } + + return true; + } const typename ColumnVector::Container_t & data = column->getData(); ColumnVector * res_column = new ColumnVector(offsets.size()); @@ -300,9 +408,41 @@ struct ArrayFirstImpl static ColumnPtr execute(const ColumnArray * array, ColumnPtr mapped) { - auto column_filter = typeid_cast *>(&*mapped); + auto column_filter = typeid_cast *>(&*mapped); + if (!column_filter) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + { + const ColumnConstUInt8 * column_filter_const = typeid_cast(&*mapped); + + if (!column_filter_const) + throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + + if (column_filter_const->getData()) + { + const auto & offsets = array->getOffsets(); + const auto & data = array->getData(); + ColumnPtr out{data.cloneEmpty()}; + + size_t pos{}; + for (size_t i = 0; i < offsets.size(); ++i) + { + if (offsets[i] - pos > 0) + out->insert(data[pos]); + else + out->insertDefault(); + + pos = offsets[i]; + } + + return out; + } + else + { + ColumnPtr out{array->getData().cloneEmpty()}; + out->insertDefault(); + return out->replicate(IColumn::Offsets_t(1, array->size())); + } + } const auto & filter = column_filter->getData(); const auto & offsets = array->getOffsets(); @@ -345,9 +485,34 @@ struct ArrayFirstIndexImpl static ColumnPtr execute(const ColumnArray * array, ColumnPtr mapped) { - auto column_filter = typeid_cast *>(&*mapped); + auto column_filter = typeid_cast *>(&*mapped); + if (!column_filter) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + { + const ColumnConstUInt8 * column_filter_const = typeid_cast(&*mapped); + + if (!column_filter_const) + throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + + if (column_filter_const->getData()) + { + const auto & offsets = array->getOffsets(); + auto out_column = new ColumnVector{offsets.size()}; + ColumnPtr out_column_ptr{out_column}; + auto & out_index = out_column->getData(); + + size_t pos{}; + for (size_t i = 0; i < 
offsets.size(); ++i) + { + out_index[i] = offsets[i] - pos > 0; + pos = offsets[i]; + } + + return out_column_ptr; + } + else + return new ColumnConstUInt32(array->size(), 0); + } const auto & filter = column_filter->getData(); const auto & offsets = array->getOffsets(); diff --git a/dbms/tests/queries/0_stateless/00182_functions_higher_order_and_consts.reference b/dbms/tests/queries/0_stateless/00182_functions_higher_order_and_consts.reference new file mode 100644 index 00000000000..4aa1757a136 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00182_functions_higher_order_and_consts.reference @@ -0,0 +1,914 @@ +---map-- +[] +[123,123,123] +[] +[123] +[123,123] +[123,123,123] +[123,123,123,123] +[123,123,123,123,123] +[123,123,123,123,123,123] +[123,123,123,123,123,123,123] +[123,123,123,123,123,123,123,123] +[123,123,123,123,123,123,123,123,123] +---filter-- +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[1,2,3] +[] +[0] +[0,1] +[0,1,2] +[0,1,2,3] +[0,1,2,3,4] +[0,1,2,3,4,5] +[0,1,2,3,4,5,6] +[0,1,2,3,4,5,6,7] +[0,1,2,3,4,5,6,7,8] +---count--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +3 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +---sum--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +30 +0 +10 +20 +30 +40 +50 +60 +70 +80 +90 +---all--- +1 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +---exists--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +---first--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +---first index--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +---map-- +[] +[123,123,123] +[] +[123] +[123,123] +[123,123,123] +[123,123,123,123] +[123,123,123,123,123] +[123,123,123,123,123,123] +[123,123,123,123,123,123,123] +[123,123,123,123,123,123,123,123] +[123,123,123,123,123,123,123,123,123] +---filter-- +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[1,2,3] +[] +[0] +[0,1] +[0,1,2] +[0,1,2,3] +[0,1,2,3,4] +[0,1,2,3,4,5] +[0,1,2,3,4,5,6] +[0,1,2,3,4,5,6,7] +[0,1,2,3,4,5,6,7,8] +---count--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +3 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +---sum--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +30 +0 +10 +20 +30 +40 +50 +60 +70 +80 +90 +---all--- +1 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +---exists--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +---first--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +---first index--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +---map-- +[] +[123,123,123] +[] +[123] +[123,123] +[123,123,123] +[123,123,123,123] +[123,123,123,123,123] +[123,123,123,123,123,123] +[123,123,123,123,123,123,123] +[123,123,123,123,123,123,123,123] +[123,123,123,123,123,123,123,123,123] +---filter-- +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +['1','2','3'] +[] +['0'] +['0','1'] +['0','1','2'] +['0','1','2','3'] +['0','1','2','3','4'] +['0','1','2','3','4','5'] +['0','1','2','3','4','5','6'] +['0','1','2','3','4','5','6','7'] +['0','1','2','3','4','5','6','7','8'] +---count--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +3 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +---sum--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +30 +0 +10 +20 +30 +40 +50 +60 +70 +80 +90 +---all--- +1 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +---exists--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +---first--- + + + + + + + + + + + + + +1 + +0 +0 +0 +0 +0 +0 +0 +0 +0 +---first index--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +1 +1 
+1 +1 +1 +1 +1 +1 +1 +---map-- +[] +[123,123,123] +[] +[123] +[123,123] +[123,123,123] +[123,123,123,123] +[123,123,123,123,123] +[123,123,123,123,123,123] +[123,123,123,123,123,123,123] +[123,123,123,123,123,123,123,123] +[123,123,123,123,123,123,123,123,123] +---filter-- +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +['1','2','3'] +[] +['0'] +['0','1'] +['0','1','2'] +['0','1','2','3'] +['0','1','2','3','4'] +['0','1','2','3','4','5'] +['0','1','2','3','4','5','6'] +['0','1','2','3','4','5','6','7'] +['0','1','2','3','4','5','6','7','8'] +---count--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +3 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +---sum--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +30 +0 +10 +20 +30 +40 +50 +60 +70 +80 +90 +---all--- +1 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +---exists--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +---first--- + + + + + + + + + + + + + +1 + +0 +0 +0 +0 +0 +0 +0 +0 +0 +---first index--- +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +--- --- +[] +[1] +[0,0] +[1,1,1] +[0,0,0,0] +[1,1,1,1,1] +[0,0,0,0,0,0] +[1,1,1,1,1,1,1] +[0,0,0,0,0,0,0,0] +[1,1,1,1,1,1,1,1,1] +[] +[0] +[] +[0,1,2] +[] +[0,1,2,3,4] +[] +[0,1,2,3,4,5,6] +[] +[0,1,2,3,4,5,6,7,8] +0 +1 +0 +3 +0 +5 +0 +7 +0 +9 +0 +1 +0 +3 +0 +5 +0 +7 +0 +9 +1 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +--- --- +[] +[1] +[0,0] +[1,1,1] +[0,0,0,0] +[1,1,1,1,1] +[0,0,0,0,0,0] +[1,1,1,1,1,1,1] +[0,0,0,0,0,0,0,0] +[1,1,1,1,1,1,1,1,1] +[] +['0'] +[] +['0','1','2'] +[] +['0','1','2','3','4'] +[] +['0','1','2','3','4','5','6'] +[] +['0','1','2','3','4','5','6','7','8'] +0 +1 +0 +3 +0 +5 +0 +7 +0 +9 +0 +1 +0 +3 +0 +5 +0 +7 +0 +9 +1 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 + +0 + +0 + +0 + +0 + +0 +0 +1 +0 +1 +0 +1 +0 +1 +0 +1 diff --git a/dbms/tests/queries/0_stateless/00182_functions_higher_order_and_consts.sql b/dbms/tests/queries/0_stateless/00182_functions_higher_order_and_consts.sql new file mode 100644 index 00000000000..5513df2e1b0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00182_functions_higher_order_and_consts.sql @@ -0,0 +1,235 @@ +SELECT '---map--'; +SELECT arrayMap(x -> 123, emptyArrayUInt8()); +SELECT arrayMap(x -> 123, [1, 2, 3]); +SELECT arrayMap(x -> 123, range(number)) FROM system.numbers LIMIT 10; +SELECT '---filter--'; +SELECT arrayFilter(x -> 0, emptyArrayUInt8()); +SELECT arrayFilter(x -> 0, [1, 2, 3]); +SELECT arrayFilter(x -> 0, range(number)) FROM system.numbers LIMIT 10; +SELECT arrayFilter(x -> 1, emptyArrayUInt8()); +SELECT arrayFilter(x -> 1, [1, 2, 3]); +SELECT arrayFilter(x -> 1, range(number)) FROM system.numbers LIMIT 10; +SELECT '---count---'; +SELECT arrayCount(x -> 0, emptyArrayUInt8()); +SELECT arrayCount(x -> 0, [1, 2, 3]); +SELECT arrayCount(x -> 0, range(number)) FROM system.numbers LIMIT 10; +SELECT arrayCount(x -> 1, emptyArrayUInt8()); +SELECT arrayCount(x -> 1, [1, 2, 3]); +SELECT arrayCount(x -> 1, range(number)) FROM system.numbers LIMIT 10; +SELECT '---sum---'; +SELECT arraySum(x -> 0, emptyArrayUInt8()); +SELECT arraySum(x -> 0, [1, 2, 3]); +SELECT arraySum(x -> 0, range(number)) FROM system.numbers LIMIT 10; +SELECT arraySum(x -> 10, emptyArrayUInt8()); +SELECT arraySum(x -> 10, [1, 2, 3]); +SELECT arraySum(x -> 10, range(number)) FROM system.numbers LIMIT 10; +SELECT '---all---'; +SELECT arrayAll(x -> 0, emptyArrayUInt8()); +SELECT arrayAll(x -> 0, [1, 2, 3]); +SELECT arrayAll(x -> 0, 
range(number)) FROM system.numbers LIMIT 10; +SELECT arrayAll(x -> 1, emptyArrayUInt8()); +SELECT arrayAll(x -> 1, [1, 2, 3]); +SELECT arrayAll(x -> 1, range(number)) FROM system.numbers LIMIT 10; +SELECT '---exists---'; +SELECT arrayExists(x -> 0, emptyArrayUInt8()); +SELECT arrayExists(x -> 0, [1, 2, 3]); +SELECT arrayExists(x -> 0, range(number)) FROM system.numbers LIMIT 10; +SELECT arrayExists(x -> 1, emptyArrayUInt8()); +SELECT arrayExists(x -> 1, [1, 2, 3]); +SELECT arrayExists(x -> 1, range(number)) FROM system.numbers LIMIT 10; +SELECT '---first---'; +SELECT arrayFirst(x -> 0, emptyArrayUInt8()); +SELECT arrayFirst(x -> 0, [1, 2, 3]); +SELECT arrayFirst(x -> 0, range(number)) FROM system.numbers LIMIT 10; +SELECT arrayFirst(x -> 1, emptyArrayUInt8()); +SELECT arrayFirst(x -> 1, [1, 2, 3]); +SELECT arrayFirst(x -> 1, range(number)) FROM system.numbers LIMIT 10; +SELECT '---first index---'; +SELECT arrayFirstIndex(x -> 0, emptyArrayUInt8()); +SELECT arrayFirstIndex(x -> 0, [1, 2, 3]); +SELECT arrayFirstIndex(x -> 0, range(number)) FROM system.numbers LIMIT 10; +SELECT arrayFirstIndex(x -> 1, emptyArrayUInt8()); +SELECT arrayFirstIndex(x -> 1, [1, 2, 3]); +SELECT arrayFirstIndex(x -> 1, range(number)) FROM system.numbers LIMIT 10; + +SELECT '---map--'; +SELECT arrayMap(x -> materialize(123), emptyArrayUInt8()); +SELECT arrayMap(x -> materialize(123), [1, 2, 3]); +SELECT arrayMap(x -> materialize(123), range(number)) FROM system.numbers LIMIT 10; +SELECT '---filter--'; +SELECT arrayFilter(x -> materialize(0), emptyArrayUInt8()); +SELECT arrayFilter(x -> materialize(0), [1, 2, 3]); +SELECT arrayFilter(x -> materialize(0), range(number)) FROM system.numbers LIMIT 10; +SELECT arrayFilter(x -> materialize(1), emptyArrayUInt8()); +SELECT arrayFilter(x -> materialize(1), [1, 2, 3]); +SELECT arrayFilter(x -> materialize(1), range(number)) FROM system.numbers LIMIT 10; +SELECT '---count---'; +SELECT arrayCount(x -> materialize(0), emptyArrayUInt8()); +SELECT arrayCount(x -> materialize(0), [1, 2, 3]); +SELECT arrayCount(x -> materialize(0), range(number)) FROM system.numbers LIMIT 10; +SELECT arrayCount(x -> materialize(1), emptyArrayUInt8()); +SELECT arrayCount(x -> materialize(1), [1, 2, 3]); +SELECT arrayCount(x -> materialize(1), range(number)) FROM system.numbers LIMIT 10; +SELECT '---sum---'; +SELECT arraySum(x -> materialize(0), emptyArrayUInt8()); +SELECT arraySum(x -> materialize(0), [1, 2, 3]); +SELECT arraySum(x -> materialize(0), range(number)) FROM system.numbers LIMIT 10; +SELECT arraySum(x -> materialize(10), emptyArrayUInt8()); +SELECT arraySum(x -> materialize(10), [1, 2, 3]); +SELECT arraySum(x -> materialize(10), range(number)) FROM system.numbers LIMIT 10; +SELECT '---all---'; +SELECT arrayAll(x -> materialize(0), emptyArrayUInt8()); +SELECT arrayAll(x -> materialize(0), [1, 2, 3]); +SELECT arrayAll(x -> materialize(0), range(number)) FROM system.numbers LIMIT 10; +SELECT arrayAll(x -> materialize(1), emptyArrayUInt8()); +SELECT arrayAll(x -> materialize(1), [1, 2, 3]); +SELECT arrayAll(x -> materialize(1), range(number)) FROM system.numbers LIMIT 10; +SELECT '---exists---'; +SELECT arrayExists(x -> materialize(0), emptyArrayUInt8()); +SELECT arrayExists(x -> materialize(0), [1, 2, 3]); +SELECT arrayExists(x -> materialize(0), range(number)) FROM system.numbers LIMIT 10; +SELECT arrayExists(x -> materialize(1), emptyArrayUInt8()); +SELECT arrayExists(x -> materialize(1), [1, 2, 3]); +SELECT arrayExists(x -> materialize(1), range(number)) FROM system.numbers LIMIT 10; 
+SELECT '---first---'; +SELECT arrayFirst(x -> materialize(0), emptyArrayUInt8()); +SELECT arrayFirst(x -> materialize(0), [1, 2, 3]); +SELECT arrayFirst(x -> materialize(0), range(number)) FROM system.numbers LIMIT 10; +SELECT arrayFirst(x -> materialize(1), emptyArrayUInt8()); +SELECT arrayFirst(x -> materialize(1), [1, 2, 3]); +SELECT arrayFirst(x -> materialize(1), range(number)) FROM system.numbers LIMIT 10; +SELECT '---first index---'; +SELECT arrayFirstIndex(x -> materialize(0), emptyArrayUInt8()); +SELECT arrayFirstIndex(x -> materialize(0), [1, 2, 3]); +SELECT arrayFirstIndex(x -> materialize(0), range(number)) FROM system.numbers LIMIT 10; +SELECT arrayFirstIndex(x -> materialize(1), emptyArrayUInt8()); +SELECT arrayFirstIndex(x -> materialize(1), [1, 2, 3]); +SELECT arrayFirstIndex(x -> materialize(1), range(number)) FROM system.numbers LIMIT 10; + +SELECT '---map--'; +SELECT arrayMap(x -> 123, emptyArrayString()); +SELECT arrayMap(x -> 123, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayMap(x -> 123, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---filter--'; +SELECT arrayFilter(x -> 0, emptyArrayString()); +SELECT arrayFilter(x -> 0, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFilter(x -> 0, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayFilter(x -> 1, emptyArrayString()); +SELECT arrayFilter(x -> 1, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFilter(x -> 1, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---count---'; +SELECT arrayCount(x -> 0, emptyArrayString()); +SELECT arrayCount(x -> 0, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayCount(x -> 0, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayCount(x -> 1, emptyArrayString()); +SELECT arrayCount(x -> 1, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayCount(x -> 1, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---sum---'; +SELECT arraySum(x -> 0, emptyArrayString()); +SELECT arraySum(x -> 0, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arraySum(x -> 0, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arraySum(x -> 10, emptyArrayString()); +SELECT arraySum(x -> 10, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arraySum(x -> 10, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---all---'; +SELECT arrayAll(x -> 0, emptyArrayString()); +SELECT arrayAll(x -> 0, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayAll(x -> 0, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayAll(x -> 1, emptyArrayString()); +SELECT arrayAll(x -> 1, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayAll(x -> 1, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---exists---'; +SELECT arrayExists(x -> 0, emptyArrayString()); +SELECT arrayExists(x -> 0, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayExists(x -> 0, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayExists(x -> 1, emptyArrayString()); +SELECT arrayExists(x -> 1, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayExists(x -> 1, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---first---'; +SELECT arrayFirst(x -> 0, emptyArrayString()); +SELECT arrayFirst(x -> 0, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFirst(x -> 0, arrayMap(x -> toString(x), 
range(number))) FROM system.numbers LIMIT 10; +SELECT arrayFirst(x -> 1, emptyArrayString()); +SELECT arrayFirst(x -> 1, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFirst(x -> 1, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---first index---'; +SELECT arrayFirstIndex(x -> 0, emptyArrayString()); +SELECT arrayFirstIndex(x -> 0, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFirstIndex(x -> 0, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayFirstIndex(x -> 1, emptyArrayString()); +SELECT arrayFirstIndex(x -> 1, arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFirstIndex(x -> 1, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; + +SELECT '---map--'; +SELECT arrayMap(x -> materialize(123), emptyArrayString()); +SELECT arrayMap(x -> materialize(123), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayMap(x -> materialize(123), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---filter--'; +SELECT arrayFilter(x -> materialize(0), emptyArrayString()); +SELECT arrayFilter(x -> materialize(0), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFilter(x -> materialize(0), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayFilter(x -> materialize(1), emptyArrayString()); +SELECT arrayFilter(x -> materialize(1), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFilter(x -> materialize(1), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---count---'; +SELECT arrayCount(x -> materialize(0), emptyArrayString()); +SELECT arrayCount(x -> materialize(0), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayCount(x -> materialize(0), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayCount(x -> materialize(1), emptyArrayString()); +SELECT arrayCount(x -> materialize(1), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayCount(x -> materialize(1), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---sum---'; +SELECT arraySum(x -> materialize(0), emptyArrayString()); +SELECT arraySum(x -> materialize(0), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arraySum(x -> materialize(0), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arraySum(x -> materialize(10), emptyArrayString()); +SELECT arraySum(x -> materialize(10), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arraySum(x -> materialize(10), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---all---'; +SELECT arrayAll(x -> materialize(0), emptyArrayString()); +SELECT arrayAll(x -> materialize(0), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayAll(x -> materialize(0), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayAll(x -> materialize(1), emptyArrayString()); +SELECT arrayAll(x -> materialize(1), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayAll(x -> materialize(1), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---exists---'; +SELECT arrayExists(x -> materialize(0), emptyArrayString()); +SELECT arrayExists(x -> materialize(0), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayExists(x -> materialize(0), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayExists(x -> materialize(1), emptyArrayString()); +SELECT arrayExists(x -> materialize(1), arrayMap(x -> toString(x), [1, 2, 3])); 
+SELECT arrayExists(x -> materialize(1), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---first---'; +SELECT arrayFirst(x -> materialize(0), emptyArrayString()); +SELECT arrayFirst(x -> materialize(0), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFirst(x -> materialize(0), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayFirst(x -> materialize(1), emptyArrayString()); +SELECT arrayFirst(x -> materialize(1), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFirst(x -> materialize(1), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT '---first index---'; +SELECT arrayFirstIndex(x -> materialize(0), emptyArrayString()); +SELECT arrayFirstIndex(x -> materialize(0), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFirstIndex(x -> materialize(0), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayFirstIndex(x -> materialize(1), emptyArrayString()); +SELECT arrayFirstIndex(x -> materialize(1), arrayMap(x -> toString(x), [1, 2, 3])); +SELECT arrayFirstIndex(x -> materialize(1), arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; + + +SELECT '--- ---'; +SELECT arrayMap(x -> number % 2, range(number)) FROM system.numbers LIMIT 10; +SELECT arrayFilter(x -> number % 2, range(number)) FROM system.numbers LIMIT 10; +SELECT arrayCount(x -> number % 2, range(number)) FROM system.numbers LIMIT 10; +SELECT arraySum(x -> number % 2, range(number)) FROM system.numbers LIMIT 10; +SELECT arrayAll(x -> number % 2, range(number)) FROM system.numbers LIMIT 10; +SELECT arrayExists(x -> number % 2, range(number)) FROM system.numbers LIMIT 10; +SELECT arrayFirst(x -> number % 2, range(number)) FROM system.numbers LIMIT 10; +SELECT arrayFirstIndex(x -> number % 2, range(number)) FROM system.numbers LIMIT 10; +SELECT '--- ---'; +SELECT arrayMap(x -> number % 2, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayFilter(x -> number % 2, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayCount(x -> number % 2, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arraySum(x -> number % 2, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayAll(x -> number % 2, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayExists(x -> number % 2, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayFirst(x -> number % 2, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; +SELECT arrayFirstIndex(x -> number % 2, arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; From 4cebee36eafb1cce72840118e6a4d10d2adb3caf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jun 2015 09:38:18 +0300 Subject: [PATCH 45/67] dbms: added setting 'skip_unavailable_shards' [#METR-17059]. 
--- dbms/include/DB/Client/ConnectionPool.h | 6 +++--- dbms/include/DB/Client/ConnectionPoolWithFailover.h | 12 ++++++------ dbms/include/DB/Client/ParallelReplicas.h | 8 ++++---- dbms/include/DB/DataStreams/RemoteBlockInputStream.h | 3 +++ .../include/DB/DataStreams/RemoteBlockOutputStream.h | 4 ++-- dbms/include/DB/Interpreters/Settings.h | 3 +++ dbms/src/Client/ParallelReplicas.cpp | 7 ++++--- .../00183_skip_unavailable_shards.reference | 1 + .../0_stateless/00183_skip_unavailable_shards.sql | 1 + 9 files changed, 27 insertions(+), 18 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00183_skip_unavailable_shards.reference create mode 100644 dbms/tests/queries/0_stateless/00183_skip_unavailable_shards.sql diff --git a/dbms/include/DB/Client/ConnectionPool.h b/dbms/include/DB/Client/ConnectionPool.h index 3e65e149a74..c4dd3d69a60 100644 --- a/dbms/include/DB/Client/ConnectionPool.h +++ b/dbms/include/DB/Client/ConnectionPool.h @@ -26,13 +26,13 @@ class IConnectionPool : private boost::noncopyable { public: typedef PoolBase::Entry Entry; - virtual Entry get(Settings * settings = nullptr) = 0; + virtual Entry get(const Settings * settings = nullptr) = 0; /** Выделяет до указанного количества соединений для работы. * Соединения предоставляют доступ к разным репликам одного шарда. * Выкидывает исключение, если не удалось выделить ни одного соединения. */ - virtual std::vector getMany(Settings * settings = nullptr) + virtual std::vector getMany(const Settings * settings = nullptr) { return std::vector{ get(settings) }; } @@ -89,7 +89,7 @@ public: /** Выделяет соединение для работы. */ - Entry get(Settings * settings = nullptr) override + Entry get(const Settings * settings = nullptr) override { if (settings) return Base::get(settings->queue_max_wait_ms.totalMilliseconds()); diff --git a/dbms/include/DB/Client/ConnectionPoolWithFailover.h b/dbms/include/DB/Client/ConnectionPoolWithFailover.h index bded7ff3614..e2f42366341 100644 --- a/dbms/include/DB/Client/ConnectionPoolWithFailover.h +++ b/dbms/include/DB/Client/ConnectionPoolWithFailover.h @@ -21,11 +21,11 @@ namespace DB * * Замечание: если один из вложенных пулов заблокируется из-за переполнения, то этот пул тоже заблокируется. */ -class ConnectionPoolWithFailover : public PoolWithFailoverBase, public IConnectionPool +class ConnectionPoolWithFailover : public PoolWithFailoverBase, public IConnectionPool { public: typedef IConnectionPool::Entry Entry; - typedef PoolWithFailoverBase Base; + typedef PoolWithFailoverBase Base; ConnectionPoolWithFailover(ConnectionPools & nested_pools_, LoadBalancing load_balancing, @@ -52,7 +52,7 @@ public: } /** Выделяет соединение для работы. */ - Entry get(Settings * settings = nullptr) override + Entry get(const Settings * settings = nullptr) override { applyLoadBalancing(settings); return Base::get(settings); @@ -61,14 +61,14 @@ public: /** Выделяет до указанного количества соединений для работы. * Соединения предоставляют доступ к разным репликам одного шарда. 
*/ - std::vector getMany(Settings * settings = nullptr) override + std::vector getMany(const Settings * settings = nullptr) override { applyLoadBalancing(settings); return Base::getMany(settings); } protected: - bool tryGet(ConnectionPoolPtr pool, Settings * settings, Entry & out_entry, std::stringstream & fail_message) override + bool tryGet(ConnectionPoolPtr pool, const Settings * settings, Entry & out_entry, std::stringstream & fail_message) override { try { @@ -90,7 +90,7 @@ private: std::vector hostname_differences; /// Расстояния от имени этого хоста до имен хостов пулов. LoadBalancing default_load_balancing; - void applyLoadBalancing(Settings * settings) + void applyLoadBalancing(const Settings * settings) { LoadBalancing load_balancing = default_load_balancing; if (settings) diff --git a/dbms/include/DB/Client/ParallelReplicas.h b/dbms/include/DB/Client/ParallelReplicas.h index 27e1b7782c1..97881eeec27 100644 --- a/dbms/include/DB/Client/ParallelReplicas.h +++ b/dbms/include/DB/Client/ParallelReplicas.h @@ -21,17 +21,17 @@ class ParallelReplicas final : private boost::noncopyable { public: /// Принимает готовое соединение. - ParallelReplicas(Connection * connection_, Settings * settings_, ThrottlerPtr throttler_); + ParallelReplicas(Connection * connection_, const Settings * settings_, ThrottlerPtr throttler_); /// Принимает пул, из которого нужно будет достать одно или несколько соединений. - ParallelReplicas(IConnectionPool * pool_, Settings * settings_, ThrottlerPtr throttler_); + ParallelReplicas(IConnectionPool * pool_, const Settings * settings_, ThrottlerPtr throttler_); /// Отправить на реплики всё содержимое внешних таблиц. void sendExternalTablesData(std::vector & data); /// Отправить запрос на реплики. void sendQuery(const String & query, const String & query_id = "", - UInt64 stage = QueryProcessingStage::Complete, bool with_pending_data = false); + UInt64 stage = QueryProcessingStage::Complete, bool with_pending_data = false); /// Получить пакет от какой-нибудь реплики. Connection::Packet receivePacket(); @@ -82,7 +82,7 @@ private: void invalidateReplica(ReplicaMap::iterator it); private: - Settings * settings; + const Settings * settings; ReplicaMap replica_map; /// Если не nullptr, то используется, чтобы ограничить сетевой трафик. 
diff --git a/dbms/include/DB/DataStreams/RemoteBlockInputStream.h b/dbms/include/DB/DataStreams/RemoteBlockInputStream.h index 90c66edba95..30b044c5488 100644 --- a/dbms/include/DB/DataStreams/RemoteBlockInputStream.h +++ b/dbms/include/DB/DataStreams/RemoteBlockInputStream.h @@ -136,6 +136,9 @@ protected: { createParallelReplicas(); + if (settings.skip_unavailable_shards && 0 == parallel_replicas->size()) + return Block(); + established = true; parallel_replicas->sendQuery(query, "", stage, true); diff --git a/dbms/include/DB/DataStreams/RemoteBlockOutputStream.h b/dbms/include/DB/DataStreams/RemoteBlockOutputStream.h index 440750542f6..39f93e671a6 100644 --- a/dbms/include/DB/DataStreams/RemoteBlockOutputStream.h +++ b/dbms/include/DB/DataStreams/RemoteBlockOutputStream.h @@ -15,7 +15,7 @@ namespace DB class RemoteBlockOutputStream : public IBlockOutputStream { public: - RemoteBlockOutputStream(Connection & connection_, const String & query_, Settings * settings_ = nullptr) + RemoteBlockOutputStream(Connection & connection_, const String & query_, const Settings * settings_ = nullptr) : connection(connection_), query(query_), settings(settings_) { } @@ -105,7 +105,7 @@ public: private: Connection & connection; String query; - Settings * settings; + const Settings * settings; Block sample_block; }; diff --git a/dbms/include/DB/Interpreters/Settings.h b/dbms/include/DB/Interpreters/Settings.h index 2111ede1ee1..405f23ca396 100644 --- a/dbms/include/DB/Interpreters/Settings.h +++ b/dbms/include/DB/Interpreters/Settings.h @@ -97,6 +97,9 @@ struct Settings M(SettingUInt64, parallel_replicas_count, 0) \ M(SettingUInt64, parallel_replica_offset, 0) \ \ + /** Тихо пропускать недоступные шарды. */ \ + M(SettingBool, skip_unavailable_shards, false) \ + \ /** Тонкие настройки для чтения из MergeTree */ \ \ /** Если из одного файла читается хотя бы столько строк, чтение можно распараллелить. 
*/ \ diff --git a/dbms/src/Client/ParallelReplicas.cpp b/dbms/src/Client/ParallelReplicas.cpp index 81d629d392c..135c1b06aa0 100644 --- a/dbms/src/Client/ParallelReplicas.cpp +++ b/dbms/src/Client/ParallelReplicas.cpp @@ -3,7 +3,7 @@ namespace DB { -ParallelReplicas::ParallelReplicas(Connection * connection_, Settings * settings_, ThrottlerPtr throttler_) +ParallelReplicas::ParallelReplicas(Connection * connection_, const Settings * settings_, ThrottlerPtr throttler_) : settings(settings_), throttler(throttler_), active_replica_count(1), supports_parallel_execution(false) @@ -11,7 +11,7 @@ ParallelReplicas::ParallelReplicas(Connection * connection_, Settings * settings registerReplica(connection_); } -ParallelReplicas::ParallelReplicas(IConnectionPool * pool_, Settings * settings_, ThrottlerPtr throttler_) +ParallelReplicas::ParallelReplicas(IConnectionPool * pool_, const Settings * settings_, ThrottlerPtr throttler_) : settings(settings_), throttler(throttler_) { if (pool_ == nullptr) @@ -37,7 +37,8 @@ ParallelReplicas::ParallelReplicas(IConnectionPool * pool_, Settings * settings_ supports_parallel_execution = false; pool_entry = pool_->get(settings); - registerReplica(&*pool_entry); + if (!pool_entry.isNull()) + registerReplica(&*pool_entry); } } diff --git a/dbms/tests/queries/0_stateless/00183_skip_unavailable_shards.reference b/dbms/tests/queries/0_stateless/00183_skip_unavailable_shards.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00183_skip_unavailable_shards.reference @@ -0,0 +1 @@ +2 diff --git a/dbms/tests/queries/0_stateless/00183_skip_unavailable_shards.sql b/dbms/tests/queries/0_stateless/00183_skip_unavailable_shards.sql new file mode 100644 index 00000000000..4824bdba5ad --- /dev/null +++ b/dbms/tests/queries/0_stateless/00183_skip_unavailable_shards.sql @@ -0,0 +1 @@ +SELECT count() FROM remote('{127,1}.0.0.{1,2}', system.one) SETTINGS skip_unavailable_shards = 1; \ No newline at end of file From efc741efb0b51850b1c9cac6ee6b90f598aa3f60 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jun 2015 09:48:42 +0300 Subject: [PATCH 46/67] dbms: added setting 'distributed_group_by_no_merge' [#METR-17060]. --- dbms/include/DB/Interpreters/Settings.h | 5 +++++ dbms/src/Storages/StorageDistributed.cpp | 2 +- .../00184_distributed_group_by_no_merge.reference | 2 ++ .../0_stateless/00184_distributed_group_by_no_merge.sql | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00184_distributed_group_by_no_merge.reference create mode 100644 dbms/tests/queries/0_stateless/00184_distributed_group_by_no_merge.sql diff --git a/dbms/include/DB/Interpreters/Settings.h b/dbms/include/DB/Interpreters/Settings.h index 405f23ca396..2bc4c0bca4a 100644 --- a/dbms/include/DB/Interpreters/Settings.h +++ b/dbms/include/DB/Interpreters/Settings.h @@ -100,6 +100,11 @@ struct Settings /** Тихо пропускать недоступные шарды. */ \ M(SettingBool, skip_unavailable_shards, false) \ \ + /** Не мерджить состояния агрегации с разных серверов при распределённой обработке запроса \ + * - на случай, когда доподлинно известно, что на разных шардах разные ключи. \ + */ \ + M(SettingBool, distributed_group_by_no_merge, false) \ + \ /** Тонкие настройки для чтения из MergeTree */ \ \ /** Если из одного файла читается хотя бы столько строк, чтение можно распараллелить. 
*/ \ diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index a807f8d1f69..700f4c8a766 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -154,7 +154,7 @@ BlockInputStreams StorageDistributed::read( size_t result_size = (cluster.pools.size() * settings.max_parallel_replicas) + cluster.getLocalNodesNum(); - processed_stage = result_size == 1 + processed_stage = result_size == 1 || settings.distributed_group_by_no_merge ? QueryProcessingStage::Complete : QueryProcessingStage::WithMergeableState; diff --git a/dbms/tests/queries/0_stateless/00184_distributed_group_by_no_merge.reference b/dbms/tests/queries/0_stateless/00184_distributed_group_by_no_merge.reference new file mode 100644 index 00000000000..1957f3a9604 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00184_distributed_group_by_no_merge.reference @@ -0,0 +1,2 @@ +1 1 +1 1 diff --git a/dbms/tests/queries/0_stateless/00184_distributed_group_by_no_merge.sql b/dbms/tests/queries/0_stateless/00184_distributed_group_by_no_merge.sql new file mode 100644 index 00000000000..f44fdca0836 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00184_distributed_group_by_no_merge.sql @@ -0,0 +1 @@ +SELECT count(), uniq(dummy) FROM remote('127.0.0.{1,2}', system.one) SETTINGS distributed_group_by_no_merge = 1; From 9d0f0bf334147d6960fcdf3df6eca15dc00859f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jun 2015 10:05:42 +0300 Subject: [PATCH 47/67] Client: added parameter --progress [#METR-17001]. --- dbms/src/Client/Client.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dbms/src/Client/Client.cpp b/dbms/src/Client/Client.cpp index 136e9aa4325..88e087a7457 100644 --- a/dbms/src/Client/Client.cpp +++ b/dbms/src/Client/Client.cpp @@ -96,6 +96,7 @@ private: }; bool is_interactive = true; /// Использовать readline интерфейс или batch режим. + bool need_render_progress = true; /// Рисовать прогресс выполнения запроса. bool print_time_to_stderr = false; /// В неинтерактивном режиме, выводить время выполнения в stderr. bool stdin_is_not_tty = false; /// stdin - не терминал. @@ -268,6 +269,9 @@ private: insert_format = "Values"; insert_format_max_block_size = config().getInt("insert_format_max_block_size", context.getSettingsRef().max_insert_block_size); + if (!is_interactive) + need_render_progress = config().getBool("progress", false); + connect(); if (is_interactive) @@ -906,7 +910,7 @@ private: void writeProgress() { - if (!is_interactive) + if (!need_render_progress) return; static size_t increment = 0; @@ -1053,6 +1057,7 @@ public: ("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command") ("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)") ("stacktrace", "print stack traces of exceptions") + ("progress", "print progress even in non-interactive mode") APPLY_FOR_SETTINGS(DECLARE_SETTING) APPLY_FOR_LIMITS(DECLARE_LIMIT) ; @@ -1171,6 +1176,8 @@ public: config().setBool("vertical", true); if (options.count("stacktrace")) config().setBool("stacktrace", true); + if (options.count("progress")) + config().setBool("progress", true); if (options.count("time")) print_time_to_stderr = true; } From c8b4a8f1bedab88544dd0f74c45ad4f253e2f555 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jun 2015 10:34:58 +0300 Subject: [PATCH 48/67] dbms: lowered default query_log.flush_interval_milliseconds to 7500 [#METR-16946]. 
--- dbms/include/DB/Core/Defines.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Core/Defines.h b/dbms/include/DB/Core/Defines.h index f677967fa31..e25fb73fc80 100644 --- a/dbms/include/DB/Core/Defines.h +++ b/dbms/include/DB/Core/Defines.h @@ -78,7 +78,7 @@ /// Граница, на которых должны быть выровнены блоки для асинхронных файловых операций. #define DEFAULT_AIO_FILE_BLOCK_SIZE 4096 -#define DEFAULT_QUERY_LOG_FLUSH_INTERVAL_MILLISECONDS_STR "10000" +#define DEFAULT_QUERY_LOG_FLUSH_INTERVAL_MILLISECONDS_STR "7500" #define ALWAYS_INLINE __attribute__((__always_inline__)) #define NO_INLINE __attribute__((__noinline__)) From b308d79f5e8d195fe066653b12dacf289c12f34a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jun 2015 07:54:52 +0300 Subject: [PATCH 49/67] dbms: improved performance of query parsing and analyzing with very long arrays [#METR-17061]. --- dbms/include/DB/DataTypes/FieldToDataType.h | 33 ++-- .../DB/Parsers/ExpressionElementParsers.h | 14 ++ dbms/src/Columns/ColumnConst.cpp | 3 +- dbms/src/DataTypes/FieldToDataType.cpp | 170 ++++++++++++++++++ dbms/src/Interpreters/ExpressionAnalyzer.cpp | 1 + dbms/src/Parsers/ExpressionElementParsers.cpp | 57 ++++++ .../00185_array_literals.reference | 23 +++ .../0_stateless/00185_array_literals.sql | 30 ++++ 8 files changed, 314 insertions(+), 17 deletions(-) create mode 100644 dbms/src/DataTypes/FieldToDataType.cpp create mode 100644 dbms/tests/queries/0_stateless/00185_array_literals.reference create mode 100644 dbms/tests/queries/0_stateless/00185_array_literals.sql diff --git a/dbms/include/DB/DataTypes/FieldToDataType.h b/dbms/include/DB/DataTypes/FieldToDataType.h index 455a9c1218a..846fda28ee2 100644 --- a/dbms/include/DB/DataTypes/FieldToDataType.h +++ b/dbms/include/DB/DataTypes/FieldToDataType.h @@ -4,51 +4,52 @@ #include #include +#include +#include + namespace DB { -/// Для заданного значения Field возвращает минимальный тип данных, позволяющий хранить значение этого типа. +/** Для заданного значения Field возвращает минимальный тип данных, позволяющий хранить значение этого типа. + * В случае, если Field - массив, конвертирует все элементы к общему типу. 
+ */ class FieldToDataType : public StaticVisitor { public: - DataTypePtr operator() (const Null & x) const + DataTypePtr operator() (Null & x) const { - throw Exception("NULL literals is not implemented yet", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("NULL literals are not implemented yet", ErrorCodes::NOT_IMPLEMENTED); } - - DataTypePtr operator() (const UInt64 & x) const + + DataTypePtr operator() (UInt64 & x) const { if (x <= std::numeric_limits::max()) return new DataTypeUInt8; if (x <= std::numeric_limits::max()) return new DataTypeUInt16; if (x <= std::numeric_limits::max()) return new DataTypeUInt32; return new DataTypeUInt64; } - - DataTypePtr operator() (const Int64 & x) const + + DataTypePtr operator() (Int64 & x) const { if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) return new DataTypeInt8; if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) return new DataTypeInt16; if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) return new DataTypeInt32; return new DataTypeInt64; } - - DataTypePtr operator() (const Float64 & x) const + + DataTypePtr operator() (Float64 & x) const { return new DataTypeFloat64; } - - DataTypePtr operator() (const String & x) const + + DataTypePtr operator() (String & x) const { return new DataTypeString; } - DataTypePtr operator() (const Array & x) const - { - return new DataTypeArray(apply_visitor(FieldToDataType(), x.at(0))); - } + DataTypePtr operator() (Array & x) const; }; - } diff --git a/dbms/include/DB/Parsers/ExpressionElementParsers.h b/dbms/include/DB/Parsers/ExpressionElementParsers.h index 6ea723f6742..e060a47fc17 100644 --- a/dbms/include/DB/Parsers/ExpressionElementParsers.h +++ b/dbms/include/DB/Parsers/ExpressionElementParsers.h @@ -100,6 +100,20 @@ protected: }; +/** Массив литералов. + * Массивы могут распарситься и как применение оператора []. + * Но парсинг всего массива как целой константы серьёзно ускоряет анализ выражений в случае очень больших массивов. + * Мы пробуем распарсить массив как массив литералов сначала (fast path), + * а если не получилось (когда массив состоит из сложных выражений) - парсим как применение оператора [] (slow path). + */ +class ParserArrayOfLiterals : public IParserBase +{ +protected: + const char * getName() const { return "array"; } + bool parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected); +}; + + /** Литерал - одно из: NULL, UInt64, Int64, Float64, String. */ class ParserLiteral : public IParserBase diff --git a/dbms/src/Columns/ColumnConst.cpp b/dbms/src/Columns/ColumnConst.cpp index 0a8c1170ce2..ec78c294464 100644 --- a/dbms/src/Columns/ColumnConst.cpp +++ b/dbms/src/Columns/ColumnConst.cpp @@ -1,5 +1,6 @@ -#include +#include #include +#include #include #include diff --git a/dbms/src/DataTypes/FieldToDataType.cpp b/dbms/src/DataTypes/FieldToDataType.cpp new file mode 100644 index 00000000000..52e0d611b8f --- /dev/null +++ b/dbms/src/DataTypes/FieldToDataType.cpp @@ -0,0 +1,170 @@ +#include + +namespace DB +{ + + +template +static void convertArrayToCommonType(Array & arr) +{ + for (auto & elem : arr) + elem = apply_visitor(FieldVisitorConvertToNumber(), elem); +} + + +DataTypePtr FieldToDataType::operator() (Array & x) const +{ + if (x.empty()) + throw Exception("Cannot infer type of empty array", ErrorCodes::EMPTY_DATA_PASSED); + + /** Тип массива нужно вывести по типу его элементов. + * Если элементы - числа, то нужно выбрать наименьший общий тип, если такой есть, + * или кинуть исключение. 
+ * Код похож на NumberTraits::ResultOfIf, но тем кодом трудно здесь непосредственно воспользоваться. + * + * Также заметим, что Float32 не выводится, вместо этого используется только Float64. + * Это сделано потому что литералов типа Float32 не бывает в запросе. + */ + + bool has_string = false; + bool has_array = false; + bool has_float = false; + int max_bits = 0; + int max_signed_bits = 0; + int max_unsigned_bits = 0; + + for (const Field & elem : x) + { + switch (elem.getType()) + { + case Field::Types::UInt64: + { + UInt64 num = elem.get(); + if (num <= std::numeric_limits::max()) + max_unsigned_bits = std::max(8, max_unsigned_bits); + else if (num <= std::numeric_limits::max()) + max_unsigned_bits = std::max(16, max_unsigned_bits); + else if (num <= std::numeric_limits::max()) + max_unsigned_bits = std::max(32, max_unsigned_bits); + else + max_unsigned_bits = 64; + max_bits = std::max(max_unsigned_bits, max_bits); + break; + } + case Field::Types::Int64: + { + Int64 num = elem.get(); + if (num <= std::numeric_limits::max() && num >= std::numeric_limits::min()) + max_signed_bits = std::max(8, max_signed_bits); + else if (num <= std::numeric_limits::max() && num >= std::numeric_limits::min()) + max_signed_bits = std::max(16, max_signed_bits); + else if (num <= std::numeric_limits::max() && num >= std::numeric_limits::min()) + max_signed_bits = std::max(32, max_signed_bits); + else + max_signed_bits = 64; + max_bits = std::max(max_signed_bits, max_bits); + break; + } + case Field::Types::Float64: + { + has_float = true; + break; + } + case Field::Types::String: + { + has_string = true; + break; + } + case Field::Types::Array: + { + has_array = true; + break; + } + case Field::Types::Null: + { + throw Exception("NULL literals are not implemented yet", ErrorCodes::NOT_IMPLEMENTED); + break; + } + } + } + + if ((has_string + has_array + (max_bits > 0)) > 1) + throw Exception("Incompatible types of elements of array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (has_array) + throw Exception("Type inference of multidimensional arrays is not supported", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (has_string) + return new DataTypeArray(new DataTypeString); + + if (has_float && max_bits == 64) + throw Exception("Incompatible types Float64 and UInt64/Int64 of elements of array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (has_float) + { + convertArrayToCommonType(x); + return new DataTypeArray(new DataTypeFloat64); + } + + if (max_signed_bits == 64 && max_unsigned_bits == 64) + throw Exception("Incompatible types UInt64 and Int64 of elements of array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (max_signed_bits && !max_unsigned_bits) + { + if (max_signed_bits == 8) + return new DataTypeArray(new DataTypeInt8); + if (max_signed_bits == 16) + return new DataTypeArray(new DataTypeInt16); + if (max_signed_bits == 32) + return new DataTypeArray(new DataTypeInt32); + if (max_signed_bits == 64) + return new DataTypeArray(new DataTypeInt64); + } + + if (!max_signed_bits && max_unsigned_bits) + { + if (max_unsigned_bits == 8) + return new DataTypeArray(new DataTypeUInt8); + if (max_unsigned_bits == 16) + return new DataTypeArray(new DataTypeUInt16); + if (max_unsigned_bits == 32) + return new DataTypeArray(new DataTypeUInt32); + if (max_unsigned_bits == 64) + return new DataTypeArray(new DataTypeUInt64); + } + + if (max_signed_bits && max_unsigned_bits) + { + convertArrayToCommonType(x); + + if (max_unsigned_bits >= max_signed_bits) + { + /// Беззнаковый тип не помещается в знаковый. 
Надо увеличить количество бит. + if (max_bits == 8) + return new DataTypeArray(new DataTypeInt16); + if (max_bits == 16) + return new DataTypeArray(new DataTypeInt32); + if (max_bits == 32) + return new DataTypeArray(new DataTypeInt64); + else + throw Exception("Incompatible types UInt64 and signed integer of elements of array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + else + { + /// Беззнаковый тип помещается в знаковый. + if (max_bits == 8) + return new DataTypeArray(new DataTypeInt8); + if (max_bits == 16) + return new DataTypeArray(new DataTypeInt16); + if (max_bits == 32) + return new DataTypeArray(new DataTypeInt32); + if (max_bits == 64) + return new DataTypeArray(new DataTypeInt64); + } + } + + throw Exception("Incompatible types of elements of array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); +} + + +} diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 56cfd614b8c..60d7b502fb0 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1399,6 +1399,7 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl else if (ASTLiteral * node = typeid_cast(&*ast)) { DataTypePtr type = apply_visitor(FieldToDataType(), node->value); + ColumnWithNameAndType column; column.column = type->createConstColumn(1, node->value); column.type = type; diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 464763c2165..1209af32578 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -405,6 +405,59 @@ bool ParserStringLiteral::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max } +bool ParserArrayOfLiterals::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected) +{ + Pos begin = pos; + Array arr; + + if (pos == end || *pos != '[') + { + expected = "opening square bracket"; + return false; + } + + ParserWhiteSpaceOrComments ws; + ParserLiteral literal_p; + + ++pos; + + while (pos != end) + { + ws.ignore(pos, end); + + if (!arr.empty()) + { + if (*pos == ']') + { + ++pos; + node = new ASTLiteral(StringRange(begin, pos), arr); + return true; + } + else if (*pos == ',') + { + ++pos; + } + else + { + expected = "comma or closing square bracket"; + return false; + } + } + + ws.ignore(pos, end); + + ASTPtr literal_node; + if (!literal_p.parse(pos, end, literal_node, max_parsed_pos, expected)) + return false; + + arr.push_back(typeid_cast(*literal_node).value); + } + + expected = "closing square bracket"; + return false; +} + + bool ParserLiteral::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected) { ParserNull null_p; @@ -450,6 +503,7 @@ bool ParserExpressionElement::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & ParserParenthesisExpression paren_p; ParserSubquery subquery_p; ParserArray array_p; + ParserArrayOfLiterals array_lite_p; ParserLiteral lit_p; ParserFunction fun_p; ParserCompoundIdentifier id_p; @@ -461,6 +515,9 @@ bool ParserExpressionElement::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & if (paren_p.parse(pos, end, node, max_parsed_pos, expected)) return true; + if (array_lite_p.parse(pos, end, node, max_parsed_pos, expected)) + return true; + if (array_p.parse(pos, end, node, max_parsed_pos, expected)) return true; diff --git a/dbms/tests/queries/0_stateless/00185_array_literals.reference b/dbms/tests/queries/0_stateless/00185_array_literals.reference new file mode 
100644 index 00000000000..20d1f61ef8d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00185_array_literals.reference @@ -0,0 +1,23 @@ +[1,2] +[1,2] +[-1,2] +[-1,1000] +[-1,1000000] +[-1000,2] +[-1000000,2] +['Hello','world'] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256] +[1,2] +[1,2] +[-1,2] +[-1,1000] +[-1,1000000] +[-1000,2] +[-1000000,2] +['Hello','world'] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256] +[0] +[0] +[0] +[0] +[0] diff --git a/dbms/tests/queries/0_stateless/00185_array_literals.sql b/dbms/tests/queries/0_stateless/00185_array_literals.sql new file mode 100644 index 00000000000..5d6234c8bc6 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00185_array_literals.sql @@ -0,0 +1,30 @@ +SELECT [1, 2]; +SELECT [1.0, 2]; +SELECT [-1, 2]; +SELECT [-1, 1000]; +SELECT [-1, 1000000]; +SELECT [-1000, 2]; +SELECT [-1000000, 2]; +SELECT ['Hello', 'world']; + +SELECT [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256]; 
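For reference, the element-type widening rule implemented in FieldToDataType above (and exercised by the test queries in this file) can be summarised by the following standalone sketch. It is illustrative only and not part of the patch; commonSignedType is a made-up helper name. When an array literal mixes signed and unsigned integers, the widest unsigned element has to fit into the signed result type, so its bit width is doubled, and UInt64 mixed with a signed value cannot be represented at all.

    #include <iostream>
    #include <stdexcept>
    #include <string>

    /// Illustrative only: mirrors the rule above. max_unsigned_bits is the
    /// width of the widest unsigned element of the array literal.
    std::string commonSignedType(unsigned max_unsigned_bits)
    {
        switch (max_unsigned_bits)
        {
            case 8:  return "Int16";
            case 16: return "Int32";
            case 32: return "Int64";
            default: throw std::runtime_error("Incompatible types UInt64 and signed integer of elements of array");
        }
    }

    int main()
    {
        std::cout << commonSignedType(16) << '\n';   /// e.g. [-1, 1000] should widen to Array(Int32)
        std::cout << commonSignedType(32) << '\n';   /// e.g. [-1, 1000000] should widen to Array(Int64)
    }
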
+ +SELECT [1, 1 + 1]; +SELECT [1.0, 1 + 1]; +SELECT [-1, 1 + 1]; +SELECT [-1, toUInt16(1000)]; +SELECT [-1, toUInt32(1000000)]; +SELECT [-1000, 1 + 1]; +SELECT [-1000000, 1 + 1]; +SELECT ['Hello', concat('wor', 'ld')]; + +SELECT [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256 + 0]; + +SELECT [0]; +SELECT [0 ]; +SELECT [ 0]; +SELECT [ 0 ]; +SELECT +[ + 0 +]; From 9527f99d0705d8680f77b1c4f338dfcf50eb2788 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jun 2015 08:46:55 +0300 Subject: [PATCH 50/67] dbms: improved performance for very long arrays [#METR-17061]. --- dbms/include/DB/Common/SipHash.h | 1 + dbms/include/DB/Parsers/ASTLiteral.h | 2 +- dbms/src/Parsers/ASTLiteral.cpp | 82 ++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 dbms/src/Parsers/ASTLiteral.cpp diff --git a/dbms/include/DB/Common/SipHash.h b/dbms/include/DB/Common/SipHash.h index 280d83a9b84..4296995a9ce 100644 --- a/dbms/include/DB/Common/SipHash.h +++ b/dbms/include/DB/Common/SipHash.h @@ -12,6 +12,7 @@ */ #include +#include #define ROTL(x,b) (u64)( ((x) << (b)) | ( (x) >> (64 - (b))) ) diff --git a/dbms/include/DB/Parsers/ASTLiteral.h b/dbms/include/DB/Parsers/ASTLiteral.h index b7b24ef56db..2a610255be8 100644 --- a/dbms/include/DB/Parsers/ASTLiteral.h +++ b/dbms/include/DB/Parsers/ASTLiteral.h @@ -20,7 +20,7 @@ public: ASTLiteral() = default; ASTLiteral(const StringRange range_, const Field & value_) : ASTWithAlias(range_), value(value_) {} - String getColumnName() const override { return apply_visitor(FieldVisitorToString(), value); } + String getColumnName() const override; /** Получить текст, который идентифицирует этот элемент. 
*/ String getID() const override { return "Literal_" + apply_visitor(FieldVisitorDump(), value); } diff --git a/dbms/src/Parsers/ASTLiteral.cpp b/dbms/src/Parsers/ASTLiteral.cpp new file mode 100644 index 00000000000..13a35f4ab5c --- /dev/null +++ b/dbms/src/Parsers/ASTLiteral.cpp @@ -0,0 +1,82 @@ +#include +#include + + +namespace DB +{ + + +/** Обновляет SipHash по данным Field */ +class FieldVisitorHash : public StaticVisitor<> +{ +private: + SipHash & hash; +public: + FieldVisitorHash(SipHash & hash) : hash(hash) {} + + void operator() (const Null & x) const + { + UInt8 type = Field::Types::Null; + hash.update(reinterpret_cast(&type), sizeof(type)); + } + + void operator() (const UInt64 & x) const + { + UInt8 type = Field::Types::UInt64; + hash.update(reinterpret_cast(&type), sizeof(type)); + hash.update(reinterpret_cast(&x), sizeof(x)); + } + + void operator() (const Int64 & x) const + { + UInt8 type = Field::Types::Int64; + hash.update(reinterpret_cast(&type), sizeof(type)); + hash.update(reinterpret_cast(&x), sizeof(x)); + } + + void operator() (const Float64 & x) const + { + UInt8 type = Field::Types::Float64; + hash.update(reinterpret_cast(&type), sizeof(type)); + hash.update(reinterpret_cast(&x), sizeof(x)); + } + + void operator() (const String & x) const + { + UInt8 type = Field::Types::String; + hash.update(reinterpret_cast(&type), sizeof(type)); + size_t size = x.size(); + hash.update(reinterpret_cast(&size), sizeof(size)); + hash.update(x.data(), x.size()); + } + + void operator() (const Array & x) const + { + UInt8 type = Field::Types::Array; + hash.update(reinterpret_cast(&type), sizeof(type)); + size_t size = x.size(); + hash.update(reinterpret_cast(&size), sizeof(size)); + + for (const auto & elem : x) + apply_visitor(*this, elem); + } +}; + + +String ASTLiteral::getColumnName() const +{ + /// Отдельный случай для очень больших массивов. Вместо указания всех элементов, будем использовать хэш от содержимого. +/* if (value.getType() == Field::Types::Array + && value.get().size() > 100) /// 100 - наугад. + { + SipHash hash; + apply_visitor(FieldVisitorHash(hash), value); + UInt64 low, high; + hash.get128(low, high); + return "__array_" + toString(low) + "_" + toString(high); + }*/ + + return apply_visitor(FieldVisitorToString(), value); +} + +} From 8c1d4358bc10c8fc00a4aaca85222d88f46300b8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jun 2015 08:48:08 +0300 Subject: [PATCH 51/67] dbms: addition to prev. revision [#METR-17061]. --- dbms/src/Parsers/ASTLiteral.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Parsers/ASTLiteral.cpp b/dbms/src/Parsers/ASTLiteral.cpp index 13a35f4ab5c..9c6392b1e0f 100644 --- a/dbms/src/Parsers/ASTLiteral.cpp +++ b/dbms/src/Parsers/ASTLiteral.cpp @@ -66,7 +66,7 @@ public: String ASTLiteral::getColumnName() const { /// Отдельный случай для очень больших массивов. Вместо указания всех элементов, будем использовать хэш от содержимого. -/* if (value.getType() == Field::Types::Array + if (value.getType() == Field::Types::Array && value.get().size() > 100) /// 100 - наугад. 
{ SipHash hash; @@ -74,7 +74,7 @@ String ASTLiteral::getColumnName() const UInt64 low, high; hash.get128(low, high); return "__array_" + toString(low) + "_" + toString(high); - }*/ + } return apply_visitor(FieldVisitorToString(), value); } From d23de31335d0eea911a0f5e179014c66a0259a18 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jun 2015 10:41:23 +0300 Subject: [PATCH 52/67] dbms: improved performance for very long arrays [#METR-17061]. --- dbms/include/DB/Columns/ColumnConst.h | 126 +++++++++++++----- dbms/src/Columns/ColumnConst.cpp | 23 +++- .../00186_very_long_arrays.reference | 1 + .../0_stateless/00186_very_long_arrays.sh | 3 + 4 files changed, 118 insertions(+), 35 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00186_very_long_arrays.reference create mode 100755 dbms/tests/queries/0_stateless/00186_very_long_arrays.sh diff --git a/dbms/include/DB/Columns/ColumnConst.h b/dbms/include/DB/Columns/ColumnConst.h index 3286f707e7e..fad47e76f96 100644 --- a/dbms/include/DB/Columns/ColumnConst.h +++ b/dbms/include/DB/Columns/ColumnConst.h @@ -24,36 +24,51 @@ public: }; -/** шаблон для столбцов-констант (столбцов одинаковых значений). +/** Столбец-константа может содержать внутри себя само значение, + * или, в случае массивов, SharedPtr от значения-массива, + * чтобы избежать проблем производительности при копировании очень больших массивов. + * + * T - тип значения, + * DataHolder - как значение хранится в таблице (либо T, либо SharedPtr) + * Derived должен реализовать методы getDataFromHolderImpl - получить ссылку на значение из holder-а. + * + * Для строк и массивов реализации sizeOfField и byteSize могут быть некорректными. */ -template -class ColumnConst final : public IColumnConst +template +class ColumnConstBase : public IColumnConst { +protected: + size_t s; + DataHolder data; + DataTypePtr data_type; + + T & getDataFromHolder() { return static_cast(this)->getDataFromHolderImpl(); } + const T & getDataFromHolder() const { return static_cast(this)->getDataFromHolderImpl(); } + + ColumnConstBase(size_t s_, const DataHolder & data_, DataTypePtr data_type_) + : s(s_), data(data_), data_type(data_type_) {} + public: typedef T Type; typedef typename NearestFieldType::Type FieldType; - /// Для ColumnConst data_type_ должен быть ненулевым. - /// Для ColumnConst data_type_ должен быть ненулевым, если тип данных FixedString. 
- ColumnConst(size_t s_, const T & data_, DataTypePtr data_type_ = DataTypePtr()) : s(s_), data(data_), data_type(data_type_) {} - std::string getName() const override { return "ColumnConst<" + TypeName::get() + ">"; } bool isNumeric() const override { return IsNumber::value; } bool isFixed() const override { return IsNumber::value; } size_t sizeOfField() const override { return sizeof(T); } - ColumnPtr cloneResized(size_t s_) const override { return new ColumnConst(s_, data, data_type); } + ColumnPtr cloneResized(size_t s_) const override { return new Derived(s_, data, data_type); } size_t size() const override { return s; } - Field operator[](size_t n) const override { return FieldType(data); } - void get(size_t n, Field & res) const override { res = FieldType(data); } + Field operator[](size_t n) const override { return FieldType(getDataFromHolder()); } + void get(size_t n, Field & res) const override { res = FieldType(getDataFromHolder()); } ColumnPtr cut(size_t start, size_t length) const override { - return new ColumnConst(length, data, data_type); + return new Derived(length, data, data_type); } void insert(const Field & x) override { - if (x.get() != FieldType(data)) + if (x.get() != FieldType(getDataFromHolder())) throw Exception("Cannot insert different element into constant column " + getName(), ErrorCodes::CANNOT_INSERT_ELEMENT_INTO_CONSTANT_COLUMN); ++s; @@ -66,7 +81,7 @@ public: void insertFrom(const IColumn & src, size_t n) override { - if (data != static_cast &>(src).data) + if (getDataFromHolder() != static_cast(src).getDataFromHolder()) throw Exception("Cannot insert different element into constant column " + getName(), ErrorCodes::CANNOT_INSERT_ELEMENT_INTO_CONSTANT_COLUMN); ++s; @@ -79,7 +94,7 @@ public: if (s != filt.size()) throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - return new ColumnConst(countBytesInFilter(filt), data, data_type); + return new Derived(countBytesInFilter(filt), data, data_type); } ColumnPtr replicate(const Offsets_t & offsets) const override @@ -88,7 +103,7 @@ public: throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); size_t replicated_size = 0 == s ? 0 : offsets.back(); - return new ColumnConst(replicated_size, data, data_type); + return new Derived(replicated_size, data, data_type); } size_t byteSize() const override { return sizeof(data) + sizeof(s); } @@ -103,13 +118,13 @@ public: if (perm.size() < limit) throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - return new ColumnConst(limit, data, data_type); + return new Derived(limit, data, data_type); } int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { - const ColumnConst & rhs = static_cast &>(rhs_); - return data < rhs.data /// TODO: правильное сравнение NaN-ов в константных столбцах. + const Derived & rhs = static_cast(rhs_); + return getDataFromHolder() < rhs.getDataFromHolder() /// TODO: правильное сравнение NaN-ов в константных столбцах. ? -1 : (data == rhs.data ? 0 @@ -123,30 +138,79 @@ public: res[i] = i; } + DataTypePtr & getDataType() { return data_type; } + const DataTypePtr & getDataType() const { return data_type; } +}; + + +/** шаблон для столбцов-констант (столбцов одинаковых значений). 
+ */ +template +class ColumnConst final : public ColumnConstBase> +{ +private: + friend class ColumnConstBase>; + + T & getDataFromHolderImpl() { return this->data; } + const T & getDataFromHolderImpl() const { return this->data; } + +public: + /// Для ColumnConst data_type_ должен быть ненулевым. + /// Для ColumnConst data_type_ должен быть ненулевым, если тип данных FixedString. + ColumnConst(size_t s_, const T & data_, DataTypePtr data_type_ = DataTypePtr()) + : ColumnConstBase>(s_, data_, data_type_) {} + StringRef getDataAt(size_t n) const override; StringRef getDataAtWithTerminatingZero(size_t n) const override; UInt64 get64(size_t n) const override; /** Более эффективные методы манипуляции */ - T & getData() { return data; } - const T & getData() const { return data; } + T & getData() { return this->data; } + const T & getData() const { return this->data; } /** Преобразование из константы в полноценный столбец */ ColumnPtr convertToFullColumn() const override; void getExtremes(Field & min, Field & max) const override { - min = FieldType(data); - max = FieldType(data); + min = typename ColumnConstBase>::FieldType(this->data); + max = typename ColumnConstBase>::FieldType(this->data); } +}; - DataTypePtr & getDataType() { return data_type; } - const DataTypePtr & getDataType() const { return data_type; } +template <> +class ColumnConst final : public ColumnConstBase, ColumnConst> +{ private: - size_t s; - T data; - DataTypePtr data_type; + friend class ColumnConstBase, ColumnConst>; + + Array & getDataFromHolderImpl() { return *data; } + const Array & getDataFromHolderImpl() const { return *data; } + +public: + /// data_type_ должен быть ненулевым. + ColumnConst(size_t s_, const Array & data_, DataTypePtr data_type_ = DataTypePtr()) + : ColumnConstBase, ColumnConst>(s_, new Array(data_), data_type_) {} + + ColumnConst(size_t s_, const SharedPtr & data_, DataTypePtr data_type_ = DataTypePtr()) + : ColumnConstBase, ColumnConst>(s_, data_, data_type_) {} + + StringRef getDataAt(size_t n) const override; + StringRef getDataAtWithTerminatingZero(size_t n) const override; + UInt64 get64(size_t n) const override; + + /** Более эффективные методы манипуляции */ + const Array & getData() const { return *data; } + + /** Преобразование из константы в полноценный столбец */ + ColumnPtr convertToFullColumn() const override; + + void getExtremes(Field & min, Field & max) const override + { + min = FieldType(); + max = FieldType(); + } }; @@ -158,19 +222,17 @@ template ColumnPtr ColumnConst::convertToFullColumn() const { ColumnVector * res_ = new ColumnVector; ColumnPtr res = res_; - res_->getData().assign(s, data); + res_->getData().assign(this->s, this->data); return res; } template <> ColumnPtr ColumnConst::convertToFullColumn() const; -template <> ColumnPtr ColumnConst::convertToFullColumn() const; - template StringRef ColumnConst::getDataAt(size_t n) const { - throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Method getDataAt is not supported for " + this->getName(), ErrorCodes::NOT_IMPLEMENTED); } template <> inline StringRef ColumnConst::getDataAt(size_t n) const @@ -180,7 +242,7 @@ template <> inline StringRef ColumnConst::getDataAt(size_t n) const template UInt64 ColumnConst::get64(size_t n) const { - throw Exception("Method get64 is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Method get64 is not supported for " + this->getName(), ErrorCodes::NOT_IMPLEMENTED); } /// Для элементарных 
типов. diff --git a/dbms/src/Columns/ColumnConst.cpp b/dbms/src/Columns/ColumnConst.cpp index ec78c294464..a121b02fb97 100644 --- a/dbms/src/Columns/ColumnConst.cpp +++ b/dbms/src/Columns/ColumnConst.cpp @@ -62,7 +62,7 @@ template <> ColumnPtr ColumnConst::convertToFullColumn() const } -template <> ColumnPtr ColumnConst::convertToFullColumn() const +ColumnPtr ColumnConst::convertToFullColumn() const { if (!data_type) throw Exception("No data type specified for ColumnConstArray", ErrorCodes::LOGICAL_ERROR); @@ -71,7 +71,8 @@ template <> ColumnPtr ColumnConst::convertToFullColumn() const if (!type) throw Exception("Non-array data type specified for ColumnConstArray", ErrorCodes::LOGICAL_ERROR); - size_t array_size = data.size(); + const Array & array = getDataFromHolderImpl(); + size_t array_size = array.size(); ColumnPtr nested_column = type->getNestedType()->createColumn(); ColumnArray * res = new ColumnArray(nested_column); @@ -82,11 +83,27 @@ template <> ColumnPtr ColumnConst::convertToFullColumn() const { offsets[i] = (i + 1) * array_size; for (size_t j = 0; j < array_size; ++j) - nested_column->insert(data[j]); + nested_column->insert(array[j]); } return res; } +StringRef ColumnConst::getDataAt(size_t n) const +{ + throw Exception("Method getDataAt is not supported for " + this->getName(), ErrorCodes::NOT_IMPLEMENTED); +} + +UInt64 ColumnConst::get64(size_t n) const +{ + throw Exception("Method get64 is not supported for " + this->getName(), ErrorCodes::NOT_IMPLEMENTED); +} + +StringRef ColumnConst::getDataAtWithTerminatingZero(size_t n) const +{ + throw Exception("Method getDataAt is not supported for " + this->getName(), ErrorCodes::NOT_IMPLEMENTED); +} + + } diff --git a/dbms/tests/queries/0_stateless/00186_very_long_arrays.reference b/dbms/tests/queries/0_stateless/00186_very_long_arrays.reference new file mode 100644 index 00000000000..825319e1c5b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00186_very_long_arrays.reference @@ -0,0 +1 @@ +10000000 diff --git a/dbms/tests/queries/0_stateless/00186_very_long_arrays.sh b/dbms/tests/queries/0_stateless/00186_very_long_arrays.sh new file mode 100755 index 00000000000..c393ee497a9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00186_very_long_arrays.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +(echo 'SELECT number FROM system.numbers WHERE transform(number, ['; seq 1 100000 | tr '\n' ','; echo '0],['; seq 1 100000 | tr '\n' ','; echo '0]) = 10000000 LIMIT 1';) | clickhouse-client --max_query_size=100000000 From c1ed7ca004944c8b60799f70d8b5e23225084edf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jun 2015 21:47:46 +0300 Subject: [PATCH 53/67] dbms: fixed error with LIKE [#METR-17078]. --- dbms/include/DB/Functions/FunctionsStringSearch.h | 7 ++++++- .../queries/0_stateless/00187_like_regexp_prefix.reference | 1 + .../tests/queries/0_stateless/00187_like_regexp_prefix.sql | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00187_like_regexp_prefix.reference create mode 100644 dbms/tests/queries/0_stateless/00187_like_regexp_prefix.sql diff --git a/dbms/include/DB/Functions/FunctionsStringSearch.h b/dbms/include/DB/Functions/FunctionsStringSearch.h index 351fb47f1ef..f1a5a3a45a1 100644 --- a/dbms/include/DB/Functions/FunctionsStringSearch.h +++ b/dbms/include/DB/Functions/FunctionsStringSearch.h @@ -447,10 +447,15 @@ struct MatchImpl const char * str_data = reinterpret_cast(&data[i != 0 ? offsets[i - 1] : 0]); size_t str_size = (i != 0 ? 
offsets[i] - offsets[i - 1] : offsets[0]) - 1; + /** Даже в случае required_substring_is_prefix используем UNANCHORED проверку регекспа, + * чтобы он мог сматчиться, когда required_substring встречается в строке несколько раз, + * и на первом вхождении регексп не матчит. + */ + if (required_substring_is_prefix) res[i] = revert ^ regexp->getRE2()->Match( re2_st::StringPiece(str_data, str_size), - reinterpret_cast(pos) - str_data, str_size, re2_st::RE2::ANCHOR_START, nullptr, 0); + reinterpret_cast(pos) - str_data, str_size, re2_st::RE2::UNANCHORED, nullptr, 0); else res[i] = revert ^ regexp->getRE2()->Match( re2_st::StringPiece(str_data, str_size), diff --git a/dbms/tests/queries/0_stateless/00187_like_regexp_prefix.reference b/dbms/tests/queries/0_stateless/00187_like_regexp_prefix.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00187_like_regexp_prefix.reference @@ -0,0 +1 @@ +1 diff --git a/dbms/tests/queries/0_stateless/00187_like_regexp_prefix.sql b/dbms/tests/queries/0_stateless/00187_like_regexp_prefix.sql new file mode 100644 index 00000000000..7fec21b68ad --- /dev/null +++ b/dbms/tests/queries/0_stateless/00187_like_regexp_prefix.sql @@ -0,0 +1 @@ +SELECT materialize('prepre_f') LIKE '%pre_f%'; From b5fd5fe490e87192e13615eea0a4eaa5718e9eb6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2015 00:35:35 +0300 Subject: [PATCH 54/67] dbms: QueryLog: development (incomplete) [#METR-16946]. --- dbms/include/DB/Core/Exception.h | 2 + dbms/include/DB/DataStreams/BlockIO.h | 20 +++ dbms/include/DB/Interpreters/QueryLog.h | 4 + dbms/src/Core/Exception.cpp | 29 ++++- dbms/src/Interpreters/executeQuery.cpp | 162 ++++++++++++++++++------ dbms/src/Server/HTTPHandler.cpp | 60 +-------- dbms/src/Server/HTTPHandler.h | 2 +- dbms/src/Server/OLAPHTTPHandler.cpp | 25 ---- dbms/src/Server/TCPHandler.cpp | 39 +----- dbms/src/Server/TCPHandler.h | 3 - 10 files changed, 184 insertions(+), 162 deletions(-) diff --git a/dbms/include/DB/Core/Exception.h b/dbms/include/DB/Core/Exception.h index afc56e6571f..3d28e412137 100644 --- a/dbms/include/DB/Core/Exception.h +++ b/dbms/include/DB/Core/Exception.h @@ -31,6 +31,8 @@ ExceptionPtr cloneCurrentException(); void tryLogCurrentException(const char * log_name); void tryLogCurrentException(Poco::Logger * logger); +std::string getCurrentExceptionMessage(bool with_stacktrace); + void rethrowFirstException(Exceptions & exceptions); diff --git a/dbms/include/DB/DataStreams/BlockIO.h b/dbms/include/DB/DataStreams/BlockIO.h index b0e69bbb27d..e45ffeebd61 100644 --- a/dbms/include/DB/DataStreams/BlockIO.h +++ b/dbms/include/DB/DataStreams/BlockIO.h @@ -24,6 +24,23 @@ struct BlockIO Block in_sample; /// Пример блока, который будет прочитан из in. Block out_sample; /// Пример блока, которого нужно писать в out. + /// Здесь могут быть установлены колбэки для логгирования запроса. + std::function finish_callback; + std::function exception_callback; + + /// Вызывайте эти функции, если нужно логгировать запрос. + void onFinish() + { + if (in && finish_callback) + finish_callback(*in); + } + + void onException() + { + if (exception_callback) + exception_callback(); + } + BlockIO & operator= (const BlockIO & rhs) { /// Обеспечиваем правильный порядок уничтожения. 
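To make the new finish/exception callbacks in BlockIO easier to follow, here is a standalone sketch of the intended contract. It is illustrative only, not part of the patch: FakeBlockIO is a made-up stand-in, and in the real code finish_callback additionally receives an IBlockInputStream & so it can take rows/bytes from the profiling info. The code that runs the query calls onException() on failure and onFinish() on success; the callbacks installed by executeQueryImpl then write the query_log entry.

    #include <functional>
    #include <iostream>

    /// Illustrative stand-in for BlockIO showing only the callback contract.
    struct FakeBlockIO
    {
        std::function<void()> finish_callback;
        std::function<void()> exception_callback;

        void onFinish()    { if (finish_callback)    finish_callback(); }
        void onException() { if (exception_callback) exception_callback(); }
    };

    int main()
    {
        FakeBlockIO io;
        io.finish_callback    = [] { std::cout << "write QUERY_FINISH to query_log\n"; };
        io.exception_callback = [] { std::cout << "write EXCEPTION to query_log\n"; };

        try
        {
            /// ... execute the query pipeline here ...
            io.onFinish();
        }
        catch (...)
        {
            io.onException();
        }
    }
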
@@ -37,6 +54,9 @@ struct BlockIO in_sample = rhs.in_sample; out_sample = rhs.out_sample; + finish_callback = rhs.finish_callback; + exception_callback = rhs.exception_callback; + return *this; } diff --git a/dbms/include/DB/Interpreters/QueryLog.h b/dbms/include/DB/Interpreters/QueryLog.h index 33cc4bd4b5f..d573f6e7677 100644 --- a/dbms/include/DB/Interpreters/QueryLog.h +++ b/dbms/include/DB/Interpreters/QueryLog.h @@ -34,6 +34,7 @@ struct QueryLogElement SHUTDOWN = 0, /// Эта запись имеет служебное значение. QUERY_START = 1, QUERY_FINISH = 2, + EXCEPTION = 3, }; Type type = QUERY_START; @@ -52,6 +53,9 @@ struct QueryLogElement String query; + String exception; + String stack_trace; + Context::Interface interface = Context::Interface::TCP; Context::HTTPMethod http_method = Context::HTTPMethod::UNKNOWN; Poco::Net::IPAddress ip_address; diff --git a/dbms/src/Core/Exception.cpp b/dbms/src/Core/Exception.cpp index f5864a4f210..789af7beeae 100644 --- a/dbms/src/Core/Exception.cpp +++ b/dbms/src/Core/Exception.cpp @@ -59,6 +59,19 @@ void tryLogCurrentException(const char * log_name) void tryLogCurrentException(Poco::Logger * logger) { + try + { + LOG_ERROR(logger, getCurrentExceptionMessage(true)); + } + catch (...) + { + } +} + +std::string getCurrentExceptionMessage(bool with_stacktrace) +{ + std::stringstream stream; + try { throw; @@ -67,8 +80,10 @@ void tryLogCurrentException(Poco::Logger * logger) { try { - LOG_ERROR(logger, "Code: " << e.code() << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what() - << ", Stack trace:\n\n" << e.getStackTrace().toString()); + stream << "Code: " << e.code() << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what(); + + if (with_stacktrace) + stream << ", Stack trace:\n\n" << e.getStackTrace().toString(); } catch (...) {} } @@ -76,8 +91,8 @@ void tryLogCurrentException(Poco::Logger * logger) { try { - LOG_ERROR(logger, "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() - << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what()); + stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() + << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what(); } catch (...) {} } @@ -91,7 +106,7 @@ void tryLogCurrentException(Poco::Logger * logger) if (status) name += " (demangling status: " + toString(status) + ")"; - LOG_ERROR(logger, "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what()); + stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what(); } catch (...) {} } @@ -105,10 +120,12 @@ void tryLogCurrentException(Poco::Logger * logger) if (status) name += " (demangling status: " + toString(status) + ")"; - LOG_ERROR(logger, "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name); + stream << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name; } catch (...) 
{} } + + return stream.str(); } diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 91ae751eea2..c66aa7ecb57 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -1,10 +1,12 @@ #include +#include #include #include #include #include +#include #include #include @@ -103,8 +105,9 @@ static std::tuple executeQueryImpl( context.setProcessListElement(&process_list_entry->get()); } - /// Логгируем в таблицу начало выполнения запроса, если нужно. - if (settings.log_queries) + BlockIO res; + + /// Всё, что связано с логгированием запросов. { QueryLogElement elem; @@ -121,10 +124,73 @@ static std::tuple executeQueryImpl( elem.user = context.getUser(); elem.query_id = context.getCurrentQueryId(); - context.getQueryLog().add(elem); - } + bool log_queries = settings.log_queries; - BlockIO res; + /// Логгируем в таблицу начало выполнения запроса, если нужно. + if (log_queries) + context.getQueryLog().add(elem); + + /// Также дадим вызывающему коду в дальнейшем логгировать завершение запроса и эксепшен. + res.finish_callback = [elem, &context, log_queries] (IBlockInputStream & stream) mutable + { + elem.type = QueryLogElement::QUERY_FINISH; + + elem.event_time = time(0); + elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); /// Грубое время для запросов без profiling_stream; + + if (IProfilingBlockInputStream * profiling_stream = dynamic_cast(&stream)) + { + const BlockStreamProfileInfo & info = profiling_stream->getInfo(); + + elem.query_duration_ms = info.total_stopwatch.elapsed() / 1000000; + + stream.getLeafRowsBytes(elem.read_rows, elem.read_bytes); /// TODO неверно для распределённых запросов? + + elem.result_rows = info.rows; + elem.result_bytes = info.bytes; + + if (elem.read_rows != 0) + { + LOG_INFO(&Logger::get("executeQuery"), std::fixed << std::setprecision(3) + << "Read " << elem.read_rows << " rows, " + << formatReadableSizeWithBinarySuffix(elem.read_bytes) << " in " << elem.query_duration_ms / 1000.0 << " sec., " + << static_cast(elem.read_rows * 1000.0 / elem.query_duration_ms) << " rows/sec., " + << formatReadableSizeWithBinarySuffix(elem.read_bytes * 1000.0 / elem.query_duration_ms) << "/sec."); + } + } + + if (log_queries) + context.getQueryLog().add(elem); + }; + + res.exception_callback = [elem, &context, log_queries] () mutable + { + elem.type = QueryLogElement::EXCEPTION; + + elem.event_time = time(0); + elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); /// Низкая точность. Можно сделать лучше. + elem.exception = getCurrentExceptionMessage(false); + + /// Достаём стек трейс, если возможно. + try + { + throw; + } + catch (const Exception & e) + { + elem.stack_trace = e.getStackTrace().toString(); + + LOG_ERROR(&Logger::get("executeQuery"), elem.exception << ", Stack trace:\n\n" << elem.stack_trace); + } + catch (...) 
+ { + LOG_ERROR(&Logger::get("executeQuery"), elem.exception); + } + + if (log_queries) + context.getQueryLog().add(elem); + }; + } try { @@ -142,6 +208,14 @@ static std::tuple executeQueryImpl( quota.addQuery(current_time); + if (res.in) + { + std::stringstream log_str; + log_str << "Query pipeline:\n"; + res.in->dumpTree(log_str); + LOG_DEBUG(&Logger::get("executeQuery"), log_str.str()); + } + return std::make_tuple(ast, res); } @@ -197,51 +271,63 @@ void executeQuery( std::tie(ast, streams) = executeQueryImpl(begin, end, context, internal, stage); - if (streams.out) + bool exception = false; + try { - const ASTInsertQuery * ast_insert_query = dynamic_cast(ast.get()); + if (streams.out) + { + const ASTInsertQuery * ast_insert_query = dynamic_cast(ast.get()); - if (!ast_insert_query) - throw Exception("Logical error: query requires data to insert, but it is not INSERT query", ErrorCodes::LOGICAL_ERROR); + if (!ast_insert_query) + throw Exception("Logical error: query requires data to insert, but it is not INSERT query", ErrorCodes::LOGICAL_ERROR); - String format = ast_insert_query->format; - if (format.empty()) - format = "Values"; + String format = ast_insert_query->format; + if (format.empty()) + format = "Values"; - /// Данные могут содержаться в распарсенной (ast_insert_query.data) и ещё не распарсенной (istr) части запроса. + /// Данные могут содержаться в распарсенной (ast_insert_query.data) и ещё не распарсенной (istr) части запроса. - ConcatReadBuffer::ReadBuffers buffers; - ReadBuffer buf1(const_cast(ast_insert_query->data), ast_insert_query->data ? ast_insert_query->end - ast_insert_query->data : 0, 0); + ConcatReadBuffer::ReadBuffers buffers; + ReadBuffer buf1(const_cast(ast_insert_query->data), ast_insert_query->data ? ast_insert_query->end - ast_insert_query->data : 0, 0); - if (ast_insert_query->data) - buffers.push_back(&buf1); - buffers.push_back(&istr); + if (ast_insert_query->data) + buffers.push_back(&buf1); + buffers.push_back(&istr); - /** NOTE Нельзя читать из istr до того, как прочтём всё между ast_insert_query.data и ast_insert_query.end. - * - потому что query.data может ссылаться на кусок памяти, использующийся в качестве буфера в istr. - */ + /** NOTE Нельзя читать из istr до того, как прочтём всё между ast_insert_query.data и ast_insert_query.end. + * - потому что query.data может ссылаться на кусок памяти, использующийся в качестве буфера в istr. + */ - ConcatReadBuffer data_istr(buffers); + ConcatReadBuffer data_istr(buffers); - BlockInputStreamPtr in{ - context.getFormatFactory().getInput( - format, data_istr, streams.out_sample, context.getSettings().max_insert_block_size)}; + BlockInputStreamPtr in{ + context.getFormatFactory().getInput( + format, data_istr, streams.out_sample, context.getSettings().max_insert_block_size)}; - copyData(*in, *streams.out); + copyData(*in, *streams.out); + } + + if (streams.in) + { + const ASTQueryWithOutput * ast_query_with_output = dynamic_cast(ast.get()); + + String format_name = ast_query_with_output && (ast_query_with_output->getFormat() != nullptr) + ? typeid_cast(*ast_query_with_output->getFormat()).name + : context.getDefaultFormat(); + + BlockOutputStreamPtr out = context.getFormatFactory().getOutput(format_name, ostr, streams.in_sample); + + copyData(*streams.in, *out); + } + } + catch (...) 
+ { + exception = true; + streams.onException(); } - if (streams.in) - { - const ASTQueryWithOutput * ast_query_with_output = dynamic_cast(ast.get()); - - String format_name = ast_query_with_output && (ast_query_with_output->getFormat() != nullptr) - ? typeid_cast(*ast_query_with_output->getFormat()).name - : context.getDefaultFormat(); - - BlockOutputStreamPtr out = context.getFormatFactory().getOutput(format_name, ostr, streams.in_sample); - - copyData(*streams.in, *out); - } + if (!exception) + streams.onFinish(); } } diff --git a/dbms/src/Server/HTTPHandler.cpp b/dbms/src/Server/HTTPHandler.cpp index 7ed2c31da24..9d5e344077c 100644 --- a/dbms/src/Server/HTTPHandler.cpp +++ b/dbms/src/Server/HTTPHandler.cpp @@ -150,37 +150,12 @@ void HTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net executeQuery(*in, *used_output.out_maybe_compressed, context, query_plan); watch.stop(); - if (query_plan) - { - std::stringstream log_str; - log_str << "Query pipeline:\n"; - query_plan->dumpTree(log_str); - LOG_DEBUG(log, log_str.str()); - - /// Выведем информацию о том, сколько считано строк и байт. - size_t rows = 0; - size_t bytes = 0; - - query_plan->getLeafRowsBytes(rows, bytes); - - if (rows != 0) - { - LOG_INFO(log, std::fixed << std::setprecision(3) - << "Read " << rows << " rows, " << bytes / 1048576.0 << " MiB in " << watch.elapsedSeconds() << " sec., " - << static_cast(rows / watch.elapsedSeconds()) << " rows/sec., " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec."); - } - } - - QuotaForIntervals & quota = context.getQuota(); - if (!quota.empty()) - LOG_INFO(log, "Quota:\n" << quota.toString()); - /// Если не было эксепшена и данные ещё не отправлены - отправляются HTTP заголовки с кодом 200. used_output.out->finalize(); } -void HTTPHandler::trySendExceptionToClient(std::stringstream & s, +void HTTPHandler::trySendExceptionToClient(const std::string & s, Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, Output & used_output) { @@ -201,7 +176,7 @@ void HTTPHandler::trySendExceptionToClient(std::stringstream & s, if (!response.sent() && !used_output.out_maybe_compressed) { /// Ещё ничего не отправляли, и даже не знаем, нужно ли сжимать ответ. 
- response.send() << s.str() << std::endl; + response.send() << s << std::endl; } else if (used_output.out_maybe_compressed) { @@ -217,8 +192,7 @@ void HTTPHandler::trySendExceptionToClient(std::stringstream & s, used_output.out->position() = used_output.out->buffer().begin(); } - std::string exception_message = s.str(); - writeString(exception_message, *used_output.out_maybe_compressed); + writeString(s, *used_output.out_maybe_compressed); writeChar('\n', *used_output.out_maybe_compressed); used_output.out_maybe_compressed->next(); used_output.out->finalize(); @@ -255,35 +229,9 @@ void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne processQuery(request, response, used_output); LOG_INFO(log, "Done processing query"); } - catch (Exception & e) - { - std::stringstream s; - s << "Code: " << e.code() << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what() - << ", Stack trace:\n\n" << e.getStackTrace().toString(); - LOG_ERROR(log, s.str()); - trySendExceptionToClient(s, request, response, used_output); - } - catch (Poco::Exception & e) - { - std::stringstream s; - s << "Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() - << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what(); - LOG_ERROR(log, s.str()); - trySendExceptionToClient(s, request, response, used_output); - } - catch (std::exception & e) - { - std::stringstream s; - s << "Code: " << ErrorCodes::STD_EXCEPTION << ". " << e.what(); - LOG_ERROR(log, s.str()); - trySendExceptionToClient(s, request, response, used_output); - } catch (...) { - std::stringstream s; - s << "Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ". Unknown exception."; - LOG_ERROR(log, s.str()); - trySendExceptionToClient(s, request, response, used_output); + trySendExceptionToClient(getCurrentExceptionMessage(true), request, response, used_output); } } diff --git a/dbms/src/Server/HTTPHandler.h b/dbms/src/Server/HTTPHandler.h index 7357c7f5831..ae8a2c5c564 100644 --- a/dbms/src/Server/HTTPHandler.h +++ b/dbms/src/Server/HTTPHandler.h @@ -26,7 +26,7 @@ public: void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response); - void trySendExceptionToClient(std::stringstream & s, + void trySendExceptionToClient(const std::string & s, Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, Output & used_output); diff --git a/dbms/src/Server/OLAPHTTPHandler.cpp b/dbms/src/Server/OLAPHTTPHandler.cpp index 462557784c1..dc61f6920fa 100644 --- a/dbms/src/Server/OLAPHTTPHandler.cpp +++ b/dbms/src/Server/OLAPHTTPHandler.cpp @@ -69,31 +69,6 @@ void OLAPHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco: executeQuery(in, out, context, query_plan); watch.stop(); - if (query_plan) - { - std::stringstream log_str; - log_str << "Query pipeline:\n"; - query_plan->dumpTree(log_str); - LOG_DEBUG(log, log_str.str()); - - /// Выведем информацию о том, сколько считано строк и байт. 
- size_t rows = 0; - size_t bytes = 0; - - query_plan->getLeafRowsBytes(rows, bytes); - - if (rows != 0) - { - LOG_INFO(log, std::fixed << std::setprecision(3) - << "Read " << rows << " rows, " << bytes / 1048576.0 << " MiB in " << watch.elapsedSeconds() << " sec., " - << static_cast(rows / watch.elapsedSeconds()) << " rows/sec., " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec."); - } - } - - QuotaForIntervals & quota = context.getQuota(); - if (!quota.empty()) - LOG_INFO(log, "Quota:\n" << quota.toString()); - /// Если не было эксепшена и данные ещё не отправлены - отправляются HTTP заголовки с кодом 200. out.finalize(); } diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp index 2e9ad9e7425..196ebf08afa 100644 --- a/dbms/src/Server/TCPHandler.cpp +++ b/dbms/src/Server/TCPHandler.cpp @@ -149,8 +149,7 @@ void TCPHandler::runImpl() } catch (const Exception & e) { - LOG_ERROR(log, "Code: " << e.code() << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what() - << ", Stack trace:\n\n" << e.getStackTrace().toString()); + state.io.onException(); exception = e.clone(); if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT) @@ -164,24 +163,22 @@ void TCPHandler::runImpl() * Хотя в одном из них, мы должны отправить эксепшен клиенту, а в другом - не можем. * Будем пытаться отправить эксепшен клиенту в любом случае - см. ниже. */ - LOG_ERROR(log, "Poco::Net::NetException. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() - << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what()); + state.io.onException(); exception = new Exception(e.displayText(), ErrorCodes::POCO_EXCEPTION); } catch (const Poco::Exception & e) { - LOG_ERROR(log, "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() - << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what()); + state.io.onException(); exception = new Exception(e.displayText(), ErrorCodes::POCO_EXCEPTION); } catch (const std::exception & e) { - LOG_ERROR(log, "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", e.what() = " << e.what()); + state.io.onException(); exception = new Exception(e.what(), ErrorCodes::STD_EXCEPTION); } catch (...) { - LOG_ERROR(log, "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION); + state.io.onException(); exception = new Exception("Unknown exception", ErrorCodes::UNKNOWN_EXCEPTION); } @@ -290,7 +287,6 @@ void TCPHandler::processOrdinaryQuery() async_in.dumpTree(query_pipeline); LOG_DEBUG(log, "Query pipeline:\n" << query_pipeline.rdbuf()); - Stopwatch watch; while (true) { Block block; @@ -342,9 +338,7 @@ void TCPHandler::processOrdinaryQuery() } async_in.readSuffix(); - - watch.stop(); - logProfileInfo(watch, *state.io.in); + state.io.onFinish(); } } @@ -413,27 +407,6 @@ void TCPHandler::sendExtremes() } -void TCPHandler::logProfileInfo(Stopwatch & watch, IBlockInputStream & in) -{ - /// Выведем информацию о том, сколько считано строк и байт. 
- size_t rows = 0; - size_t bytes = 0; - - in.getLeafRowsBytes(rows, bytes); - - if (rows != 0) - { - LOG_INFO(log, std::fixed << std::setprecision(3) - << "Read " << rows << " rows, " << bytes / 1048576.0 << " MiB in " << watch.elapsedSeconds() << " sec., " - << static_cast(rows / watch.elapsedSeconds()) << " rows/sec., " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec."); - } - - QuotaForIntervals & quota = query_context.getQuota(); - if (!quota.empty()) - LOG_INFO(log, "Quota:\n" << quota.toString()); -} - - void TCPHandler::receiveHello() { /// Получить hello пакет. diff --git a/dbms/src/Server/TCPHandler.h b/dbms/src/Server/TCPHandler.h index ffae284d459..1b45cf72191 100644 --- a/dbms/src/Server/TCPHandler.h +++ b/dbms/src/Server/TCPHandler.h @@ -130,9 +130,6 @@ private: /// Эта функция вызывается из разных потоков. void updateProgress(const Progress & value); - - /// Вывести информацию о скорости выполнения SELECT запроса. - void logProfileInfo(Stopwatch & watch, IBlockInputStream & in); }; From cb223d360a146b9533f7d865c56a66c4d2fa8bc3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2015 02:54:33 +0300 Subject: [PATCH 55/67] dbms: QueryLog: development [#METR-16946]. --- dbms/src/Interpreters/QueryLog.cpp | 14 +++--- dbms/src/Interpreters/executeQuery.cpp | 59 ++++++++++++++------------ dbms/src/Server/TCPHandler.cpp | 4 -- 3 files changed, 40 insertions(+), 37 deletions(-) diff --git a/dbms/src/Interpreters/QueryLog.cpp b/dbms/src/Interpreters/QueryLog.cpp index 77ba058de38..8a317ae5d10 100644 --- a/dbms/src/Interpreters/QueryLog.cpp +++ b/dbms/src/Interpreters/QueryLog.cpp @@ -190,6 +190,8 @@ Block QueryLog::createBlock() {new ColumnUInt64, new DataTypeUInt64, "result_bytes"}, {new ColumnString, new DataTypeString, "query"}, + {new ColumnString, new DataTypeString, "exception"}, + {new ColumnString, new DataTypeString, "stack_trace"}, {new ColumnUInt8, new DataTypeUInt8, "interface"}, {new ColumnUInt8, new DataTypeUInt8, "http_method"}, @@ -225,9 +227,11 @@ void QueryLog::flush() block.unsafeGetByPosition(8).column.get()->insert(static_cast(elem.result_bytes)); block.unsafeGetByPosition(9).column.get()->insertData(elem.query.data(), elem.query.size()); + block.unsafeGetByPosition(10).column.get()->insertData(elem.exception.data(), elem.exception.size()); + block.unsafeGetByPosition(11).column.get()->insertData(elem.stack_trace.data(), elem.stack_trace.size()); - block.unsafeGetByPosition(10).column.get()->insert(static_cast(elem.interface)); - block.unsafeGetByPosition(11).column.get()->insert(static_cast(elem.http_method)); + block.unsafeGetByPosition(12).column.get()->insert(static_cast(elem.interface)); + block.unsafeGetByPosition(13).column.get()->insert(static_cast(elem.http_method)); char ipv6_binary[16]; if (Poco::Net::IPAddress::IPv6 == elem.ip_address.family()) @@ -245,10 +249,10 @@ void QueryLog::flush() else memset(ipv6_binary, 0, 16); - block.unsafeGetByPosition(12).column.get()->insertData(ipv6_binary, 16); + block.unsafeGetByPosition(14).column.get()->insertData(ipv6_binary, 16); - block.unsafeGetByPosition(13).column.get()->insertData(elem.user.data(), elem.user.size()); - block.unsafeGetByPosition(14).column.get()->insertData(elem.query_id.data(), elem.query_id.size()); + block.unsafeGetByPosition(15).column.get()->insertData(elem.user.data(), elem.user.size()); + block.unsafeGetByPosition(16).column.get()->insertData(elem.query_id.data(), elem.query_id.size()); } BlockOutputStreamPtr stream = table->write(nullptr); diff --git 
a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index c66aa7ecb57..6d5c2076396 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -50,6 +50,8 @@ static std::tuple executeQueryImpl( bool internal, QueryProcessingStage::Enum stage) { + /// TODO Логгировать здесь эксепшены, возникающие до начала выполнения запроса. + ProfileEvents::increment(ProfileEvents::Query); ParserQuery parser; @@ -107,7 +109,23 @@ static std::tuple executeQueryImpl( BlockIO res; - /// Всё, что связано с логгированием запросов. + try + { + auto interpreter = InterpreterFactory::get(ast, context, stage); + res = interpreter->execute(); + + /// Держим элемент списка процессов до конца обработки запроса. + res.process_list_entry = process_list_entry; + } + catch (...) + { + quota.addError(current_time); /// TODO Было бы лучше добавить ещё в exception_callback + throw; + } + + quota.addQuery(current_time); + + /// Всё, что связано с логом запросов. { QueryLogElement elem; @@ -142,7 +160,8 @@ static std::tuple executeQueryImpl( { const BlockStreamProfileInfo & info = profiling_stream->getInfo(); - elem.query_duration_ms = info.total_stopwatch.elapsed() / 1000000; + double elapsed_seconds = info.total_stopwatch.elapsed(); /// TODO этот Stopwatch - coarse, использовать другой + elem.query_duration_ms = elapsed_seconds * 1000; stream.getLeafRowsBytes(elem.read_rows, elem.read_bytes); /// TODO неверно для распределённых запросов? @@ -153,9 +172,9 @@ static std::tuple executeQueryImpl( { LOG_INFO(&Logger::get("executeQuery"), std::fixed << std::setprecision(3) << "Read " << elem.read_rows << " rows, " - << formatReadableSizeWithBinarySuffix(elem.read_bytes) << " in " << elem.query_duration_ms / 1000.0 << " sec., " - << static_cast(elem.read_rows * 1000.0 / elem.query_duration_ms) << " rows/sec., " - << formatReadableSizeWithBinarySuffix(elem.read_bytes * 1000.0 / elem.query_duration_ms) << "/sec."); + << formatReadableSizeWithBinarySuffix(elem.read_bytes) << " in " << elapsed_seconds << " sec., " + << static_cast(elem.read_rows / elapsed_seconds) << " rows/sec., " + << formatReadableSizeWithBinarySuffix(elem.read_bytes / elapsed_seconds) << "/sec."); } } @@ -190,30 +209,14 @@ static std::tuple executeQueryImpl( if (log_queries) context.getQueryLog().add(elem); }; - } - try - { - auto interpreter = InterpreterFactory::get(ast, context, stage); - res = interpreter->execute(); - - /// Держим элемент списка процессов до конца обработки запроса. - res.process_list_entry = process_list_entry; - } - catch (...) 
- { - quota.addError(current_time); - throw; - } - - quota.addQuery(current_time); - - if (res.in) - { - std::stringstream log_str; - log_str << "Query pipeline:\n"; - res.in->dumpTree(log_str); - LOG_DEBUG(&Logger::get("executeQuery"), log_str.str()); + if (!internal && res.in) + { + std::stringstream log_str; + log_str << "Query pipeline:\n"; + res.in->dumpTree(log_str); + LOG_DEBUG(&Logger::get("executeQuery"), log_str.str()); + } } return std::make_tuple(ast, res); diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp index 196ebf08afa..8c9231c94f1 100644 --- a/dbms/src/Server/TCPHandler.cpp +++ b/dbms/src/Server/TCPHandler.cpp @@ -283,10 +283,6 @@ void TCPHandler::processOrdinaryQuery() AsynchronousBlockInputStream async_in(state.io.in); async_in.readPrefix(); - std::stringstream query_pipeline; - async_in.dumpTree(query_pipeline); - LOG_DEBUG(log, "Query pipeline:\n" << query_pipeline.rdbuf()); - while (true) { Block block; From 3ac04b49b251abe8d88d247ae95fcd9bd2b75638 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2015 02:59:25 +0300 Subject: [PATCH 56/67] dbms: QueryLog: development [#METR-16946]. --- dbms/src/Interpreters/executeQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 6d5c2076396..d9eb60413d4 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -160,7 +160,7 @@ static std::tuple executeQueryImpl( { const BlockStreamProfileInfo & info = profiling_stream->getInfo(); - double elapsed_seconds = info.total_stopwatch.elapsed(); /// TODO этот Stopwatch - coarse, использовать другой + double elapsed_seconds = info.total_stopwatch.elapsedSeconds(); /// TODO этот Stopwatch - coarse, использовать другой elem.query_duration_ms = elapsed_seconds * 1000; stream.getLeafRowsBytes(elem.read_rows, elem.read_bytes); /// TODO неверно для распределённых запросов? From b5e2e9a7ae22fc93dc3c09f09d1f27d2366c7689 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2015 21:22:06 +0300 Subject: [PATCH 57/67] dbms: function 'roundAge': fixed error [#METR-17105]. --- dbms/include/DB/Functions/FunctionsRound.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsRound.h b/dbms/include/DB/Functions/FunctionsRound.h index 7706c86519b..7fea4d78850 100644 --- a/dbms/include/DB/Functions/FunctionsRound.h +++ b/dbms/include/DB/Functions/FunctionsRound.h @@ -89,11 +89,12 @@ namespace DB static inline ResultType apply(A x) { - return x < 18 ? 0 + return x < 1 ? 0 + : (x < 18 ? 1 : (x < 25 ? 18 : (x < 35 ? 25 : (x < 45 ? 35 - : 45))); + : 45)))); } }; From cc8734c5aacd1d3b8a6f92bcc42eb93ff89472e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2015 21:38:55 +0300 Subject: [PATCH 58/67] dbms: addition to prev. revision [#METR-17105]. --- dbms/include/DB/Functions/FunctionsRound.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Functions/FunctionsRound.h b/dbms/include/DB/Functions/FunctionsRound.h index 7fea4d78850..bf321e8d760 100644 --- a/dbms/include/DB/Functions/FunctionsRound.h +++ b/dbms/include/DB/Functions/FunctionsRound.h @@ -90,7 +90,7 @@ namespace DB static inline ResultType apply(A x) { return x < 1 ? 0 - : (x < 18 ? 1 + : (x < 18 ? 17 : (x < 25 ? 18 : (x < 35 ? 25 : (x < 45 ? 
35 From 115d04aa240ca6dbae7f6ec2764c9b2611ca6236 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2015 21:48:55 +0300 Subject: [PATCH 59/67] Update README --- dbms/benchmark/vertica/README | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/benchmark/vertica/README b/dbms/benchmark/vertica/README index 34525e46597..91af3e41e27 100644 --- a/dbms/benchmark/vertica/README +++ b/dbms/benchmark/vertica/README @@ -8,6 +8,7 @@ Download HP Vertica 7.1.1 Analytic Database Server, Debian or Ubuntu 14.04 versi sudo apt-get install sysstat pstack mcelog sudo dpkg -i vertica_7.1.1-0_amd64.deb sudo sh -c "echo 'export TZ=Europe/Moscow' >> /home/dbadmin/.bash_profile" +# Don't specify localhost due to poor support of IPv6. sudo /opt/vertica/sbin/install_vertica --hosts=127.0.0.1 --failure-threshold=NONE sudo mkdir /opt/vertica-data/ sudo chown dbadmin /opt/vertica-data/ @@ -23,8 +24,6 @@ both directories: /opt/vertica-data/ main menu exit -PS. Note that Vertica doesn't support IPv6. - How to prepare data ------------------- From cd734e6f06b7384fce7679289a6872db81bedeb9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2015 22:05:30 +0300 Subject: [PATCH 60/67] dbms: allowed constant arguments of aggregate functions and keys [#METRIKASUPP-5388]. --- dbms/src/Interpreters/Aggregator.cpp | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 9d0dc69c36e..100b846d5fb 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -557,14 +557,21 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result, for (size_t i = 0; i < aggregates_size; ++i) aggregate_columns[i].resize(aggregates[i].arguments.size()); + /** Константные столбцы не поддерживаются напрямую при агрегации. + * Чтобы они всё-равно работали, материализуем их. + */ + Columns materialized_columns; + /// Запоминаем столбцы, с которыми будем работать for (size_t i = 0; i < keys_size; ++i) { key_columns[i] = block.getByPosition(keys[i]).column; - if (key_columns[i]->isConst()) - throw Exception("Constants are not allowed as GROUP BY keys" - " (but all of them must be eliminated in ExpressionAnalyzer)", ErrorCodes::ILLEGAL_COLUMN); + if (const IColumnConst * column_const = dynamic_cast(key_columns[i])) + { + materialized_columns.push_back(column_const->convertToFullColumn()); + key_columns[i] = materialized_columns.back().get(); + } } for (size_t i = 0; i < aggregates_size; ++i) @@ -573,11 +580,11 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result, { aggregate_columns[i][j] = block.getByPosition(aggregates[i].arguments[j]).column; - /** Агрегатные функции рассчитывают, что в них передаются полноценные столбцы. - * Поэтому, стобцы-константы не разрешены в качестве аргументов агрегатных функций. - */ - if (aggregate_columns[i][j]->isConst()) - throw Exception("Constants are not allowed as arguments of aggregate functions", ErrorCodes::ILLEGAL_COLUMN); + if (const IColumnConst * column_const = dynamic_cast(aggregate_columns[i][j])) + { + materialized_columns.push_back(column_const->convertToFullColumn()); + aggregate_columns[i][j] = materialized_columns.back().get(); + } } } From 1618ea3dd7a00200ac74d59e6c621dc1cbde83f6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jun 2015 22:06:52 +0300 Subject: [PATCH 61/67] dbms: added test to prev. revision [#METRIKASUPP-5388]. 
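The previous commit makes the Aggregator materialize constant key and argument columns instead of rejecting them, which is what lets the test query added here (count(), sum(1), uniq(123)) run. The pattern is roughly the following standalone sketch; it is illustrative only, not the actual ClickHouse classes (materializeConstColumns and the tiny IColumn hierarchy are made up). The materialized copies are kept in a separate vector so the raw pointers used by the aggregation code stay valid while the block is processed.

    #include <memory>
    #include <vector>

    struct IColumn { virtual ~IColumn() = default; };

    struct FullColumn : IColumn {};

    struct IColumnConst : IColumn
    {
        virtual std::shared_ptr<IColumn> convertToFullColumn() const = 0;
    };

    struct ConstColumn : IColumnConst
    {
        std::shared_ptr<IColumn> convertToFullColumn() const override
        {
            return std::make_shared<FullColumn>();   /// The real code repeats the constant for every row.
        }
    };

    using ColumnPtr = std::shared_ptr<IColumn>;

    /// Replace every constant column with its full equivalent, keeping ownership in 'holders'.
    void materializeConstColumns(std::vector<const IColumn *> & columns, std::vector<ColumnPtr> & holders)
    {
        for (auto & col : columns)
            if (auto * c = dynamic_cast<const IColumnConst *>(col))
            {
                holders.push_back(c->convertToFullColumn());
                col = holders.back().get();
            }
    }

    int main()
    {
        ConstColumn key;
        std::vector<const IColumn *> key_columns{&key};
        std::vector<ColumnPtr> holders;
        materializeConstColumns(key_columns, holders);   /// key_columns[0] now points to a full column owned by holders.
    }
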
--- ...00188_constants_as_arguments_of_aggregate_functions.reference | 1 + .../00188_constants_as_arguments_of_aggregate_functions.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00188_constants_as_arguments_of_aggregate_functions.reference create mode 100644 dbms/tests/queries/0_stateless/00188_constants_as_arguments_of_aggregate_functions.sql diff --git a/dbms/tests/queries/0_stateless/00188_constants_as_arguments_of_aggregate_functions.reference b/dbms/tests/queries/0_stateless/00188_constants_as_arguments_of_aggregate_functions.reference new file mode 100644 index 00000000000..c696b78280f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00188_constants_as_arguments_of_aggregate_functions.reference @@ -0,0 +1 @@ +10 10 1 diff --git a/dbms/tests/queries/0_stateless/00188_constants_as_arguments_of_aggregate_functions.sql b/dbms/tests/queries/0_stateless/00188_constants_as_arguments_of_aggregate_functions.sql new file mode 100644 index 00000000000..fd36cf6cc5e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00188_constants_as_arguments_of_aggregate_functions.sql @@ -0,0 +1 @@ +SELECT count(), sum(1), uniq(123) FROM (SELECT * FROM system.numbers LIMIT 10); From 5ed6a1d4461dec99058145cf26a1fa7469a1657d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 1 Jul 2015 08:18:54 +0300 Subject: [PATCH 62/67] dbms: QueryLog: development [#METR-16946]. --- dbms/include/DB/DataStreams/BlockIO.h | 2 + dbms/include/DB/Interpreters/QueryLog.h | 5 +- dbms/include/DB/Interpreters/Quota.h | 4 +- dbms/src/Interpreters/QueryLog.cpp | 50 ++-- dbms/src/Interpreters/Quota.cpp | 4 +- dbms/src/Interpreters/executeQuery.cpp | 291 +++++++++++++++--------- dbms/src/Server/HTTPHandler.cpp | 2 - 7 files changed, 218 insertions(+), 140 deletions(-) diff --git a/dbms/include/DB/DataStreams/BlockIO.h b/dbms/include/DB/DataStreams/BlockIO.h index e45ffeebd61..06d79cc0016 100644 --- a/dbms/include/DB/DataStreams/BlockIO.h +++ b/dbms/include/DB/DataStreams/BlockIO.h @@ -39,6 +39,8 @@ struct BlockIO { if (exception_callback) exception_callback(); + else + tryLogCurrentException(__PRETTY_FUNCTION__); } BlockIO & operator= (const BlockIO & rhs) diff --git a/dbms/include/DB/Interpreters/QueryLog.h b/dbms/include/DB/Interpreters/QueryLog.h index d573f6e7677..b6ade5d8fca 100644 --- a/dbms/include/DB/Interpreters/QueryLog.h +++ b/dbms/include/DB/Interpreters/QueryLog.h @@ -34,7 +34,8 @@ struct QueryLogElement SHUTDOWN = 0, /// Эта запись имеет служебное значение. QUERY_START = 1, QUERY_FINISH = 2, - EXCEPTION = 3, + EXCEPTION_BEFORE_START = 3, + EXCEPTION_WHILE_PROCESSING = 4, }; Type type = QUERY_START; @@ -51,6 +52,8 @@ struct QueryLogElement UInt64 result_rows{}; UInt64 result_bytes{}; + UInt64 memory_usage{}; + String query; String exception; diff --git a/dbms/include/DB/Interpreters/Quota.h b/dbms/include/DB/Interpreters/Quota.h index 4c03ecd41a6..ea8e86d8450 100644 --- a/dbms/include/DB/Interpreters/Quota.h +++ b/dbms/include/DB/Interpreters/Quota.h @@ -83,7 +83,7 @@ struct QuotaForInterval /// Увеличить соответствующее значение. void addQuery(time_t current_time, const String & quota_name); - void addError(time_t current_time, const String & quota_name); + void addError(time_t current_time, const String & quota_name) noexcept; /// Проверить, не превышена ли квота уже. Если превышена - кидает исключение. 
void checkExceeded(time_t current_time, const String & quota_name); @@ -139,7 +139,7 @@ public: void setMax(const QuotaForIntervals & quota); void addQuery(time_t current_time); - void addError(time_t current_time); + void addError(time_t current_time) noexcept; void checkExceeded(time_t current_time); diff --git a/dbms/src/Interpreters/QueryLog.cpp b/dbms/src/Interpreters/QueryLog.cpp index 8a317ae5d10..7e342b64343 100644 --- a/dbms/src/Interpreters/QueryLog.cpp +++ b/dbms/src/Interpreters/QueryLog.cpp @@ -189,6 +189,8 @@ Block QueryLog::createBlock() {new ColumnUInt64, new DataTypeUInt64, "result_rows"}, {new ColumnUInt64, new DataTypeUInt64, "result_bytes"}, + {new ColumnUInt64, new DataTypeUInt64, "memory_usage"}, + {new ColumnString, new DataTypeString, "query"}, {new ColumnString, new DataTypeString, "exception"}, {new ColumnString, new DataTypeString, "stack_trace"}, @@ -214,25 +216,6 @@ void QueryLog::flush() for (const QueryLogElement & elem : data) { - block.unsafeGetByPosition(0).column.get()->insert(static_cast(elem.type)); - block.unsafeGetByPosition(1).column.get()->insert(static_cast(date_lut.toDayNum(elem.event_time))); - block.unsafeGetByPosition(2).column.get()->insert(static_cast(elem.event_time)); - block.unsafeGetByPosition(3).column.get()->insert(static_cast(elem.query_start_time)); - block.unsafeGetByPosition(4).column.get()->insert(static_cast(elem.query_duration_ms)); - - block.unsafeGetByPosition(5).column.get()->insert(static_cast(elem.read_rows)); - block.unsafeGetByPosition(6).column.get()->insert(static_cast(elem.read_bytes)); - - block.unsafeGetByPosition(7).column.get()->insert(static_cast(elem.result_rows)); - block.unsafeGetByPosition(8).column.get()->insert(static_cast(elem.result_bytes)); - - block.unsafeGetByPosition(9).column.get()->insertData(elem.query.data(), elem.query.size()); - block.unsafeGetByPosition(10).column.get()->insertData(elem.exception.data(), elem.exception.size()); - block.unsafeGetByPosition(11).column.get()->insertData(elem.stack_trace.data(), elem.stack_trace.size()); - - block.unsafeGetByPosition(12).column.get()->insert(static_cast(elem.interface)); - block.unsafeGetByPosition(13).column.get()->insert(static_cast(elem.http_method)); - char ipv6_binary[16]; if (Poco::Net::IPAddress::IPv6 == elem.ip_address.family()) { @@ -249,10 +232,33 @@ void QueryLog::flush() else memset(ipv6_binary, 0, 16); - block.unsafeGetByPosition(14).column.get()->insertData(ipv6_binary, 16); + size_t i = 0; - block.unsafeGetByPosition(15).column.get()->insertData(elem.user.data(), elem.user.size()); - block.unsafeGetByPosition(16).column.get()->insertData(elem.query_id.data(), elem.query_id.size()); + block.unsafeGetByPosition(i++).column.get()->insert(static_cast(elem.type)); + block.unsafeGetByPosition(i++).column.get()->insert(static_cast(date_lut.toDayNum(elem.event_time))); + block.unsafeGetByPosition(i++).column.get()->insert(static_cast(elem.event_time)); + block.unsafeGetByPosition(i++).column.get()->insert(static_cast(elem.query_start_time)); + block.unsafeGetByPosition(i++).column.get()->insert(static_cast(elem.query_duration_ms)); + + block.unsafeGetByPosition(i++).column.get()->insert(static_cast(elem.read_rows)); + block.unsafeGetByPosition(i++).column.get()->insert(static_cast(elem.read_bytes)); + + block.unsafeGetByPosition(i++).column.get()->insert(static_cast(elem.result_rows)); + block.unsafeGetByPosition(i++).column.get()->insert(static_cast(elem.result_bytes)); + + 
block.unsafeGetByPosition(i++).column.get()->insert(static_cast(elem.memory_usage)); + + block.unsafeGetByPosition(i++).column.get()->insertData(elem.query.data(), elem.query.size()); + block.unsafeGetByPosition(i++).column.get()->insertData(elem.exception.data(), elem.exception.size()); + block.unsafeGetByPosition(i++).column.get()->insertData(elem.stack_trace.data(), elem.stack_trace.size()); + + block.unsafeGetByPosition(i++).column.get()->insert(static_cast(elem.interface)); + block.unsafeGetByPosition(i++).column.get()->insert(static_cast(elem.http_method)); + + block.unsafeGetByPosition(i++).column.get()->insertData(ipv6_binary, 16); + + block.unsafeGetByPosition(i++).column.get()->insertData(elem.user.data(), elem.user.size()); + block.unsafeGetByPosition(i++).column.get()->insertData(elem.query_id.data(), elem.query_id.size()); } BlockOutputStreamPtr stream = table->write(nullptr); diff --git a/dbms/src/Interpreters/Quota.cpp b/dbms/src/Interpreters/Quota.cpp index af59ddd2afa..9a253ca96ee 100644 --- a/dbms/src/Interpreters/Quota.cpp +++ b/dbms/src/Interpreters/Quota.cpp @@ -66,7 +66,7 @@ void QuotaForInterval::addQuery(time_t current_time, const String & quota_name) __sync_fetch_and_add(&used.queries, 1); } -void QuotaForInterval::addError(time_t current_time, const String & quota_name) +void QuotaForInterval::addError(time_t current_time, const String & quota_name) noexcept { __sync_fetch_and_add(&used.errors, 1); } @@ -181,7 +181,7 @@ void QuotaForIntervals::addQuery(time_t current_time) it->second.addQuery(current_time, name); } -void QuotaForIntervals::addError(time_t current_time) +void QuotaForIntervals::addError(time_t current_time) noexcept { for (Container::reverse_iterator it = cont.rbegin(); it != cont.rend(); ++it) it->second.addError(current_time, name); diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index d9eb60413d4..8e9e97b149f 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -43,6 +43,53 @@ static void logQuery(const String & query, const Context & context) } +static void setClientInfo(QueryLogElement & elem, Context & context) +{ + elem.interface = context.getInterface(); + elem.http_method = context.getHTTPMethod(); + elem.ip_address = context.getIPAddress(); + elem.user = context.getUser(); + elem.query_id = context.getCurrentQueryId(); +} + + +static void onExceptionBeforeStart(const String & query, Context & context, time_t current_time) +{ + /// Эксепшен до начала выполнения запроса. + context.getQuota().addError(current_time); + + bool log_queries = context.getSettingsRef().log_queries; + + /// Логгируем в таблицу начало выполнения запроса, если нужно. + if (log_queries) + { + QueryLogElement elem; + + elem.type = QueryLogElement::EXCEPTION_BEFORE_START; + + elem.event_time = current_time; + elem.query_start_time = current_time; + + elem.query = query; + elem.exception = getCurrentExceptionMessage(false); + + setClientInfo(elem, context); + + try + { + throw; + } + catch (const Exception & e) + { + elem.stack_trace = e.getStackTrace().toString(); + } + catch (...) {} + + context.getQueryLog().add(elem); + } +} + + static std::tuple executeQueryImpl( IParser::Pos begin, IParser::Pos end, @@ -50,9 +97,8 @@ static std::tuple executeQueryImpl( bool internal, QueryProcessingStage::Enum stage) { - /// TODO Логгировать здесь эксепшены, возникающие до начала выполнения запроса. 
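/// A note on onExceptionBeforeStart() above: it recovers the stack trace by
/// rethrowing the exception that is currently being handled. The idiom only
/// works inside a catch scope; with no active exception, a bare `throw;` calls
/// std::terminate(). A self-contained sketch of the same trick (the function
/// name is illustrative; DB::Exception::getStackTrace() is taken from the diff):

    static std::string extractCurrentStackTrace()
    {
        try
        {
            throw;  /// Rethrow the in-flight exception to inspect its concrete type.
        }
        catch (const DB::Exception & e)
        {
            return e.getStackTrace().toString();
        }
        catch (...)
        {
            return {};  /// Non-DB exceptions carry no stored stack trace.
        }
    }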
- ProfileEvents::increment(ProfileEvents::Query); + time_t current_time = time(0); ParserQuery parser; ASTPtr ast; @@ -73,100 +119,103 @@ static std::tuple executeQueryImpl( catch (...) { /// Всё равно логгируем запрос. - logQuery(String(begin, begin + std::min(end - begin, static_cast(max_query_size))), context); + if (!internal) + { + String query = String(begin, begin + std::min(end - begin, static_cast(max_query_size))); + logQuery(query, context); + tryLogCurrentException(__PRETTY_FUNCTION__); + onExceptionBeforeStart(query, context, current_time); + } throw; } String query(begin, query_size); - - if (!internal) - logQuery(query, context); - - /// Проверка ограничений. - checkLimits(*ast, context.getSettingsRef().limits); - - QuotaForIntervals & quota = context.getQuota(); - time_t current_time = time(0); - - quota.checkExceeded(current_time); - - const Settings & settings = context.getSettingsRef(); - - /// Положим запрос в список процессов. Но запрос SHOW PROCESSLIST класть не будем. - ProcessList::EntryPtr process_list_entry; - if (!internal && nullptr == typeid_cast(&*ast)) - { - process_list_entry = context.getProcessList().insert( - query, context.getUser(), context.getCurrentQueryId(), context.getIPAddress(), - settings.limits.max_memory_usage, - settings.queue_max_wait_ms.totalMilliseconds(), - settings.replace_running_query, - settings.priority); - - context.setProcessListElement(&process_list_entry->get()); - } - BlockIO res; try { + if (!internal) + logQuery(query, context); + + /// Проверка ограничений. + checkLimits(*ast, context.getSettingsRef().limits); + + QuotaForIntervals & quota = context.getQuota(); + + quota.checkExceeded(current_time); + + const Settings & settings = context.getSettingsRef(); + + /// Положим запрос в список процессов. Но запрос SHOW PROCESSLIST класть не будем. + ProcessList::EntryPtr process_list_entry; + if (!internal && nullptr == typeid_cast(&*ast)) + { + process_list_entry = context.getProcessList().insert( + query, context.getUser(), context.getCurrentQueryId(), context.getIPAddress(), + settings.limits.max_memory_usage, + settings.queue_max_wait_ms.totalMilliseconds(), + settings.replace_running_query, + settings.priority); + + context.setProcessListElement(&process_list_entry->get()); + } + auto interpreter = InterpreterFactory::get(ast, context, stage); res = interpreter->execute(); /// Держим элемент списка процессов до конца обработки запроса. res.process_list_entry = process_list_entry; - } - catch (...) - { - quota.addError(current_time); /// TODO Было бы лучше добавить ещё в exception_callback - throw; - } - quota.addQuery(current_time); + quota.addQuery(current_time); - /// Всё, что связано с логом запросов. - { - QueryLogElement elem; - - elem.type = QueryLogElement::QUERY_START; - - elem.event_time = current_time; - elem.query_start_time = current_time; - - elem.query = query; - - elem.interface = context.getInterface(); - elem.http_method = context.getHTTPMethod(); - elem.ip_address = context.getIPAddress(); - elem.user = context.getUser(); - elem.query_id = context.getCurrentQueryId(); - - bool log_queries = settings.log_queries; - - /// Логгируем в таблицу начало выполнения запроса, если нужно. - if (log_queries) - context.getQueryLog().add(elem); - - /// Также дадим вызывающему коду в дальнейшем логгировать завершение запроса и эксепшен. - res.finish_callback = [elem, &context, log_queries] (IBlockInputStream & stream) mutable + /// Всё, что связано с логом запросов. 
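/// (Everything related to the query log.) The element built below is written at
/// most twice per query: once here as QUERY_START, and once more from
/// finish_callback (QUERY_FINISH) or exception_callback (EXCEPTION_WHILE_PROCESSING).
/// Queries that fail before reaching this point are logged by
/// onExceptionBeforeStart() as EXCEPTION_BEFORE_START instead.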
{ - elem.type = QueryLogElement::QUERY_FINISH; + QueryLogElement elem; - elem.event_time = time(0); - elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); /// Грубое время для запросов без profiling_stream; + elem.type = QueryLogElement::QUERY_START; - if (IProfilingBlockInputStream * profiling_stream = dynamic_cast(&stream)) + elem.event_time = current_time; + elem.query_start_time = current_time; + + elem.query = query; + + setClientInfo(elem, context); + + bool log_queries = settings.log_queries; + + /// Логгируем в таблицу начало выполнения запроса, если нужно. + if (log_queries) + context.getQueryLog().add(elem); + + /// Также дадим вызывающему коду в дальнейшем логгировать завершение запроса и эксепшен. + res.finish_callback = [elem, &context, log_queries] (IBlockInputStream & stream) mutable { - const BlockStreamProfileInfo & info = profiling_stream->getInfo(); + ProcessListElement * process_list_elem = context.getProcessListElement(); - double elapsed_seconds = info.total_stopwatch.elapsedSeconds(); /// TODO этот Stopwatch - coarse, использовать другой + if (!process_list_elem) + return; + + double elapsed_seconds = process_list_elem->watch.elapsedSeconds(); + + elem.type = QueryLogElement::QUERY_FINISH; + + elem.event_time = time(0); elem.query_duration_ms = elapsed_seconds * 1000; - stream.getLeafRowsBytes(elem.read_rows, elem.read_bytes); /// TODO неверно для распределённых запросов? + elem.read_rows = process_list_elem->progress.rows; + elem.read_bytes = process_list_elem->progress.bytes; - elem.result_rows = info.rows; - elem.result_bytes = info.bytes; + auto memory_usage = process_list_elem->memory_tracker.getPeak(); + elem.memory_usage = memory_usage > 0 ? memory_usage : 0; + + if (IProfilingBlockInputStream * profiling_stream = dynamic_cast(&stream)) + { + const BlockStreamProfileInfo & info = profiling_stream->getInfo(); + + elem.result_rows = info.rows; + elem.result_bytes = info.bytes; + } if (elem.read_rows != 0) { @@ -176,48 +225,70 @@ static std::tuple executeQueryImpl( << static_cast(elem.read_rows / elapsed_seconds) << " rows/sec., " << formatReadableSizeWithBinarySuffix(elem.read_bytes / elapsed_seconds) << "/sec."); } - } - if (log_queries) - context.getQueryLog().add(elem); - }; + if (log_queries) + context.getQueryLog().add(elem); + }; - res.exception_callback = [elem, &context, log_queries] () mutable - { - elem.type = QueryLogElement::EXCEPTION; - - elem.event_time = time(0); - elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); /// Низкая точность. Можно сделать лучше. - elem.exception = getCurrentExceptionMessage(false); - - /// Достаём стек трейс, если возможно. - try + res.exception_callback = [elem, &context, log_queries, current_time] () mutable { - throw; - } - catch (const Exception & e) + context.getQuota().addError(current_time); + + elem.type = QueryLogElement::EXCEPTION_WHILE_PROCESSING; + + elem.event_time = time(0); + elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); + elem.exception = getCurrentExceptionMessage(false); + + ProcessListElement * process_list_elem = context.getProcessListElement(); + + if (process_list_elem) + { + double elapsed_seconds = process_list_elem->watch.elapsedSeconds(); + + elem.query_duration_ms = elapsed_seconds * 1000; + + elem.read_rows = process_list_elem->progress.rows; + elem.read_bytes = process_list_elem->progress.bytes; + + auto memory_usage = process_list_elem->memory_tracker.getPeak(); + elem.memory_usage = memory_usage > 0 ? 
memory_usage : 0; + } + + /// Достаём стек трейс, если возможно. + try + { + throw; + } + catch (const Exception & e) + { + elem.stack_trace = e.getStackTrace().toString(); + + LOG_ERROR(&Logger::get("executeQuery"), elem.exception << ", Stack trace:\n\n" << elem.stack_trace); + } + catch (...) + { + LOG_ERROR(&Logger::get("executeQuery"), elem.exception); + } + + if (log_queries) + context.getQueryLog().add(elem); + }; + + if (!internal && res.in) { - elem.stack_trace = e.getStackTrace().toString(); - - LOG_ERROR(&Logger::get("executeQuery"), elem.exception << ", Stack trace:\n\n" << elem.stack_trace); + std::stringstream log_str; + log_str << "Query pipeline:\n"; + res.in->dumpTree(log_str); + LOG_DEBUG(&Logger::get("executeQuery"), log_str.str()); } - catch (...) - { - LOG_ERROR(&Logger::get("executeQuery"), elem.exception); - } - - if (log_queries) - context.getQueryLog().add(elem); - }; - - if (!internal && res.in) - { - std::stringstream log_str; - log_str << "Query pipeline:\n"; - res.in->dumpTree(log_str); - LOG_DEBUG(&Logger::get("executeQuery"), log_str.str()); } } + catch (...) + { + onExceptionBeforeStart(query, context, current_time); + throw; + } return std::make_tuple(ast, res); } @@ -274,7 +345,6 @@ void executeQuery( std::tie(ast, streams) = executeQueryImpl(begin, end, context, internal, stage); - bool exception = false; try { if (streams.out) @@ -325,12 +395,11 @@ void executeQuery( } catch (...) { - exception = true; streams.onException(); + throw; } - if (!exception) - streams.onFinish(); + streams.onFinish(); } } diff --git a/dbms/src/Server/HTTPHandler.cpp b/dbms/src/Server/HTTPHandler.cpp index 9d5e344077c..7c92edb2f15 100644 --- a/dbms/src/Server/HTTPHandler.cpp +++ b/dbms/src/Server/HTTPHandler.cpp @@ -146,9 +146,7 @@ void HTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net context.setHTTPMethod(http_method); - Stopwatch watch; executeQuery(*in, *used_output.out_maybe_compressed, context, query_plan); - watch.stop(); /// Если не было эксепшена и данные ещё не отправлены - отправляются HTTP заголовки с кодом 200. used_output.out->finalize(); From 4505af81b386c829efa8501d0da5391aef12c4e3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 1 Jul 2015 08:24:08 +0300 Subject: [PATCH 63/67] dbms: QueryLog: development [#METR-16946]. --- dbms/src/Interpreters/executeQuery.cpp | 1 - dbms/src/Server/HTTPHandler.cpp | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 8e9e97b149f..44f747da0f2 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -123,7 +123,6 @@ static std::tuple executeQueryImpl( { String query = String(begin, begin + std::min(end - begin, static_cast(max_query_size))); logQuery(query, context); - tryLogCurrentException(__PRETTY_FUNCTION__); onExceptionBeforeStart(query, context, current_time); } diff --git a/dbms/src/Server/HTTPHandler.cpp b/dbms/src/Server/HTTPHandler.cpp index 7c92edb2f15..7649e3e01ea 100644 --- a/dbms/src/Server/HTTPHandler.cpp +++ b/dbms/src/Server/HTTPHandler.cpp @@ -229,6 +229,7 @@ void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne } catch (...) 
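/// The current exception is logged here, at the top-level HTTP handler, just
/// before it is reported to the client. The tryLogCurrentException(__PRETTY_FUNCTION__)
/// call that the previous patch had added to executeQueryImpl() is removed in this
/// patch, so a parse failure that reaches this handler is written to the server
/// log once rather than twice.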
{ + tryLogCurrentException(log); trySendExceptionToClient(getCurrentExceptionMessage(true), request, response, used_output); } } From f07967515b8648c9eabf29dbb02c52485e4b2415 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 1 Jul 2015 08:31:27 +0300 Subject: [PATCH 64/67] dbms: addition to prev. revision [#METR-16946]. --- dbms/include/DB/Common/MemoryTracker.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbms/include/DB/Common/MemoryTracker.h b/dbms/include/DB/Common/MemoryTracker.h index 87f89ac6293..18132397eb7 100644 --- a/dbms/include/DB/Common/MemoryTracker.h +++ b/dbms/include/DB/Common/MemoryTracker.h @@ -38,6 +38,11 @@ public: { return amount; } + + Int64 getPeak() const + { + return peak; + } }; From 541f051cd146f1f5b80e8eeb7b1485290f14be3c Mon Sep 17 00:00:00 2001 From: Pavel Kartavyy Date: Wed, 1 Jul 2015 11:55:31 +0300 Subject: [PATCH 65/67] zkutil: remove ctor [#METR-17045] --- libs/libzkutil/include/zkutil/ZooKeeper.h | 1 - libs/libzkutil/src/ZooKeeper.cpp | 7 ------- 2 files changed, 8 deletions(-) diff --git a/libs/libzkutil/include/zkutil/ZooKeeper.h b/libs/libzkutil/include/zkutil/ZooKeeper.h index a402470e8bf..d7c7c077d01 100644 --- a/libs/libzkutil/include/zkutil/ZooKeeper.h +++ b/libs/libzkutil/include/zkutil/ZooKeeper.h @@ -47,7 +47,6 @@ public: */ ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name); - ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, int32_t session_timeout_ms); ~ZooKeeper(); diff --git a/libs/libzkutil/src/ZooKeeper.cpp b/libs/libzkutil/src/ZooKeeper.cpp index 06ed54d65d4..52b0a0eb545 100644 --- a/libs/libzkutil/src/ZooKeeper.cpp +++ b/libs/libzkutil/src/ZooKeeper.cpp @@ -128,13 +128,6 @@ ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std init(args.hosts, args.session_timeout_ms); } -ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration& config, const std::string& config_name, int32_t session_timeout_ms_) -{ - ZooKeeperArgs args(config, config_name); - init(args.hosts, session_timeout_ms_); -} - - void * ZooKeeper::watchForEvent(EventPtr event) { if (event) From d71b7ca1b020004b5fa7074355a757529b2e332b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 1 Jul 2015 23:14:23 +0300 Subject: [PATCH 66/67] dbms: better diagnostics [#METR-12542]. 
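Both hunks below apply the same catch-enrich-rethrow pattern: the low-level read error is caught by non-const reference, annotated with the column name and the on-disk path, and rethrown so the original exception object, including its stack trace, keeps propagating. A condensed sketch, assuming only the Exception::addMessage() call visible in the diff (`name` and `data_path` stand in for the concrete column iterator and storage path used in the real code):

    try
    {
        readData(name, type, column, max_rows_to_read, 0, read_offsets);
    }
    catch (DB::Exception & e)
    {
        /// Attach context about which column and file were being read.
        e.addMessage("while reading column " + name + " at " + data_path);
        throw;  /// Rethrow the very same exception object, stack trace intact.
    }

Catching by non-const reference is what lets addMessage() mutate the exception in place, and the bare `throw;` rethrows the original object rather than a sliced copy.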
--- dbms/src/Storages/StorageLog.cpp | 10 +++++++++- dbms/src/Storages/StorageTinyLog.cpp | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/StorageLog.cpp b/dbms/src/Storages/StorageLog.cpp index 8527fd32f54..f4bc9b587ae 100644 --- a/dbms/src/Storages/StorageLog.cpp +++ b/dbms/src/Storages/StorageLog.cpp @@ -233,7 +233,15 @@ Block LogBlockInputStream::readImpl() else column.column = column.type->createColumn(); - readData(*it, *column.type, *column.column, max_rows_to_read, 0, read_offsets); + try + { + readData(*it, *column.type, *column.column, max_rows_to_read, 0, read_offsets); + } + catch (Exception & e) + { + e.addMessage("while reading column " + *it + " at " + storage.path + escapeForFileName(storage.name)); + throw; + } if (column.column->size()) res.insert(column); diff --git a/dbms/src/Storages/StorageTinyLog.cpp b/dbms/src/Storages/StorageTinyLog.cpp index c8c357ddad5..acde326fb58 100644 --- a/dbms/src/Storages/StorageTinyLog.cpp +++ b/dbms/src/Storages/StorageTinyLog.cpp @@ -198,7 +198,15 @@ Block TinyLogBlockInputStream::readImpl() else column.column = column.type->createColumn(); - readData(*it, *column.type, *column.column, block_size, 0, read_offsets); + try + { + readData(*it, *column.type, *column.column, block_size, 0, read_offsets); + } + catch (Exception & e) + { + e.addMessage("while reading column " + *it + " at " + storage.full_path()); + throw; + } if (column.column->size()) res.insert(column); From efddabd212f0eaa9f0fb683c143d021f2a45f4ec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 1 Jul 2015 23:42:42 +0300 Subject: [PATCH 67/67] Merge --- dbms/src/Storages/StorageChunkMerger.cpp | 15 ++++++++------- dbms/src/Storages/StorageChunks.cpp | 3 --- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/StorageChunkMerger.cpp b/dbms/src/Storages/StorageChunkMerger.cpp index 964d4af9a7e..e314f127b08 100644 --- a/dbms/src/Storages/StorageChunkMerger.cpp +++ b/dbms/src/Storages/StorageChunkMerger.cpp @@ -545,7 +545,8 @@ bool StorageChunkMerger::MergeTask::mergeChunks(const Storages & chunks) { LOG_INFO(log, "Shutdown requested while merging chunks."); output->writeSuffix(); - new_storage.removeReference(); /// После этого временные данные удалятся. + output = nullptr; + executeQuery("DROP TABLE IF EXISTS " + new_table_full_name, context, true); return false; } @@ -575,14 +576,16 @@ bool StorageChunkMerger::MergeTask::mergeChunks(const Storages & chunks) /// Отцепляем исходную таблицу. Ее данные и метаданные остаются на диске. tables_to_drop.push_back(context.detachTable(chunk_merger.source_database, src_name)); - /// Создаем на ее месте ChunkRef. Это возможно только потому что у ChunkRef нет ни, ни метаданных. + /// Создаем на ее месте ChunkRef. Это возможно только потому что у ChunkRef нет ни данных, ни метаданных. try { - context.addTable(chunk_merger.source_database, src_name, StorageChunkRef::create(src_name, context, chunk_merger.source_database, new_table_name, false)); + context.addTable(chunk_merger.source_database, src_name, + StorageChunkRef::create(src_name, context, chunk_merger.source_database, new_table_name, false)); } catch (...) { - LOG_ERROR(log, "Chunk " + src_name + " was removed but not replaced. Its data is stored in table " << new_table_name << ". You may need to resolve this manually."); + LOG_ERROR(log, "Chunk " + src_name + " was removed but not replaced. Its data is stored in table " + << new_table_name << ". 
You may need to resolve this manually."); throw; } @@ -601,9 +604,6 @@ bool StorageChunkMerger::MergeTask::mergeChunks(const Storages & chunks) /// что-нибудь может сломаться. } - /// Сейчас на new_storage ссылаются таблицы типа ChunkRef. Удалим лишнюю ссылку, которая была при создании. - new_storage.removeReference(); - LOG_TRACE(log, "Merged chunks."); return true; @@ -613,6 +613,7 @@ bool StorageChunkMerger::MergeTask::mergeChunks(const Storages & chunks) Poco::ScopedLock lock(context.getMutex()); currently_written_groups.erase(new_table_full_name); + executeQuery("DROP TABLE IF EXISTS " + new_table_full_name, context, true); throw; } diff --git a/dbms/src/Storages/StorageChunks.cpp b/dbms/src/Storages/StorageChunks.cpp index 68ec39e358d..aa55c1b43c0 100644 --- a/dbms/src/Storages/StorageChunks.cpp +++ b/dbms/src/Storages/StorageChunks.cpp @@ -161,9 +161,6 @@ StorageChunks::StorageChunks( context(context_), log(&Logger::get("StorageChunks")) { - if (!attach) - reference_counter.add(1, true); - _table_column_name = "_table" + VirtualColumnUtils::chooseSuffix(getColumnsList(), "_table"); try