From 0045133b0e1d01c487b9f86ae1504c1f72c2cb00 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Oct 2014 22:33:52 +0400 Subject: [PATCH 01/29] dbms: prepared for fully-functional progress bar [#METR-2944]. --- dbms/include/DB/Common/formatReadable.h | 13 ++++ dbms/include/DB/Core/Defines.h | 1 + dbms/include/DB/Core/Progress.h | 62 ++++++++++++++++--- .../DB/DataStreams/IBlockInputStream.h | 9 +-- .../DataStreams/IProfilingBlockInputStream.h | 5 +- .../DB/DataStreams/RemoteBlockInputStream.h | 4 +- dbms/include/DB/Interpreters/ProcessList.h | 9 ++- .../MergeTree/MergeTreeBlockInputStream.h | 25 ++++++-- dbms/src/Client/Benchmark.cpp | 9 ++- dbms/src/Client/Client.cpp | 39 +++++++----- dbms/src/Client/Connection.cpp | 2 +- dbms/src/Common/MemoryTracker.cpp | 22 ++----- dbms/src/Common/formatReadable.cpp | 36 +++++++++++ .../IProfilingBlockInputStream.cpp | 39 +++++++----- dbms/src/Server/TCPHandler.cpp | 21 +++---- dbms/src/Server/TCPHandler.h | 29 +++++---- .../MergeTree/MergeTreeDataMerger.cpp | 12 ++-- dbms/src/Storages/StorageSystemProcesses.cpp | 11 ++-- 18 files changed, 232 insertions(+), 116 deletions(-) create mode 100644 dbms/include/DB/Common/formatReadable.h create mode 100644 dbms/src/Common/formatReadable.cpp diff --git a/dbms/include/DB/Common/formatReadable.h b/dbms/include/DB/Common/formatReadable.h new file mode 100644 index 00000000000..20fa17be915 --- /dev/null +++ b/dbms/include/DB/Common/formatReadable.h @@ -0,0 +1,13 @@ +#pragma once + +#include + + +/// Выводит переданный размер в байтах в виде 123.45 GiB. +std::string formatReadableSizeWithBinarySuffix(double value, int precision = 2); + +/// Выводит переданный размер в байтах в виде 132.55 GB. +std::string formatReadableSizeWithDecimalSuffix(double value, int precision = 2); + +/// Выводит число в виде 123.45 billion. +std::string formatReadableQuantity(double value, int precision = 2); diff --git a/dbms/include/DB/Core/Defines.h b/dbms/include/DB/Core/Defines.h index 210857bc679..950b48fb60d 100644 --- a/dbms/include/DB/Core/Defines.h +++ b/dbms/include/DB/Core/Defines.h @@ -64,6 +64,7 @@ #define DBMS_MIN_REVISION_WITH_TOTALS_EXTREMES 35265 #define DBMS_MIN_REVISION_WITH_STRING_QUERY_ID 39002 #define DBMS_MIN_REVISION_WITH_TEMPORARY_TABLES 50264 +#define DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS 51554 #define DBMS_DISTRIBUTED_DIRECTORY_MONITOR_SLEEP_TIME_MS 100 diff --git a/dbms/include/DB/Core/Progress.h b/dbms/include/DB/Core/Progress.h index 276cb1ab9d3..427e0b89977 100644 --- a/dbms/include/DB/Core/Progress.h +++ b/dbms/include/DB/Core/Progress.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -10,25 +11,72 @@ namespace DB { -/// Прогресс выполнения запроса +/** Прогресс выполнения запроса. + * Передаваемые по сети значения представляют собой разницу - сколько было сделано после предыдущего отправленного значения. + * Тот же объект используется для суммирования полученных значений. + */ struct Progress { - size_t rows; /// Строк обработано. - size_t bytes; /// Байт обработано. + size_t rows = 0; /// Строк обработано. + size_t bytes = 0; /// Байт обработано. - Progress() : rows(0), bytes(0) {} - Progress(size_t rows_, size_t bytes_) : rows(rows_), bytes(bytes_) {} + /** Сколько ещё строк надо обработать, приблизительно. Передаётся не ноль, когда возникает информация о какой-то новой части работы. + * Полученные значения надо суммровать, чтобы получить оценку общего количества строк для обработки. + * Используется для отображения прогресс-бара на клиенте. + */ + size_t total_rows = 0; - void read(ReadBuffer & in) + Progress() {} + Progress(size_t rows_, size_t bytes_, size_t total_rows_ = 0) + : rows(rows_), bytes(bytes_), total_rows(total_rows_) {} + + void read(ReadBuffer & in, UInt64 server_revision) { readVarUInt(rows, in); readVarUInt(bytes, in); + + if (server_revision >= DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS) + readVarUInt(total_rows, in); } - void write(WriteBuffer & out) + void write(WriteBuffer & out, UInt64 client_revision) const { writeVarUInt(rows, out); writeVarUInt(bytes, out); + + if (client_revision >= DBMS_MIN_REVISION_WITH_TOTAL_ROWS_IN_PROGRESS) + writeVarUInt(total_rows, out); + } + + void increment(const Progress & rhs) + { + rows += rhs.rows; + bytes += rhs.bytes; + total_rows += rhs.total_rows; + } + + /// Каждое значение по-отдельности изменяется атомарно. + void incrementPiecewiseAtomically(const Progress & rhs) + { + __sync_add_and_fetch(&rows, rhs.rows); + __sync_add_and_fetch(&bytes, rhs.bytes); + __sync_add_and_fetch(&total_rows, rhs.total_rows); + } + + void reset() + { + *this = Progress(); + } + + Progress fetchAndResetPiecewiseAtomically() + { + Progress res; + + res.rows = __sync_fetch_and_and(&rows, 0); + res.bytes = __sync_fetch_and_and(&bytes, 0); + res.total_rows = __sync_fetch_and_and(&total_rows, 0); + + return res; } }; diff --git a/dbms/include/DB/DataStreams/IBlockInputStream.h b/dbms/include/DB/DataStreams/IBlockInputStream.h index a63c3fee017..4dcfc940f2c 100644 --- a/dbms/include/DB/DataStreams/IBlockInputStream.h +++ b/dbms/include/DB/DataStreams/IBlockInputStream.h @@ -5,6 +5,7 @@ #include #include +#include #include @@ -18,7 +19,7 @@ using Poco::SharedPtr; * Функция принимает количество строк в последнем блоке, количество байт в последнем блоке. * Следует иметь ввиду, что колбэк может вызываться из разных потоков. */ -typedef std::function ProgressCallback; +typedef std::function ProgressCallback; /** Интерфейс потока для чтения данных по блокам из БД. @@ -29,9 +30,9 @@ class IBlockInputStream : private boost::noncopyable public: typedef SharedPtr BlockInputStreamPtr; typedef std::vector BlockInputStreams; - + IBlockInputStream() {} - + /** Прочитать следующий блок. * Если блоков больше нет - вернуть пустой блок (для которого operator bool возвращает false). */ @@ -61,7 +62,7 @@ public: virtual String getID() const = 0; BlockInputStreams & getChildren() { return children; } - + void dumpTree(std::ostream & ostr, size_t indent = 0, size_t multiplier = 1); /// Получить листовые источники (не считая этот). diff --git a/dbms/include/DB/DataStreams/IProfilingBlockInputStream.h b/dbms/include/DB/DataStreams/IProfilingBlockInputStream.h index 2204e8e8ad6..44cfd74557f 100644 --- a/dbms/include/DB/DataStreams/IProfilingBlockInputStream.h +++ b/dbms/include/DB/DataStreams/IProfilingBlockInputStream.h @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -109,8 +110,8 @@ public: * - проверяются ограничения и квоты, которые должны быть проверены не в рамках одного источника, * а над общим количеством потраченных ресурсов во всех источниках сразу (информация в ProcessList-е). */ - virtual void progress(size_t rows, size_t bytes) { progressImpl(rows, bytes); } - void progressImpl(size_t rows, size_t bytes); + virtual void progress(const Progress & value) { progressImpl(value); } + void progressImpl(const Progress & value); /** Установить указатель на элемент списка процессов. diff --git a/dbms/include/DB/DataStreams/RemoteBlockInputStream.h b/dbms/include/DB/DataStreams/RemoteBlockInputStream.h index eba575cec59..97ec7a437f5 100644 --- a/dbms/include/DB/DataStreams/RemoteBlockInputStream.h +++ b/dbms/include/DB/DataStreams/RemoteBlockInputStream.h @@ -66,7 +66,7 @@ public: /** Отменяем умолчальное уведомление о прогрессе, * так как колбэк прогресса вызывается самостоятельно. */ - void progress(size_t rows, size_t bytes) {} + void progress(const Progress & value) override {} void cancel() @@ -156,7 +156,7 @@ protected: * ограничений (например, минимальная скорость выполнения запроса) * и квот (например, на количество строчек для чтения). */ - progressImpl(packet.progress.rows, packet.progress.bytes); + progressImpl(packet.progress); if (!was_cancelled && !finished && isCancelled()) cancel(); diff --git a/dbms/include/DB/Interpreters/ProcessList.h b/dbms/include/DB/Interpreters/ProcessList.h index d3de25ea431..75d54114de1 100644 --- a/dbms/include/DB/Interpreters/ProcessList.h +++ b/dbms/include/DB/Interpreters/ProcessList.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -35,8 +36,7 @@ public: Stopwatch watch; - volatile size_t rows_processed = 0; - volatile size_t bytes_processed = 0; + Progress progress; MemoryTracker memory_tracker; @@ -56,10 +56,9 @@ public: current_memory_tracker = nullptr; } - bool update(size_t rows, size_t bytes) volatile + bool update(const Progress & value) { - __sync_add_and_fetch(&rows_processed, rows); - __sync_add_and_fetch(&bytes_processed, bytes); + progress.incrementPiecewiseAtomically(value); return !is_cancelled; } }; diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h b/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h index 64396a5aeef..c339a233b13 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h @@ -68,8 +68,16 @@ public: columns = owned_data_part->columns.addTypes(column_names); } + /// Оценим общее количество строк - для прогресс-бара. + for (const auto & range : all_mark_ranges) + total_rows += range.end - range.begin; + total_rows *= storage.index_granularity; + LOG_TRACE(log, "Reading " << all_mark_ranges.size() << " ranges from part " << owned_data_part->name - << ", up to " << (all_mark_ranges.back().end - all_mark_ranges.front().begin) * storage.index_granularity + << ", approx. " << total_rows + << (all_mark_ranges.size() > 1 + ? ", up to " + toString((all_mark_ranges.back().end - all_mark_ranges.front().begin) * storage.index_granularity) + : "") << " rows starting from " << all_mark_ranges.front().begin * storage.index_granularity); } @@ -97,7 +105,7 @@ public: protected: /// Будем вызывать progressImpl самостоятельно. - void progress(size_t rows, size_t bytes) {} + void progress(const Progress & value) override {} Block readImpl() { @@ -108,6 +116,10 @@ protected: if (!reader) { + /// Отправим информацию о том, что собираемся читать примерно столько-то строк. + /// NOTE В конструкторе это делать не получилось бы, потому что тогда ещё не установлен progress_callback. + progressImpl(Progress(0, 0, total_rows)); + UncompressedCache * uncompressed_cache = use_uncompressed_cache ? storage.context.getUncompressedCache() : NULL; reader.reset(new MergeTreeReader(path, owned_data_part->name, columns, uncompressed_cache, storage, all_mark_ranges)); if (prewhere_actions) @@ -135,7 +147,7 @@ protected: if (range.begin == range.end) remaining_mark_ranges.pop_back(); } - progressImpl(res.rows(), res.bytes()); + progressImpl(Progress(res.rows(), res.bytes())); pre_reader->fillMissingColumns(res); /// Вычислим выражение в PREWHERE. @@ -164,7 +176,7 @@ protected: reader->readRange(range.begin, range.end, res); } - progressImpl(0, res.bytes() - pre_bytes); + progressImpl(Progress(0, res.bytes() - pre_bytes)); } else if (ColumnUInt8 * column_vec = typeid_cast(&*column)) { @@ -216,7 +228,7 @@ protected: continue; } - progressImpl(0, res.bytes() - pre_bytes); + progressImpl(Progress(0, res.bytes() - pre_bytes)); post_filter.resize(post_filter_pos); @@ -259,7 +271,7 @@ protected: remaining_mark_ranges.pop_back(); } - progressImpl(res.rows(), res.bytes()); + progressImpl(Progress(res.rows(), res.bytes())); reader->fillMissingColumns(res); } @@ -297,6 +309,7 @@ private: ExpressionActionsPtr prewhere_actions; String prewhere_column; bool remove_prewhere_column; + size_t total_rows = 0; /// Приблизительное общее количество строк - для прогресс-бара. Logger * log; }; diff --git a/dbms/src/Client/Benchmark.cpp b/dbms/src/Client/Benchmark.cpp index a079b90059b..46b5c62f680 100644 --- a/dbms/src/Client/Benchmark.cpp +++ b/dbms/src/Client/Benchmark.cpp @@ -239,9 +239,8 @@ private: Stopwatch watch; RemoteBlockInputStream stream(connection, query, nullptr); - size_t read_rows = 0; - size_t read_bytes = 0; - stream.setProgressCallback([&](size_t rows_inc, size_t bytes_inc) { read_rows += rows_inc; read_bytes += bytes_inc; }); + Progress progress; + stream.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); }); stream.readPrefix(); while (Block block = stream.read()) @@ -253,8 +252,8 @@ private: double seconds = watch.elapsedSeconds(); Poco::ScopedLock lock(mutex); - info_per_interval.add(seconds, read_rows, read_bytes, info.rows, info.bytes); - info_total.add(seconds, read_rows, read_bytes, info.rows, info.bytes); + info_per_interval.add(seconds, progress.rows, progress.bytes, info.rows, info.bytes); + info_total.add(seconds, progress.rows, progress.bytes, info.rows, info.bytes); } diff --git a/dbms/src/Client/Client.cpp b/dbms/src/Client/Client.cpp index ab282a9339f..841b0ea57e0 100644 --- a/dbms/src/Client/Client.cpp +++ b/dbms/src/Client/Client.cpp @@ -28,6 +28,8 @@ #include #include +#include + #include #include #include @@ -121,8 +123,9 @@ private: Stopwatch watch; - size_t rows_read_on_server = 0; - size_t bytes_read_on_server = 0; + /// С сервера периодически приходит информация, о том, сколько прочитано данных за прошедшее время. + Progress progress; + size_t written_progress_chars = 0; bool written_first_block = false; @@ -470,8 +473,7 @@ private: return true; processed_rows = 0; - rows_read_on_server = 0; - bytes_read_on_server = 0; + progress.reset(); written_progress_chars = 0; written_first_block = false; @@ -511,7 +513,7 @@ private: std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << watch.elapsedSeconds() << " sec. "; - if (rows_read_on_server >= 1000) + if (progress.rows >= 1000) writeFinalProgress(); std::cout << std::endl << std::endl; @@ -809,11 +811,9 @@ private: } - void onProgress(const Progress & progress) + void onProgress(const Progress & value) { - rows_read_on_server += progress.rows; - bytes_read_on_server += progress.bytes; - + progress.increment(value); writeProgress(); } @@ -851,13 +851,20 @@ private: std::stringstream message; message << indicators[increment % 8] << std::fixed << std::setprecision(3) - << " Progress: " << rows_read_on_server << " rows, " << bytes_read_on_server / 1000000.0 << " MB"; + << " Progress: "; + + if (progress.total_rows) + message << (100.0 * progress.rows / progress.total_rows) << "%, "; + + message + << formatReadableQuantity(progress.rows) << " rows, " + << formatReadableSizeWithDecimalSuffix(progress.bytes); size_t elapsed_ns = watch.elapsed(); if (elapsed_ns) message << " (" - << rows_read_on_server * 1000000000.0 / elapsed_ns << " rows/s., " - << bytes_read_on_server * 1000.0 / elapsed_ns << " MB/s.) "; + << formatReadableQuantity(progress.rows * 1000000000.0 / elapsed_ns) << " rows/s., " + << formatReadableSizeWithDecimalSuffix(progress.bytes * 1000000000.0 / elapsed_ns) << "/s.) "; else message << ". "; @@ -869,13 +876,15 @@ private: void writeFinalProgress() { - std::cout << "Processed " << rows_read_on_server << " rows, " << bytes_read_on_server / 1000000.0 << " MB"; + std::cout << "Processed " + << formatReadableQuantity(progress.rows) << " rows, " + << formatReadableSizeWithDecimalSuffix(progress.bytes); size_t elapsed_ns = watch.elapsed(); if (elapsed_ns) std::cout << " (" - << rows_read_on_server * 1000000000.0 / elapsed_ns << " rows/s., " - << bytes_read_on_server * 1000.0 / elapsed_ns << " MB/s.) "; + << formatReadableQuantity(progress.rows * 1000000000.0 / elapsed_ns) << " rows/s., " + << formatReadableSizeWithDecimalSuffix(progress.bytes * 1000000000.0 / elapsed_ns) << "/s.) "; else std::cout << ". "; } diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 54953cc6650..4242d451702 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -470,7 +470,7 @@ Progress Connection::receiveProgress() //LOG_TRACE(log_wrapper.get(), "Receiving progress (" << getServerAddress() << ")"); Progress progress; - progress.read(*in); + progress.read(*in, server_revision); return progress; } diff --git a/dbms/src/Common/MemoryTracker.cpp b/dbms/src/Common/MemoryTracker.cpp index ed70a164ebb..ac514b4517a 100644 --- a/dbms/src/Common/MemoryTracker.cpp +++ b/dbms/src/Common/MemoryTracker.cpp @@ -1,30 +1,16 @@ #include #include #include +#include #include #include #include -static std::string formatReadableSize(double size) -{ - const char* units[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}; - size_t i = 0; - while (i + 1 < sizeof(units) / sizeof(units[0]) && - fabs(size) >= 1024) - { - size /= 1024; - ++i; - } - std::stringstream ss; - ss << std::fixed << std::setprecision(i) << size << ' ' << units[i]; - return ss.str(); -} - MemoryTracker::~MemoryTracker() { - LOG_DEBUG(&Logger::get("MemoryTracker"), "Peak memory usage for query: " << formatReadableSize(peak) << "."); + LOG_DEBUG(&Logger::get("MemoryTracker"), "Peak memory usage for query: " << formatReadableSizeWithBinarySuffix(peak) << "."); } void MemoryTracker::alloc(Int64 size) @@ -34,9 +20,9 @@ void MemoryTracker::alloc(Int64 size) if (unlikely(limit && will_be > limit)) { free(size); - throw DB::Exception("Memory limit exceeded: would use " + formatReadableSize(will_be) + "" + throw DB::Exception("Memory limit exceeded: would use " + formatReadableSizeWithBinarySuffix(will_be) + "" " (attempt to allocate chunk of " + DB::toString(size) + " bytes)" - ", maximum: " + formatReadableSize(limit), DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED); + ", maximum: " + formatReadableSizeWithBinarySuffix(limit), DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED); } if (will_be > peak) diff --git a/dbms/src/Common/formatReadable.cpp b/dbms/src/Common/formatReadable.cpp new file mode 100644 index 00000000000..85d294fe92f --- /dev/null +++ b/dbms/src/Common/formatReadable.cpp @@ -0,0 +1,36 @@ +#include +#include +#include + +#include + + +static std::string formatReadable(double size, int precision, const char ** units, size_t units_size, double delimiter) +{ + size_t i = 0; + for (; i + 1 < units_size && fabs(size) >= delimiter; ++i) + size /= delimiter; + + std::stringstream ss; + ss << std::fixed << std::setprecision(precision) << size << units[i]; + return ss.str(); +} + + +std::string formatReadableSizeWithBinarySuffix(double value, int precision) +{ + const char * units[] = {" B", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB", " YiB"}; + return formatReadable(value, precision, units, sizeof(units) / sizeof(units[0]), 1024); +} + +std::string formatReadableSizeWithDecimalSuffix(double value, int precision) +{ + const char * units[] = {" B", " KB", " MB", " GB", " TB", " PB", " EB", " ZB", " YB"}; + return formatReadable(value, precision, units, sizeof(units) / sizeof(units[0]), 1000); +} + +std::string formatReadableQuantity(double value, int precision) +{ + const char * units[] = {"", " thousand", " million", " billion", " trillion", " quadrillion"}; + return formatReadable(value, precision, units, sizeof(units) / sizeof(units[0]), 1000); +} diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp index d26a0147590..05b80511cc0 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp @@ -166,7 +166,7 @@ Block IProfilingBlockInputStream::read() cancel(); } - progress(res.rowsInFirstColumn(), res.bytes()); + progress(Progress(res.rowsInFirstColumn(), res.bytes())); return res; } @@ -295,36 +295,45 @@ void IProfilingBlockInputStream::checkQuota(Block & block) } -void IProfilingBlockInputStream::progressImpl(size_t rows, size_t bytes) +void IProfilingBlockInputStream::progressImpl(const Progress & value) { /// Данные для прогресса берутся из листовых источников. if (children.empty()) { if (progress_callback) - progress_callback(rows, bytes); + progress_callback(value); if (process_list_elem) { - if (!process_list_elem->update(rows, bytes)) + if (!process_list_elem->update(value)) cancel(); - /// Общее количество данных, обработанных во всех листовых источниках, возможно, на удалённых серверах. + /// Общее количество данных, обработанных или предполагаемых к обработке во всех листовых источниках, возможно, на удалённых серверах. - size_t total_rows = process_list_elem->rows_processed; - size_t total_bytes = process_list_elem->bytes_processed; + size_t rows_processed = process_list_elem->progress.rows; + size_t bytes_processed = process_list_elem->progress.bytes; + + size_t total_rows_estimate = std::max(process_list_elem->progress.rows, process_list_elem->progress.total_rows); /** Проверяем ограничения на объём данных для чтения, скорость выполнения запроса, квоту на объём данных для чтения. * NOTE: Может быть, имеет смысл сделать, чтобы они проверялись прямо в ProcessList? */ if (limits.mode == LIMITS_TOTAL - && ((limits.max_rows_to_read && total_rows > limits.max_rows_to_read) - || (limits.max_bytes_to_read && total_bytes > limits.max_bytes_to_read))) + && ((limits.max_rows_to_read && total_rows_estimate > limits.max_rows_to_read) + || (limits.max_bytes_to_read && bytes_processed > limits.max_bytes_to_read))) { if (limits.read_overflow_mode == OverflowMode::THROW) - throw Exception("Limit for rows to read exceeded: read " + toString(total_rows) - + " rows, maximum: " + toString(limits.max_rows_to_read), - ErrorCodes::TOO_MUCH_ROWS); + { + if (limits.max_rows_to_read && total_rows_estimate > limits.max_rows_to_read) + throw Exception("Limit for rows to read exceeded: " + toString(total_rows_estimate) + + " rows read (or to read), maximum: " + toString(limits.max_rows_to_read), + ErrorCodes::TOO_MUCH_ROWS); + else + throw Exception("Limit for (uncompressed) bytes to read exceeded: " + toString(bytes_processed) + + " bytes read, maximum: " + toString(limits.max_bytes_to_read), + ErrorCodes::TOO_MUCH_ROWS); + } else if (limits.read_overflow_mode == OverflowMode::BREAK) cancel(); else @@ -336,9 +345,9 @@ void IProfilingBlockInputStream::progressImpl(size_t rows, size_t bytes) double total_elapsed = info.total_stopwatch.elapsedSeconds(); if (total_elapsed > limits.timeout_before_checking_execution_speed.totalMicroseconds() / 1000000.0 - && total_rows / total_elapsed < limits.min_execution_speed) + && rows_processed / total_elapsed < limits.min_execution_speed) { - throw Exception("Query is executing too slow: " + toString(total_rows / total_elapsed) + throw Exception("Query is executing too slow: " + toString(rows_processed / total_elapsed) + " rows/sec., minimum: " + toString(limits.min_execution_speed), ErrorCodes::TOO_SLOW); } @@ -346,7 +355,7 @@ void IProfilingBlockInputStream::progressImpl(size_t rows, size_t bytes) if (quota != nullptr && limits.mode == LIMITS_TOTAL) { - quota->checkAndAddReadRowsBytes(time(0), rows, bytes); + quota->checkAndAddReadRowsBytes(time(0), value.rows, value.bytes); } } } diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp index aa87e7bf14e..f77fa9202cf 100644 --- a/dbms/src/Server/TCPHandler.cpp +++ b/dbms/src/Server/TCPHandler.cpp @@ -1,7 +1,5 @@ #include -#include - #include #include @@ -85,9 +83,7 @@ void TCPHandler::runImpl() sendHello(); - connection_context.setProgressCallback([this] (const size_t rows, const size_t bytes) { - return this->updateProgress(rows, bytes); - }); + connection_context.setProgressCallback([this] (const Progress & value) { return this->updateProgress(value); }); while (1) { @@ -125,6 +121,7 @@ void TCPHandler::runImpl() /// Очищаем, так как, получая данные внешних таблиц, мы получили пустой блок. /// А значит, stream помечен как cancelled и читать из него нельзя. state.block_in = nullptr; + state.maybe_compressed_in = nullptr; /// Для более корректного учёта MemoryTracker-ом. /// Обрабатываем Query state.io = executeQuery(state.query, query_context, false, state.stage); @@ -286,7 +283,7 @@ void TCPHandler::processOrdinaryQuery() } else { - if (state.rows_processed && after_send_progress.elapsed() / 1000 >= query_context.getSettingsRef().interactive_delay) + if (state.progress.rows && after_send_progress.elapsed() / 1000 >= query_context.getSettingsRef().interactive_delay) { /// Прошло некоторое время и есть прогресс. after_send_progress.restart(); @@ -691,21 +688,17 @@ void TCPHandler::sendEndOfStream() } -void TCPHandler::updateProgress(size_t rows, size_t bytes) +void TCPHandler::updateProgress(const Progress & value) { - __sync_fetch_and_add(&state.rows_processed, rows); - __sync_fetch_and_add(&state.bytes_processed, bytes); + state.progress.incrementPiecewiseAtomically(value); } void TCPHandler::sendProgress() { - size_t rows_processed = __sync_fetch_and_and(&state.rows_processed, 0); - size_t bytes_processed = __sync_fetch_and_and(&state.bytes_processed, 0); - writeVarUInt(Protocol::Server::Progress, *out); - Progress progress(rows_processed, bytes_processed); - progress.write(*out); + Progress increment = state.progress.fetchAndResetPiecewiseAtomically(); + increment.write(*out, client_revision); out->next(); } diff --git a/dbms/src/Server/TCPHandler.h b/dbms/src/Server/TCPHandler.h index 83928aed0bf..c05ff199f9d 100644 --- a/dbms/src/Server/TCPHandler.h +++ b/dbms/src/Server/TCPHandler.h @@ -23,8 +23,8 @@ struct QueryState /// Идентификатор запроса. String query_id; - QueryProcessingStage::Enum stage; - Protocol::Compression::Enum compression; + QueryProcessingStage::Enum stage = QueryProcessingStage::Complete; + Protocol::Compression::Enum compression = Protocol::Compression::Disable; /// Откуда читать данные для INSERT-а. SharedPtr maybe_compressed_in; @@ -40,24 +40,29 @@ struct QueryState BlockIO io; /// Отменен ли запрос - bool is_cancelled; + bool is_cancelled = false; /// Пустой или нет - bool is_empty; + bool is_empty = true; /// Данные были отправлены. - bool sent_all_data; + bool sent_all_data = false; /// Запрос на вставку или нет. - bool is_insert; + bool is_insert = false; /// Для вывода прогресса - разница после предыдущей отправки прогресса. - volatile size_t rows_processed; - volatile size_t bytes_processed; + Progress progress; - QueryState() : query_id(""), stage(QueryProcessingStage::Complete), compression(Protocol::Compression::Disable), - is_cancelled(false), is_empty(true), sent_all_data(false), is_insert(false), rows_processed(0), bytes_processed(0) {} - void reset() { + /** process_list_entry также включает/выключает учёт памяти MemoryTracker-ом. + * Члены maybe_compressed_in, block_in, maybe_compressed_out, block_out + * могли быть инициализированы до io, и выделенная в них память могла не быть учтена MemoryTracker-ом. + * Если эти члены будут уничтожены раньше, то освобождение памяти будет учтено MemoryTracker-ом, + * и вычисленный расход памяти может оказаться отрицательным (это не проблема, но некрасиво). + * Поэтому, сначала уничтожим process_list_entry. + */ + io.process_list_entry = nullptr; + *this = QueryState(); } @@ -133,7 +138,7 @@ private: bool isQueryCancelled(); /// Эта функция вызывается из разных потоков. - void updateProgress(size_t rows, size_t bytes); + void updateProgress(const Progress & value); /// Вывести информацию о скорости выполнения SELECT запроса. void logProfileInfo(Stopwatch & watch, IBlockInputStream & in); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index 5c305c85b7f..761e1e77eeb 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -350,11 +350,13 @@ MergeTreeData::DataPartPtr MergeTreeDataMerger::mergeParts( auto input = stdext::make_unique( data.getFullPath() + parts[i]->name + '/', DEFAULT_MERGE_BLOCK_SIZE, union_column_names, data, parts[i], ranges, false, nullptr, ""); - input->setProgressCallback([&merge_entry, rows_total] (const std::size_t rows, const std::size_t bytes) { - const auto new_rows_read = __sync_add_and_fetch(&merge_entry->rows_read, rows); - merge_entry->progress = static_cast(new_rows_read) / rows_total; - __sync_add_and_fetch(&merge_entry->bytes_read_uncompressed, bytes); - }); + + input->setProgressCallback([&merge_entry, rows_total] (const Progress & value) + { + const auto new_rows_read = __sync_add_and_fetch(&merge_entry->rows_read, value.rows); + merge_entry->progress = static_cast(new_rows_read) / rows_total; + __sync_add_and_fetch(&merge_entry->bytes_read_uncompressed, value.bytes); + }); src_streams.push_back(new ExpressionBlockInputStream(input.release(), data.getPrimaryExpression())); sum_rows_approx += parts[i]->size * data.index_granularity; diff --git a/dbms/src/Storages/StorageSystemProcesses.cpp b/dbms/src/Storages/StorageSystemProcesses.cpp index d0e61752dd0..e9a92de1572 100644 --- a/dbms/src/Storages/StorageSystemProcesses.cpp +++ b/dbms/src/Storages/StorageSystemProcesses.cpp @@ -17,6 +17,7 @@ StorageSystemProcesses::StorageSystemProcesses(const std::string & name_, const { "elapsed", new DataTypeFloat64 }, { "rows_read", new DataTypeUInt64 }, { "bytes_read", new DataTypeUInt64 }, + { "total_rows_approx", new DataTypeUInt64 }, { "memory_usage", new DataTypeUInt64 }, { "query", new DataTypeString }, { "query_id", new DataTypeString } @@ -42,20 +43,19 @@ BlockInputStreams StorageSystemProcesses::read( ColumnWithNameAndType col_elapsed{new ColumnFloat64, new DataTypeFloat64, "elapsed"}; ColumnWithNameAndType col_rows_read{new ColumnUInt64, new DataTypeUInt64, "rows_read"}; ColumnWithNameAndType col_bytes_read{new ColumnUInt64, new DataTypeUInt64, "bytes_read"}; + ColumnWithNameAndType col_total_rows_approx{new ColumnUInt64, new DataTypeUInt64, "total_rows_approx"}; ColumnWithNameAndType col_memory_usage{new ColumnUInt64, new DataTypeUInt64, "memory_usage"}; ColumnWithNameAndType col_query{new ColumnString, new DataTypeString, "query"}; ColumnWithNameAndType col_query_id{new ColumnString, new DataTypeString, "query_id"}; for (const auto & process : context.getProcessList().get()) { - const size_t rows_read = process.rows_processed; - const size_t bytes_read = process.bytes_processed; - col_user.column->insert(process.user); col_address.column->insert(process.ip_address.toString()); col_elapsed.column->insert(process.watch.elapsedSeconds()); - col_rows_read.column->insert(rows_read); - col_bytes_read.column->insert(bytes_read); + col_rows_read.column->insert(process.progress.rows); + col_bytes_read.column->insert(process.progress.bytes); + col_total_rows_approx.column->insert(process.progress.total_rows); col_memory_usage.column->insert(static_cast(process.memory_tracker.get())); col_query.column->insert(process.query); col_query_id.column->insert(process.query_id); @@ -67,6 +67,7 @@ BlockInputStreams StorageSystemProcesses::read( col_elapsed, col_rows_read, col_bytes_read, + col_total_rows_approx, col_memory_usage, col_query, col_query_id From 08e93dbbd4180a4b76a102158479327fdaef7840 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Oct 2014 00:27:37 +0400 Subject: [PATCH 02/29] dbms: Client: showing progress bar for long-running queries [#METR-2944]. --- dbms/include/DB/Common/UnicodeBar.h | 62 +++++++++++++++++++ .../DB/Functions/FunctionsMiscellaneous.h | 56 +++-------------- dbms/src/Client/Client.cpp | 41 +++++++++--- 3 files changed, 104 insertions(+), 55 deletions(-) create mode 100644 dbms/include/DB/Common/UnicodeBar.h diff --git a/dbms/include/DB/Common/UnicodeBar.h b/dbms/include/DB/Common/UnicodeBar.h new file mode 100644 index 00000000000..8e53d6881d5 --- /dev/null +++ b/dbms/include/DB/Common/UnicodeBar.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include + + +#define UNICODE_BAR_CHAR_SIZE (strlen("█")) + + +/** Позволяет нарисовать unicode-art полоску, ширина которой отображается с разрешением 1/8 символа. + */ + + +namespace UnicodeBar +{ + template + double getWidth(T x, int64_t min, int64_t max, double max_width) + { + if (x <= min) + return 0; + + if (x >= max) + return max_width; + + return (x - min) * max_width / (max - min); + } + + inline size_t getWidthInBytes(double width) + { + return ceil(width - 1.0 / 8) * UNICODE_BAR_CHAR_SIZE; + } + + /// В dst должно быть место для barWidthInBytes(width) символов и завершающего нуля. + inline void render(double width, char * dst) + { + size_t floor_width = floor(width); + + for (size_t i = 0; i < floor_width; ++i) + { + memcpy(dst, "█", UNICODE_BAR_CHAR_SIZE); + dst += UNICODE_BAR_CHAR_SIZE; + } + + size_t remainder = floor((width - floor_width) * 8); + + if (remainder) + { + memcpy(dst, &"▏▎▍▌▋▋▊▉"[(remainder - 1) * UNICODE_BAR_CHAR_SIZE], UNICODE_BAR_CHAR_SIZE); + dst += UNICODE_BAR_CHAR_SIZE; + } + + *dst = 0; + } + + inline std::string render(double width) + { + std::string res(getWidthInBytes(width), '\0'); + render(width, &res[0]); + return res; + } +} diff --git a/dbms/include/DB/Functions/FunctionsMiscellaneous.h b/dbms/include/DB/Functions/FunctionsMiscellaneous.h index 088ce7a2127..902a8c10e4e 100644 --- a/dbms/include/DB/Functions/FunctionsMiscellaneous.h +++ b/dbms/include/DB/Functions/FunctionsMiscellaneous.h @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -672,47 +673,6 @@ private: return apply_visitor(FieldVisitorConvertToNumber(), column[0]); } - static constexpr size_t BAR_CHAR_SIZE = strlen("█"); - - template - static Float64 barWidth(T x, Int64 min, Int64 max, Float64 max_width) - { - if (x <= min) - return 0; - - if (x >= max) - return max_width; - - return (x - min) * max_width / (max - min); - } - - static size_t barWidthInBytes(Float64 width) - { - return ceil(width - 1.0 / 8) * BAR_CHAR_SIZE; - } - - /// В dst должно быть место для barWidthInBytes(width) символов и завершающего нуля. - static void renderBar(Float64 width, char * dst) - { - size_t floor_width = floor(width); - - for (size_t i = 0; i < floor_width; ++i) - { - memcpy(dst, "█", BAR_CHAR_SIZE); - dst += BAR_CHAR_SIZE; - } - - size_t remainder = floor((width - floor_width) * 8); - - if (remainder) - { - memcpy(dst, &"▏▎▍▌▋▋▊▉"[(remainder - 1) * BAR_CHAR_SIZE], BAR_CHAR_SIZE); - dst += BAR_CHAR_SIZE; - } - - *dst = 0; - } - template static void fill(const PODArray & src, ColumnString::Chars_t & dst_chars, ColumnString::Offsets_t & dst_offsets, Int64 min, Int64 max, Float64 max_width) @@ -721,14 +681,14 @@ private: size_t current_offset = 0; dst_offsets.resize(size); - dst_chars.reserve(size * (barWidthInBytes(max_width) + 1)); /// строки 0-terminated. + dst_chars.reserve(size * (UnicodeBar::getWidthInBytes(max_width) + 1)); /// строки 0-terminated. for (size_t i = 0; i < size; ++i) { - Float64 width = barWidth(src[i], min, max, max_width); - size_t next_size = current_offset + barWidthInBytes(width) + 1; + Float64 width = UnicodeBar::getWidth(src[i], min, max, max_width); + size_t next_size = current_offset + UnicodeBar::getWidthInBytes(width) + 1; dst_chars.resize(next_size); - renderBar(width, reinterpret_cast(&dst_chars[current_offset])); + UnicodeBar::render(width, reinterpret_cast(&dst_chars[current_offset])); current_offset = next_size; dst_offsets[i] = current_offset; } @@ -738,9 +698,9 @@ private: static void fill(T src, String & dst_chars, Int64 min, Int64 max, Float64 max_width) { - Float64 width = barWidth(src, min, max, max_width); - dst_chars.resize(barWidthInBytes(width)); - renderBar(width, &dst_chars[0]); + Float64 width = UnicodeBar::getWidth(src, min, max, max_width); + dst_chars.resize(UnicodeBar::getWidthInBytes(width)); + UnicodeBar::render(width, &dst_chars[0]); } template diff --git a/dbms/src/Client/Client.cpp b/dbms/src/Client/Client.cpp index 841b0ea57e0..18f10e42f21 100644 --- a/dbms/src/Client/Client.cpp +++ b/dbms/src/Client/Client.cpp @@ -28,8 +28,6 @@ #include #include -#include - #include #include #include @@ -50,6 +48,8 @@ #include "InterruptListener.h" #include +#include +#include /// http://en.wikipedia.org/wiki/ANSI_escape_code @@ -88,6 +88,8 @@ private: bool is_interactive = true; /// Использовать readline интерфейс или batch режим. bool stdin_is_not_tty = false; /// stdin - не терминал. + winsize terminal_size {}; /// Размер терминала - для вывода прогресс-бара. + SharedPtr connection; /// Соединение с БД. String query; /// Текущий запрос. @@ -125,6 +127,7 @@ private: /// С сервера периодически приходит информация, о том, сколько прочитано данных за прошедшее время. Progress progress; + bool show_progress_bar = false; size_t written_progress_chars = 0; bool written_first_block = false; @@ -366,6 +369,9 @@ private: try { + /// Выясняем размер терминала. + ioctl(0, TIOCGWINSZ, &terminal_size); + if (!process(query)) break; } @@ -474,6 +480,7 @@ private: processed_rows = 0; progress.reset(); + show_progress_bar = false; written_progress_chars = 0; written_first_block = false; @@ -853,9 +860,6 @@ private: << std::fixed << std::setprecision(3) << " Progress: "; - if (progress.total_rows) - message << (100.0 * progress.rows / progress.total_rows) << "%, "; - message << formatReadableQuantity(progress.rows) << " rows, " << formatReadableSizeWithDecimalSuffix(progress.bytes); @@ -868,8 +872,31 @@ private: else message << ". "; - written_progress_chars = message.str().size() - 13; - std::cerr << DISABLE_LINE_WRAPPING << message.rdbuf() << ENABLE_LINE_WRAPPING; + written_progress_chars = message.str().size() - (increment % 8 == 7 ? 10 : 13); + std::cerr << DISABLE_LINE_WRAPPING << message.rdbuf(); + + /** Если известно приблизительное общее число строк, которых нужно обработать - можно вывести прогрессбар. + * Чтобы не было "мерцания", выводим его только если с момента начала выполнения запроса прошло хотя бы пол секунды, + * и если к этому моменту запрос обработан менее чем наполовину. + */ + ssize_t width_of_progress_bar = static_cast(terminal_size.ws_col) - written_progress_chars - strlen(" 99%"); + + if (show_progress_bar + || (width_of_progress_bar > 0 + && progress.total_rows + && elapsed_ns > 500000000 + && progress.rows * 2 < progress.total_rows)) + { + show_progress_bar = true; + + std::string bar = UnicodeBar::render(UnicodeBar::getWidth(progress.rows, 0, progress.total_rows, width_of_progress_bar)); + std::cerr << "\033[0;32m" << bar << "\033[0m"; + if (width_of_progress_bar > static_cast(bar.size() / UNICODE_BAR_CHAR_SIZE)) + std::cerr << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' '); + std::cerr << ' ' << (99 * progress.rows / progress.total_rows) << '%'; /// Чуть-чуть занижаем процент, чтобы не показывать 100%. + } + + std::cerr << ENABLE_LINE_WRAPPING; ++increment; } From 2d5192c4a020db9ae751513f5c78a77a29891d94 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Oct 2014 03:01:36 +0300 Subject: [PATCH 03/29] dbms: StorageBuffer: development [#METR-13297]. --- .../MergeTree/MergeTreeBlockInputStream.h | 6 +- dbms/include/DB/Storages/StorageBuffer.h | 125 +++++++ dbms/include/DB/Storages/StorageMemory.h | 8 +- .../Interpreters/InterpreterCreateQuery.cpp | 7 +- dbms/src/Storages/StorageBuffer.cpp | 312 ++++++++++++++++++ dbms/src/Storages/StorageFactory.cpp | 41 +++ dbms/src/Storages/StorageLog.cpp | 4 +- dbms/src/Storages/StorageTinyLog.cpp | 4 +- 8 files changed, 491 insertions(+), 16 deletions(-) create mode 100644 dbms/include/DB/Storages/StorageBuffer.h create mode 100644 dbms/src/Storages/StorageBuffer.cpp diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h b/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h index c339a233b13..67ad3efe293 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h @@ -41,10 +41,10 @@ public: /// (от storage ожидают получить только столбцы таблицы). remove_prewhere_column = !pre_name_set.count(prewhere_column); Names post_column_names; - for (size_t i = 0; i < column_names.size(); ++i) + for (const auto & name : column_names) { - if (!pre_name_set.count(column_names[i])) - post_column_names.push_back(column_names[i]); + if (!pre_name_set.count(name)) + post_column_names.push_back(name); } column_names = post_column_names; } diff --git a/dbms/include/DB/Storages/StorageBuffer.h b/dbms/include/DB/Storages/StorageBuffer.h new file mode 100644 index 00000000000..8c093ee1639 --- /dev/null +++ b/dbms/include/DB/Storages/StorageBuffer.h @@ -0,0 +1,125 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + + +/** При вставке, буферизует данные в оперативке, пока не превышены некоторые пороги. + * Когда пороги превышены - сбрасывает данные в другую таблицу. + * При чтении, читает как из своих буферов, так и из подчинённой таблицы. + * + * Буфер представляет собой набор из num_shards блоков. + * При записи, выбирается номер блока по остатку от деления ThreadNumber на num_buckets (или один из других), + * и в соответствующий блок добавляются строчки. + * При использовании блока, он блокируется некоторым mutex-ом. Если при записи, соответствующий блок уже занят + * - пробуем заблокировать следующий по кругу блок, и так не более num_buckets раз (далее блокируемся). + * Пороги проверяются при вставке, а также, периодически, в фоновом потоке (чтобы реализовать пороги по времени). + * Пороги действуют независимо для каждого shard-а. Каждый shard может быть сброшен независимо от других. + * Если в таблицу вставляется блок, который сам по себе превышает max-пороги, то он записывается сразу в подчинённую таблицу без буферизации. + * Пороги могут быть превышены. Например, если max_rows = 1 000 000, в буфере уже было 500 000 строк, + * и добавляется кусок из 800 000 строк, то в буфере окажется 1 300 000 строк, и затем такой блок будет записан в подчинённую таблицу + * + * При уничтожении таблицы типа Buffer и при завершении работы, все данные сбрасываются. + * Данные в буфере не реплицируются, не логгируются на диск, не индексируются. При грубом перезапуске сервера, данные пропадают. + */ +class StorageBuffer : public IStorage +{ +friend class BufferBlockInputStream; +friend class BufferBlockOutputStream; + +public: + /// Пороги. + struct Thresholds + { + time_t time; /// Количество секунд от момента вставки первой строчки в блок. + size_t rows; /// Количество строк в блоке. + size_t bytes; /// Количество (несжатых) байт в блоке. + }; + + /** num_shards - уровень внутреннего параллелизма (количество независимых буферов) + * Буфер сбрасывается, если превышены все минимальные пороги или хотя бы один из максимальных. + */ + static StoragePtr create(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_, + size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_, + const String & destination_database_, const String & destination_table_); + + std::string getName() const override { return "Buffer"; } + std::string getTableName() const override { return name; } + + const NamesAndTypesList & getColumnsList() const override { return *columns; } + + BlockInputStreams read( + const Names & column_names, + ASTPtr query, + const Settings & settings, + QueryProcessingStage::Enum & processed_stage, + size_t max_block_size = DEFAULT_BLOCK_SIZE, + unsigned threads = 1) override; + + BlockOutputStreamPtr write(ASTPtr query) override; + + /// Сбрасывает все буферы в подчинённую таблицу. + void shutdown() override; + + void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override { name = new_table_name; } + + bool supportsSampling() const override { return true; } + bool supportsFinal() const override { return true; } + bool supportsPrewhere() const override { return true; } + + /// в подтаблицах добавлять и удалять столбы нужно вручную + /// структура подтаблиц не проверяется + void alter(const AlterCommands & params, const String & database_name, const String & table_name, Context & context) override; + +private: + String name; + NamesAndTypesListPtr columns; + + Context & context; + + struct Buffer + { + time_t first_write_time = 0; + Block data; + std::mutex mutex; + }; + + /// Имеется num_shards независимых буферов. + const size_t num_shards; + std::vector buffers; + + const Thresholds min_thresholds; + const Thresholds max_thresholds; + + const String destination_database; + const String destination_table; + bool no_destination; /// Если задано - не записывать данные из буфера, а просто опустошать буфер. + + Logger * log; + + /// Выполняет сброс данных по таймауту. + std::thread flush_thread; + + StorageBuffer(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_, + size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_, + const String & destination_database_, const String & destination_table_); + + /// Сбросить буфер. Если выставлено check_thresholds - сбрасывает только если превышены пороги. + void flushBuffer(Buffer & buffer, bool check_thresholds); + bool checkThresholds(Buffer & buffer, time_t current_time, size_t additional_rows = 0, size_t additional_bytes = 0); + + Poco::Event shutdown_event; + void flushThread(); +}; + +} diff --git a/dbms/include/DB/Storages/StorageMemory.h b/dbms/include/DB/Storages/StorageMemory.h index d65048ede0e..dca9b9fdd5b 100644 --- a/dbms/include/DB/Storages/StorageMemory.h +++ b/dbms/include/DB/Storages/StorageMemory.h @@ -24,13 +24,13 @@ public: std::stringstream res; res << "Memory(" << &*begin << ", " << &*end; - for (size_t i = 0; i < column_names.size(); ++i) - res << ", " << column_names[i]; + for (const auto & name : column_names) + res << ", " << name; res << ")"; return res.str(); } - + protected: Block readImpl(); private: @@ -92,7 +92,7 @@ private: BlocksList data; Poco::FastMutex mutex; - + StorageMemory(const std::string & name_, NamesAndTypesListPtr columns_); }; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 6b392173859..7e7316ae6c8 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -77,12 +77,9 @@ StoragePtr InterpreterCreateQuery::execute(bool assume_metadata_exists) SharedPtr interpreter_select; Block select_sample; - /// Для таблиц типа вью, чтобы получить столбцы, может понадобиться sample block. + /// Для таблиц типа view, чтобы получить столбцы, может понадобиться sample_block. if (create.select && (!create.attach || (!create.columns && (create.is_view || create.is_materialized_view)))) - { - interpreter_select = new InterpreterSelectQuery(create.select, context); - select_sample = interpreter_select->getSampleBlock(); - } + select_sample = InterpreterSelectQuery(create.select, context).getSampleBlock(); StoragePtr res; String storage_name; diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp new file mode 100644 index 00000000000..e32215f6f78 --- /dev/null +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -0,0 +1,312 @@ +#include +#include +#include +#include + + +namespace DB +{ + + +StoragePtr StorageBuffer::create(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_, + size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_, + const String & destination_database_, const String & destination_table_) +{ + return (new StorageBuffer{ + name_, columns_, context_, num_shards_, min_thresholds_, max_thresholds_, destination_database_, destination_table_})->thisPtr(); +} + + +StorageBuffer::StorageBuffer(const std::string & name_, NamesAndTypesListPtr columns_, Context & context_, + size_t num_shards_, const Thresholds & min_thresholds_, const Thresholds & max_thresholds_, + const String & destination_database_, const String & destination_table_) + : name(name_), columns(columns_), context(context_), + num_shards(num_shards_), buffers(num_shards_), + min_thresholds(min_thresholds_), max_thresholds(max_thresholds_), + destination_database(destination_database_), destination_table(destination_table_), + no_destination(destination_database.empty() && destination_table.empty()), + log(&Logger::get("StorageBuffer (" + name + ")")), + flush_thread([this] { flushThread(); }) +{ +} + + +/// Читает из одного буфера (из одного блока) под его mutex-ом. +class BufferBlockInputStream : public IProfilingBlockInputStream +{ +public: + BufferBlockInputStream(const Names & column_names_, StorageBuffer::Buffer & buffer_) + : column_names(column_names_.begin(), column_names_.end()), buffer(buffer_) {} + + String getName() const { return "BufferBlockInputStream"; } + + String getID() const + { + std::stringstream res; + res << "Buffer(" << &buffer; + + for (const auto & name : column_names) + res << ", " << name; + + res << ")"; + return res.str(); + } + +protected: + Block readImpl() + { + std::lock_guard lock(buffer.mutex); + + Block res; + if (!buffer.data) + return res; + + for (size_t i = 0, size = buffer.data.columns(); i < size; ++i) + { + auto & col = buffer.data.unsafeGetByPosition(i); + if (column_names.count(col.name)) + res.insert(col); + } + + return res; + } + +private: + NameSet column_names; + StorageBuffer::Buffer & buffer; +}; + + +BlockInputStreams StorageBuffer::read( + const Names & column_names, + ASTPtr query, + const Settings & settings, + QueryProcessingStage::Enum & processed_stage, + size_t max_block_size, + unsigned threads) +{ + processed_stage = QueryProcessingStage::FetchColumns; + + BlockInputStreams streams_from_dst; + + if (!no_destination) + streams_from_dst = context.getTable(destination_database, destination_table)->read( + column_names, query, settings, processed_stage, max_block_size, threads); + + BlockInputStreams streams_from_buffers; + streams_from_buffers.reserve(num_shards); + for (auto & buf : buffers) + streams_from_buffers.push_back(new BufferBlockInputStream(column_names, buf)); + + /** Если источники из таблицы были обработаны до какой-то не начальной стадии выполнения запроса, + * то тогда источники из буферов надо тоже обернуть в конвейер обработки до той же стадии. + */ + if (processed_stage > QueryProcessingStage::FetchColumns) + for (auto & stream : streams_from_buffers) + stream = InterpreterSelectQuery(query, context, processed_stage, 0, stream).execute(); + + streams_from_dst.insert(streams_from_dst.end(), streams_from_buffers.begin(), streams_from_buffers.end()); + return streams_from_dst; +} + + +class BufferBlockOutputStream : public IBlockOutputStream +{ +public: + BufferBlockOutputStream(StorageBuffer & storage_) : storage(storage_) {} + + void write(const Block & block) + { + if (!block) + return; + + size_t rows = block.rowsInFirstColumn(); + if (!rows) + return; + + size_t bytes = block.bytes(); + + /// Если блок уже превышает максимальные ограничения, то пишем минуя буфер. + if (rows > storage.max_thresholds.rows || bytes > storage.max_thresholds.bytes) + { + LOG_TRACE(storage.log, "Writing block with " << rows << " rows, " << bytes << " bytes directly."); + writeDirect(block); + return; + } + + /// Распределяем нагрузку по шардам по номеру потока. + const auto start_shard_num = Poco::ThreadNumber::get() % storage.num_shards; + + /// Перебираем буферы по кругу, пытаясь заблокировать mutex. Не более одного круга. + auto shard_num = start_shard_num; + size_t try_no = 0; + for (; try_no != storage.num_shards; ++try_no) + { + std::unique_lock lock(storage.buffers[shard_num].mutex, std::try_to_lock_t()); + if (lock.owns_lock()) + { + insertIntoBuffer(block, storage.buffers[shard_num], std::move(lock)); + break; + } + + ++shard_num; + if (shard_num == storage.num_shards) + shard_num = 0; + } + + /// Если так и не удалось ничего сразу заблокировать, то будем ждать на mutex-е. + if (try_no == storage.num_shards) + insertIntoBuffer(block, storage.buffers[start_shard_num], std::unique_lock(storage.buffers[start_shard_num].mutex)); + } +private: + StorageBuffer & storage; + + void insertIntoBuffer(const Block & block, StorageBuffer::Buffer & buffer, std::unique_lock && lock) + { + if (!buffer.data) + { + buffer.first_write_time = time(0); + buffer.data = block.cloneEmpty(); + } + + /// Если после вставки в буфер, ограничения будут превышены, то будем сбрасывать буфер. + if (storage.checkThresholds(buffer, time(0), block.rowsInFirstColumn(), block.bytes())) + { + /// Вытащим из буфера блок, заменим буфер на пустой. После этого можно разблокировать mutex. + Block block_to_write; + buffer.data.swap(block_to_write); + buffer.first_write_time = 0; + lock.unlock(); + + appendBlock(block, block_to_write); + writeDirect(block_to_write); + } + else + appendBlock(block, buffer.data); + } + + void appendBlock(const Block & from, Block & to) + { + size_t rows = from.rows(); + for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no) + { + const IColumn & col_from = *from.getByPosition(column_no).column.get(); + IColumn & col_to = *to.unsafeGetByPosition(column_no).column.get(); + + for (size_t row_no = 0; row_no < rows; ++row_no) + col_to.insertFrom(col_from, row_no); + } + } + + void writeDirect(const Block & block) + { + auto table = storage.context.getTable(storage.destination_database, storage.destination_table); + auto dst = table->write(nullptr); + dst->writePrefix(); + dst->write(block); + dst->writeSuffix(); + } +}; + + +BlockOutputStreamPtr StorageBuffer::write(ASTPtr query) +{ + return new BufferBlockOutputStream(*this); +} + + +void StorageBuffer::shutdown() +{ + shutdown_event.set(); + + if (flush_thread.joinable()) + flush_thread.join(); + + for (auto & buf : buffers) + flushBuffer(buf, false); +} + + +bool StorageBuffer::checkThresholds(Buffer & buffer, time_t current_time, size_t additional_rows, size_t additional_bytes) +{ + time_t time_passed = 0; + if (buffer.first_write_time) + time_passed = current_time - buffer.first_write_time; + + size_t rows = buffer.data.rowsInFirstColumn() + additional_rows; + size_t bytes = buffer.data.bytes() + additional_bytes; + + bool res = + (time_passed > min_thresholds.time && rows > min_thresholds.rows && bytes > min_thresholds.bytes) + || (time_passed > max_thresholds.time || rows > max_thresholds.rows || bytes > max_thresholds.bytes); + + if (res) + LOG_TRACE(log, "Flushing buffer with " << rows << " rows, " << bytes << " bytes, age " << time_passed << " seconds."); + + return res; +} + + +void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds) +{ + StoragePtr table; + Block block_to_write; + + if (!no_destination) + table = context.tryGetTable(destination_database, destination_table); + + time_t current_time = check_thresholds ? time(0) : 0; + + { + std::lock_guard lock(buffer.mutex); + + if (check_thresholds && !checkThresholds(buffer, current_time)) + return; + + buffer.data.swap(block_to_write); + buffer.first_write_time = 0; + } + + if (!table) + { + if (!no_destination) + LOG_ERROR(log, "Destination table " << destination_database << "." << destination_table << " doesn't exists."); + + return; + } + + if (block_to_write) + { + auto dst = table->write(nullptr); + dst->writePrefix(); + dst->write(block_to_write); + dst->writeSuffix(); + } +} + + +void StorageBuffer::flushThread() +{ + do + { + try + { + for (auto & buf : buffers) + flushBuffer(buf, true); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } while (!shutdown_event.tryWait(1000)); +} + + +void StorageBuffer::alter(const AlterCommands & params, const String & database_name, const String & table_name, Context & context) +{ + auto lock = lockStructureForAlter(); + params.apply(*columns); + InterpreterAlterQuery::updateMetadata(database_name, table_name, *columns, context); +} + +} diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 49c5625b871..67ec5eeb5d0 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -190,6 +191,46 @@ StoragePtr StorageFactory::get( return StorageDistributed::create( table_name, columns, remote_database, remote_table, cluster_name, context, sharding_key, data_path); } + else if (name == "Buffer") + { + /** Buffer(db, table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) + * + * db, table - в какую таблицу сбрасывать данные из буфера. + * num_buckets - уровень параллелизма. + * min_time, max_time, min_rows, max_rows, min_bytes, max_bytes - условия вытеснения из буфера. + */ + + ASTs & args_func = typeid_cast(*typeid_cast(*query).storage).children; + + if (args_func.size() != 1) + throw Exception("Storage Buffer requires 9 parameters: " + " destination database, destination table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + ASTs & args = typeid_cast(*args_func.at(0)).children; + + if (args.size() != 9) + throw Exception("Storage Buffer requires 9 parameters: " + " destination_database, destination_table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + String destination_database = reinterpretAsIdentifier(args[0], local_context).name; + String destination_table = typeid_cast(*args[1]).name; + + size_t num_buckets = apply_visitor(FieldVisitorConvertToNumber(), typeid_cast(*args[2]).value); + + time_t min_time = apply_visitor(FieldVisitorConvertToNumber(), typeid_cast(*args[3]).value); + time_t max_time = apply_visitor(FieldVisitorConvertToNumber(), typeid_cast(*args[4]).value); + size_t min_rows = apply_visitor(FieldVisitorConvertToNumber(), typeid_cast(*args[5]).value); + size_t max_rows = apply_visitor(FieldVisitorConvertToNumber(), typeid_cast(*args[6]).value); + size_t min_bytes = apply_visitor(FieldVisitorConvertToNumber(), typeid_cast(*args[7]).value); + size_t max_bytes = apply_visitor(FieldVisitorConvertToNumber(), typeid_cast(*args[8]).value); + + return StorageBuffer::create( + table_name, columns, context, + num_buckets, {min_time, min_rows, min_bytes}, {max_time, max_rows, max_bytes}, + destination_database, destination_table); + } else if (endsWith(name, "MergeTree")) { /** Движки [Replicated][Summing|Collapsing|Aggregating|]MergeTree (8 комбинаций) diff --git a/dbms/src/Storages/StorageLog.cpp b/dbms/src/Storages/StorageLog.cpp index 6a7cccc7de7..aff8516eed2 100644 --- a/dbms/src/Storages/StorageLog.cpp +++ b/dbms/src/Storages/StorageLog.cpp @@ -43,8 +43,8 @@ String LogBlockInputStream::getID() const std::stringstream res; res << "Log(" << storage.getTableName() << ", " << &storage << ", " << mark_number << ", " << rows_limit; - for (size_t i = 0; i < column_names.size(); ++i) - res << ", " << column_names[i]; + for (const auto & name : column_names) + res << ", " << name; res << ")"; return res.str(); diff --git a/dbms/src/Storages/StorageTinyLog.cpp b/dbms/src/Storages/StorageTinyLog.cpp index 92b5a0cc65a..dd5434e5dd5 100644 --- a/dbms/src/Storages/StorageTinyLog.cpp +++ b/dbms/src/Storages/StorageTinyLog.cpp @@ -38,8 +38,8 @@ String TinyLogBlockInputStream::getID() const std::stringstream res; res << "TinyLog(" << storage.getTableName() << ", " << &storage; - for (size_t i = 0; i < column_names.size(); ++i) - res << ", " << column_names[i]; + for (const auto & name : column_names) + res << ", " << name; res << ")"; return res.str(); From 13f825a7e53e91f712b89b30f4e4c2680b6912ef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Oct 2014 03:12:39 +0300 Subject: [PATCH 04/29] dbms: StorageBuffer: fixed error [#METR-13297]. --- dbms/src/Storages/StorageBuffer.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index e32215f6f78..f2be767130e 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -55,9 +55,14 @@ public: protected: Block readImpl() { + Block res; + + if (has_been_read) + return res; + has_been_read = true; + std::lock_guard lock(buffer.mutex); - Block res; if (!buffer.data) return res; @@ -74,6 +79,7 @@ protected: private: NameSet column_names; StorageBuffer::Buffer & buffer; + bool has_been_read = false; }; From 5cf02c4cc10db1ca830a7c4b297117385c627814 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Oct 2014 03:21:06 +0300 Subject: [PATCH 05/29] dbms: addition to prev. revision [#METR-13297]. --- dbms/include/DB/Storages/StorageBuffer.h | 1 + dbms/src/Storages/StorageBuffer.cpp | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/dbms/include/DB/Storages/StorageBuffer.h b/dbms/include/DB/Storages/StorageBuffer.h index 8c093ee1639..882afbca700 100644 --- a/dbms/include/DB/Storages/StorageBuffer.h +++ b/dbms/include/DB/Storages/StorageBuffer.h @@ -70,6 +70,7 @@ public: /// Сбрасывает все буферы в подчинённую таблицу. void shutdown() override; + bool optimize() override; void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override { name = new_table_name; } diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index f2be767130e..7b7cf042cb0 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -228,8 +228,16 @@ void StorageBuffer::shutdown() if (flush_thread.joinable()) flush_thread.join(); + optimize(); +} + + +bool StorageBuffer::optimize() +{ for (auto & buf : buffers) flushBuffer(buf, false); + + return true; } From fee4863a161ab2c17d1dbd6a88b8157f5ed12937 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Oct 2014 05:01:21 +0300 Subject: [PATCH 06/29] dbms: Client: tiny fix for progress-bar [#METR-2944]. --- dbms/src/Client/Client.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Client/Client.cpp b/dbms/src/Client/Client.cpp index 18f10e42f21..b325062f693 100644 --- a/dbms/src/Client/Client.cpp +++ b/dbms/src/Client/Client.cpp @@ -889,11 +889,13 @@ private: { show_progress_bar = true; - std::string bar = UnicodeBar::render(UnicodeBar::getWidth(progress.rows, 0, progress.total_rows, width_of_progress_bar)); + size_t total_rows_corrected = std::max(progress.rows, progress.total_rows); + + std::string bar = UnicodeBar::render(UnicodeBar::getWidth(progress.rows, 0, total_rows_corrected, width_of_progress_bar)); std::cerr << "\033[0;32m" << bar << "\033[0m"; if (width_of_progress_bar > static_cast(bar.size() / UNICODE_BAR_CHAR_SIZE)) std::cerr << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' '); - std::cerr << ' ' << (99 * progress.rows / progress.total_rows) << '%'; /// Чуть-чуть занижаем процент, чтобы не показывать 100%. + std::cerr << ' ' << (99 * progress.rows / total_rows_corrected) << '%'; /// Чуть-чуть занижаем процент, чтобы не показывать 100%. } std::cerr << ENABLE_LINE_WRAPPING; From b3c060b89e3215c07823b77231655cd10b4b0d21 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Oct 2014 05:59:46 +0300 Subject: [PATCH 07/29] dbms: fixed error [#METR-13297]. --- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 7e7316ae6c8..5cd7fee1c11 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -79,7 +79,10 @@ StoragePtr InterpreterCreateQuery::execute(bool assume_metadata_exists) Block select_sample; /// Для таблиц типа view, чтобы получить столбцы, может понадобиться sample_block. if (create.select && (!create.attach || (!create.columns && (create.is_view || create.is_materialized_view)))) - select_sample = InterpreterSelectQuery(create.select, context).getSampleBlock(); + { + interpreter_select = new InterpreterSelectQuery(create.select, context); + select_sample = interpreter_select->getSampleBlock(); + } StoragePtr res; String storage_name; From e063ccc180580ec2b9367a4b6fb0db3d04d5747e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 27 Oct 2014 07:18:13 +0300 Subject: [PATCH 08/29] dbms: StorageBuffer: additions [#METR-13297]. --- dbms/include/DB/Storages/StorageBuffer.h | 10 +- dbms/src/Storages/StorageBuffer.cpp | 189 ++++++++++++++++++----- 2 files changed, 154 insertions(+), 45 deletions(-) diff --git a/dbms/include/DB/Storages/StorageBuffer.h b/dbms/include/DB/Storages/StorageBuffer.h index 882afbca700..86f67796740 100644 --- a/dbms/include/DB/Storages/StorageBuffer.h +++ b/dbms/include/DB/Storages/StorageBuffer.h @@ -19,10 +19,10 @@ namespace DB * При чтении, читает как из своих буферов, так и из подчинённой таблицы. * * Буфер представляет собой набор из num_shards блоков. - * При записи, выбирается номер блока по остатку от деления ThreadNumber на num_buckets (или один из других), + * При записи, выбирается номер блока по остатку от деления ThreadNumber на num_shards (или один из других), * и в соответствующий блок добавляются строчки. * При использовании блока, он блокируется некоторым mutex-ом. Если при записи, соответствующий блок уже занят - * - пробуем заблокировать следующий по кругу блок, и так не более num_buckets раз (далее блокируемся). + * - пробуем заблокировать следующий по кругу блок, и так не более num_shards раз (далее блокируемся). * Пороги проверяются при вставке, а также, периодически, в фоновом потоке (чтобы реализовать пороги по времени). * Пороги действуют независимо для каждого shard-а. Каждый shard может быть сброшен независимо от других. * Если в таблицу вставляется блок, который сам по себе превышает max-пороги, то он записывается сразу в подчинённую таблицу без буферизации. @@ -78,8 +78,7 @@ public: bool supportsFinal() const override { return true; } bool supportsPrewhere() const override { return true; } - /// в подтаблицах добавлять и удалять столбы нужно вручную - /// структура подтаблиц не проверяется + /// Структура подчинённой таблицы не проверяется и не изменяется. void alter(const AlterCommands & params, const String & database_name, const String & table_name, Context & context) override; private: @@ -119,6 +118,9 @@ private: void flushBuffer(Buffer & buffer, bool check_thresholds); bool checkThresholds(Buffer & buffer, time_t current_time, size_t additional_rows = 0, size_t additional_bytes = 0); + /// Аргумент table передаётся, так как иногда вычисляется заранее. Он должен соответствовать destination-у. + void writeBlockToDestination(const Block & block, StoragePtr table); + Poco::Event shutdown_event; void flushThread(); }; diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 7b7cf042cb0..85d14f4045c 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -1,8 +1,12 @@ #include +#include #include #include +#include #include +#include + namespace DB { @@ -116,6 +120,24 @@ BlockInputStreams StorageBuffer::read( } +static void appendBlock(const Block & from, Block & to) +{ + size_t rows = from.rows(); + for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no) + { + const IColumn & col_from = *from.getByPosition(column_no).column.get(); + IColumn & col_to = *to.getByPosition(column_no).column.get(); + + if (col_from.getName() != col_to.getName()) + throw Exception("Cannot append block to another: different type of columns at index " + toString(column_no) + + ". Block 1: " + from.dumpStructure() + ". Block 2: " + to.dumpStructure(), ErrorCodes::BLOCKS_HAS_DIFFERENT_STRUCTURE); + + for (size_t row_no = 0; row_no < rows; ++row_no) + col_to.insertFrom(col_from, row_no); + } +} + + class BufferBlockOutputStream : public IBlockOutputStream { public: @@ -130,13 +152,33 @@ public: if (!rows) return; + StoragePtr destination; + if (!storage.no_destination) + { + destination = storage.context.tryGetTable(storage.destination_database, storage.destination_table); + + /// Проверяем структуру таблицы. + try + { + destination->check(block, true); + } + catch (Exception & e) + { + e.addMessage("(when looking at destination table " + storage.destination_database + "." + storage.destination_table + ")"); + throw; + } + } + size_t bytes = block.bytes(); /// Если блок уже превышает максимальные ограничения, то пишем минуя буфер. if (rows > storage.max_thresholds.rows || bytes > storage.max_thresholds.bytes) { - LOG_TRACE(storage.log, "Writing block with " << rows << " rows, " << bytes << " bytes directly."); - writeDirect(block); + if (!storage.no_destination) + { + LOG_TRACE(storage.log, "Writing block with " << rows << " rows, " << bytes << " bytes directly."); + storage.writeBlockToDestination(block, destination); + } return; } @@ -184,34 +226,16 @@ private: buffer.first_write_time = 0; lock.unlock(); - appendBlock(block, block_to_write); - writeDirect(block_to_write); + if (!storage.no_destination) + { + appendBlock(block, block_to_write); + storage.writeBlockToDestination(block_to_write, + storage.context.tryGetTable(storage.destination_database, storage.destination_table)); + } } else appendBlock(block, buffer.data); } - - void appendBlock(const Block & from, Block & to) - { - size_t rows = from.rows(); - for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no) - { - const IColumn & col_from = *from.getByPosition(column_no).column.get(); - IColumn & col_to = *to.unsafeGetByPosition(column_no).column.get(); - - for (size_t row_no = 0; row_no < rows; ++row_no) - col_to.insertFrom(col_from, row_no); - } - } - - void writeDirect(const Block & block) - { - auto table = storage.context.getTable(storage.destination_database, storage.destination_table); - auto dst = table->write(nullptr); - dst->writePrefix(); - dst->write(block); - dst->writeSuffix(); - } }; @@ -263,14 +287,16 @@ bool StorageBuffer::checkThresholds(Buffer & buffer, time_t current_time, size_t void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds) { - StoragePtr table; Block block_to_write; - - if (!no_destination) - table = context.tryGetTable(destination_database, destination_table); - time_t current_time = check_thresholds ? time(0) : 0; + /** Довольно много проблем из-за того, что хотим блокировать буфер лишь на короткое время. + * Под блокировкой, получаем из буфера блок, и заменяем в нём блок на новый пустой. + * Затем пытаемся записать полученный блок в подчинённую таблицу. + * Если этого не получилось - кладём данные обратно в буфер. + * Для этого также делается блокировка структуры таблицы. + * Замечание: может быть, стоит избавиться от такой сложности. + */ { std::lock_guard lock(buffer.mutex); @@ -281,21 +307,99 @@ void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds) buffer.first_write_time = 0; } + if (no_destination) + return; + + try + { + writeBlockToDestination(block_to_write, context.tryGetTable(destination_database, destination_table)); + } + catch (...) + { + /// Возвращаем блок на место в буфер. + + std::lock_guard lock(buffer.mutex); + + if (buffer.data) + { + /** Так как структура таблицы не изменилась, можно склеить два блока. + * Замечание: остаётся проблема - из-за того, что в разных попытках вставляются разные блоки, + * теряется идемпотентность вставки в ReplicatedMergeTree. + */ + appendBlock(block_to_write, buffer.data); + buffer.data.swap(block_to_write); + } + + if (!buffer.first_write_time) + buffer.first_write_time = current_time; + + /// Через некоторое время будет следующая попытка записать. + throw; + } +} + + +void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr table) +{ + if (no_destination || !block) + return; + if (!table) { - if (!no_destination) - LOG_ERROR(log, "Destination table " << destination_database << "." << destination_table << " doesn't exists."); - + LOG_ERROR(log, "Destination table " << destination_database << "." << destination_table << " doesn't exist. Block of data is discarded."); return; } - if (block_to_write) + ASTInsertQuery * insert = new ASTInsertQuery; + ASTPtr ast_ptr = insert; + + insert->database = destination_database; + insert->table = destination_table; + + /** Будем вставлять столбцы, являющиеся пересечением множества столбцов таблицы-буфера и подчинённой таблицы. + * Это позволит поддержать часть случаев (но не все), когда структура таблицы не совпадает. + */ + Block structure_of_destination_table = table->getSampleBlock(); + Names columns_intersection; + columns_intersection.reserve(block.columns()); + for (size_t i : ext::range(0, structure_of_destination_table.columns())) { - auto dst = table->write(nullptr); - dst->writePrefix(); - dst->write(block_to_write); - dst->writeSuffix(); + auto dst_col = structure_of_destination_table.unsafeGetByPosition(i); + if (block.has(dst_col.name)) + { + if (block.getByName(dst_col.name).type->getName() != dst_col.type->getName()) + { + LOG_ERROR(log, "Destination table " << destination_database << "." << destination_table + << " have different type of column " << dst_col.name << ". Block of data is discarded."); + return; + } + + columns_intersection.push_back(dst_col.name); + } } + + if (columns_intersection.empty()) + { + LOG_ERROR(log, "Destination table " << destination_database << "." << destination_table << " have no common columns with block in buffer. Block of data is discarded."); + return; + } + + if (columns_intersection.size() != block.columns()) + LOG_WARNING(log, "Not all columns from block in buffer exist in destination table " + << destination_database << "." << destination_table << ". Some columns are discarded."); + + ASTExpressionList * list_of_columns = new ASTExpressionList; + insert->columns = list_of_columns; + list_of_columns->children.reserve(columns_intersection.size()); + for (const String & column : columns_intersection) + list_of_columns->children.push_back(new ASTIdentifier(StringRange(), column, ASTIdentifier::Column)); + + InterpreterInsertQuery interpreter{ast_ptr, context}; + + auto block_io = interpreter.execute(); + block_io.out->writePrefix(); + block_io.out->write(block); + block_io.out->writeSuffix(); } @@ -305,8 +409,7 @@ void StorageBuffer::flushThread() { try { - for (auto & buf : buffers) - flushBuffer(buf, true); + optimize(); } catch (...) { @@ -319,6 +422,10 @@ void StorageBuffer::flushThread() void StorageBuffer::alter(const AlterCommands & params, const String & database_name, const String & table_name, Context & context) { auto lock = lockStructureForAlter(); + + /// Чтобы не осталось блоков старой структуры. + optimize(); + params.apply(*columns); InterpreterAlterQuery::updateMetadata(database_name, table_name, *columns, context); } From 5d21d75e85af77b915540c83e367ecd4d4f2f37b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 27 Oct 2014 07:50:38 +0300 Subject: [PATCH 09/29] dbms: addition to prev. revision [#METR-13297]. --- dbms/src/Storages/StorageBuffer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 85d14f4045c..b3cedbd646a 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -294,7 +294,6 @@ void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds) * Под блокировкой, получаем из буфера блок, и заменяем в нём блок на новый пустой. * Затем пытаемся записать полученный блок в подчинённую таблицу. * Если этого не получилось - кладём данные обратно в буфер. - * Для этого также делается блокировка структуры таблицы. * Замечание: может быть, стоит избавиться от такой сложности. */ { From 0d35ea0bf4d62c39096a54e2e448516d598b7c83 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Mon, 27 Oct 2014 18:16:11 +0300 Subject: [PATCH 10/29] dbms: implement firstSignificantSubdomain, cutToFirstSignificantSubdomain. [#METR-13151] --- dbms/include/DB/Functions/FunctionsURL.h | 94 ++++++++++++++++++++++++ dbms/src/Functions/FunctionsURL.cpp | 2 + 2 files changed, 96 insertions(+) diff --git a/dbms/include/DB/Functions/FunctionsURL.h b/dbms/include/DB/Functions/FunctionsURL.h index 0f2935bb569..f2d1d53efe4 100644 --- a/dbms/include/DB/Functions/FunctionsURL.h +++ b/dbms/include/DB/Functions/FunctionsURL.h @@ -113,6 +113,94 @@ struct ExtractDomain } }; +struct ExtractFirstSignificantSubdomain +{ + static size_t getReserveLengthForElement() { return 10; } + + static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size, Pos * out_domain_end = nullptr) + { + res_data = data; + res_size = 0; + + Pos tmp; + size_t domain_length; + ExtractDomain::execute(data, size, tmp, domain_length); + + if (domain_length == 0) + return; + + if (out_domain_end) + *out_domain_end = tmp + domain_length; + + /// cut useless dot + if (tmp[domain_length - 1] == '.') + --domain_length; + + res_data = tmp; + res_size = domain_length; + + auto begin = tmp; + auto end = begin + domain_length; + const char * last_3_periods[3]{}; + auto pos = static_cast(memchr(begin, '.', domain_length)); + + while (pos) + { + last_3_periods[2] = last_3_periods[1]; + last_3_periods[1] = last_3_periods[0]; + last_3_periods[0] = pos; + pos = static_cast(memchr(pos + 1, '.', end - pos - 1)); + } + + if (!last_3_periods[0]) + return; + + if (!last_3_periods[1]) + { + res_size = last_3_periods[0] - begin; + return; + } + + if (!last_3_periods[2]) + last_3_periods[2] = begin - 1; + + if (!strncmp(last_3_periods[1] + 1, "com", 3) || + !strncmp(last_3_periods[1] + 1, "net", 3) || + !strncmp(last_3_periods[1] + 1, "org", 3) || + !strncmp(last_3_periods[1] + 1, "co", 2)) + { + res_data += last_3_periods[2] + 1 - begin; + res_size = last_3_periods[1] - last_3_periods[2] - 1; + return; + } + + res_data += last_3_periods[1] + 1 - begin; + res_size = last_3_periods[0] - last_3_periods[1] - 1; + } +}; + +struct CutToFirstSignificantSubdomain +{ + static size_t getReserveLengthForElement() { return 15; } + + static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size) + { + res_data = data; + res_size = 0; + + Pos tmp_data; + size_t tmp_length; + Pos domain_end; + ExtractFirstSignificantSubdomain::execute(data, size, tmp_data, tmp_length, &domain_end); + + if (tmp_length == 0) + return; + + res_data = tmp_data; + res_size = domain_end - tmp_data; + } +}; + struct ExtractTopLevelDomain { static size_t getReserveLengthForElement() { return 5; } @@ -839,12 +927,15 @@ struct CutSubstringImpl struct NameProtocol { static const char * get() { return "protocol"; } }; struct NameDomain { static const char * get() { return "domain"; } }; struct NameDomainWithoutWWW { static const char * get() { return "domainWithoutWWW"; } }; +struct NameFirstSignificantSubdomain { static const char * get() { return "firstSignificantSubdomain"; } }; struct NameTopLevelDomain { static const char * get() { return "topLevelDomain"; } }; struct NamePath { static const char * get() { return "path"; } }; struct NameQueryString { static const char * get() { return "queryString"; } }; struct NameFragment { static const char * get() { return "fragment"; } }; struct NameQueryStringAndFragment { static const char * get() { return "queryStringAndFragment"; } }; +struct NameCutToFirstSignificantSubdomain { static const char * get() { return "cutToFirstSignificantSubdomain"; } }; + struct NameCutWWW { static const char * get() { return "cutWWW"; } }; struct NameCutQueryString { static const char * get() { return "cutQueryString"; } }; struct NameCutFragment { static const char * get() { return "cutFragment"; } }; @@ -856,12 +947,15 @@ struct NameCutURLParameter { static const char * get() { return "cutURLParam typedef FunctionStringToString, NameProtocol> FunctionProtocol; typedef FunctionStringToString >, NameDomain> FunctionDomain; typedef FunctionStringToString >, NameDomainWithoutWWW> FunctionDomainWithoutWWW; +typedef FunctionStringToString, NameFirstSignificantSubdomain> FunctionFirstSignificantSubdomain; typedef FunctionStringToString, NameTopLevelDomain> FunctionTopLevelDomain; typedef FunctionStringToString, NamePath> FunctionPath; typedef FunctionStringToString >, NameQueryString> FunctionQueryString; typedef FunctionStringToString >, NameFragment> FunctionFragment; typedef FunctionStringToString >, NameQueryStringAndFragment> FunctionQueryStringAndFragment; +typedef FunctionStringToString, NameCutToFirstSignificantSubdomain> FunctionCutToFirstSignificantSubdomain; + typedef FunctionStringToString, NameCutWWW> FunctionCutWWW; typedef FunctionStringToString >, NameCutQueryString> FunctionCutQueryString; typedef FunctionStringToString >, NameCutFragment> FunctionCutFragment; diff --git a/dbms/src/Functions/FunctionsURL.cpp b/dbms/src/Functions/FunctionsURL.cpp index 4fcaaa2ee27..bb134e33159 100644 --- a/dbms/src/Functions/FunctionsURL.cpp +++ b/dbms/src/Functions/FunctionsURL.cpp @@ -11,6 +11,7 @@ void registerFunctionsURL(FunctionFactory & factory) factory.registerFunction("protocol", F { return new FunctionProtocol; }); factory.registerFunction("domain", F { return new FunctionDomain; }); factory.registerFunction("domainWithoutWWW", F { return new FunctionDomainWithoutWWW; }); + factory.registerFunction("firstSignificantSubdomain", F { return new FunctionFirstSignificantSubdomain; }); factory.registerFunction("topLevelDomain", F { return new FunctionTopLevelDomain; }); factory.registerFunction("path", F { return new FunctionPath; }); factory.registerFunction("queryString", F { return new FunctionQueryString; }); @@ -21,6 +22,7 @@ void registerFunctionsURL(FunctionFactory & factory) factory.registerFunction("extractURLParameterNames", F { return new FunctionExtractURLParameterNames; }); factory.registerFunction("URLHierarchy", F { return new FunctionURLHierarchy; }); factory.registerFunction("URLPathHierarchy", F { return new FunctionURLPathHierarchy; }); + factory.registerFunction("cutToFirstSignificantSubdomain", F { return new FunctionCutToFirstSignificantSubdomain; }); factory.registerFunction("cutWWW", F { return new FunctionCutWWW; }); factory.registerFunction("cutQueryString", F { return new FunctionCutQueryString; }); factory.registerFunction("cutFragment", F { return new FunctionCutFragment; }); From 484073a95c77f5cb2f90093d5891e738a4120a45 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 28 Oct 2014 16:43:22 +0300 Subject: [PATCH 11/29] dbms: add emptyArrayType family of functions. #[METR-13151] --- dbms/include/DB/Columns/ColumnVector.h | 3 +- dbms/include/DB/DataTypes/DataTypeString.h | 4 +- dbms/include/DB/Functions/FunctionsArray.h | 55 ++++++++++++++++++++++ dbms/src/Functions/FunctionsArray.cpp | 16 ++++++- 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/dbms/include/DB/Columns/ColumnVector.h b/dbms/include/DB/Columns/ColumnVector.h index 822d35e9b19..322bbc89b0f 100644 --- a/dbms/include/DB/Columns/ColumnVector.h +++ b/dbms/include/DB/Columns/ColumnVector.h @@ -86,7 +86,8 @@ public: typedef PODArray Container_t; ColumnVector() {} - ColumnVector(size_t n) : data(n) {} + ColumnVector(const size_t n) : data{n} {} + ColumnVector(const size_t n, const value_type x) : data{n, x} {} bool isNumeric() const { return IsNumber::value; } bool isFixed() const { return IsNumber::value; } diff --git a/dbms/include/DB/DataTypes/DataTypeString.h b/dbms/include/DB/DataTypes/DataTypeString.h index 5ada07cba71..73ddbd86b70 100644 --- a/dbms/include/DB/DataTypes/DataTypeString.h +++ b/dbms/include/DB/DataTypes/DataTypeString.h @@ -16,6 +16,8 @@ using Poco::SharedPtr; class DataTypeString : public IDataType { public: + using FieldType = String; + std::string getName() const { return "String"; @@ -39,7 +41,7 @@ public: void serializeTextQuoted(const Field & field, WriteBuffer & ostr) const; void deserializeTextQuoted(Field & field, ReadBuffer & istr) const; - + void serializeTextJSON(const Field & field, WriteBuffer & ostr) const; ColumnPtr createColumn() const; diff --git a/dbms/include/DB/Functions/FunctionsArray.h b/dbms/include/DB/Functions/FunctionsArray.h index 94eb4ee4f87..b070acf34b1 100644 --- a/dbms/include/DB/Functions/FunctionsArray.h +++ b/dbms/include/DB/Functions/FunctionsArray.h @@ -2,6 +2,10 @@ #include #include +#include +#include +#include + #include #include @@ -1190,6 +1194,43 @@ private: }; +template struct TypeToColumnType { using ColumnType = ColumnVector; }; +template <> struct TypeToColumnType { using ColumnType = ColumnString; }; + +template struct DataTypeToName : TypeName { }; +template <> struct DataTypeToName { static std::string get() { return "Date"; } }; +template <> struct DataTypeToName { static std::string get() { return "DateTime"; } }; + +template +struct EmptyArray : public IFunction +{ + String getName() const + { + return "emptyArray" + DataTypeToName::get(); + } + + DataTypePtr getReturnType(const DataTypes & arguments) const + { + if (arguments.size() != 0) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 0.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + return new DataTypeArray{new DataType{}}; + } + + void execute(Block & block, const ColumnNumbers & arguments, size_t result) + { + using UnderlyingColumnType = typename TypeToColumnType::ColumnType; + + block.getByPosition(result).column = new ColumnArray{ + new UnderlyingColumnType, + new ColumnArray::ColumnOffsets_t{block.rowsInFirstColumn(), 0} + }; + } +}; + + struct NameHas { static const char * get() { return "has"; } }; struct NameIndexOf { static const char * get() { return "indexOf"; } }; struct NameCountEqual { static const char * get() { return "countEqual"; } }; @@ -1198,5 +1239,19 @@ typedef FunctionArrayIndex FunctionHas; typedef FunctionArrayIndex FunctionIndexOf; typedef FunctionArrayIndex FunctionCountEqual; +using FunctionEmptyArrayUInt8 = EmptyArray; +using FunctionEmptyArrayUInt16 = EmptyArray; +using FunctionEmptyArrayUInt32 = EmptyArray; +using FunctionEmptyArrayUInt64 = EmptyArray; +using FunctionEmptyArrayInt8 = EmptyArray; +using FunctionEmptyArrayInt16 = EmptyArray; +using FunctionEmptyArrayInt32 = EmptyArray; +using FunctionEmptyArrayInt64 = EmptyArray; +using FunctionEmptyArrayFloat32 = EmptyArray; +using FunctionEmptyArrayFloat64 = EmptyArray; +using FunctionEmptyArrayDate = EmptyArray; +using FunctionEmptyArrayDateTime = EmptyArray; +using FunctionEmptyArrayString = EmptyArray; + } diff --git a/dbms/src/Functions/FunctionsArray.cpp b/dbms/src/Functions/FunctionsArray.cpp index d9bb716e59d..6464c6a1b65 100644 --- a/dbms/src/Functions/FunctionsArray.cpp +++ b/dbms/src/Functions/FunctionsArray.cpp @@ -6,7 +6,7 @@ namespace DB void registerFunctionsArray(FunctionFactory & factory) { - #define F [](const Context & context) -> IFunction* + #define F [](const Context & context) -> IFunction * factory.registerFunction("array", F { return new FunctionArray; }); factory.registerFunction("arrayElement", F { return new FunctionArrayElement; }); @@ -15,6 +15,20 @@ void registerFunctionsArray(FunctionFactory & factory) factory.registerFunction("countEqual", F { return new FunctionCountEqual; }); factory.registerFunction("arrayEnumerate", F { return new FunctionArrayEnumerate; }); factory.registerFunction("arrayEnumerateUniq", F { return new FunctionArrayEnumerateUniq; }); + factory.registerFunction("emptyArrayUInt8", F { return new FunctionEmptyArrayUInt8; }); + factory.registerFunction("emptyArrayUInt16", F { return new FunctionEmptyArrayUInt16; }); + factory.registerFunction("emptyArrayUInt32", F { return new FunctionEmptyArrayUInt32; }); + factory.registerFunction("emptyArrayUInt64", F { return new FunctionEmptyArrayUInt64; }); + factory.registerFunction("emptyArrayInt8", F { return new FunctionEmptyArrayInt8; }); + factory.registerFunction("emptyArrayInt16", F { return new FunctionEmptyArrayInt16; }); + factory.registerFunction("emptyArrayInt32", F { return new FunctionEmptyArrayInt32; }); + factory.registerFunction("emptyArrayInt64", F { return new FunctionEmptyArrayInt64; }); + factory.registerFunction("emptyArrayFloat32", F { return new FunctionEmptyArrayFloat32; }); + factory.registerFunction("emptyArrayFloat64", F { return new FunctionEmptyArrayFloat64; }); + factory.registerFunction("emptyArrayDate", F { return new FunctionEmptyArrayDate; }); + factory.registerFunction("emptyArrayDateTime", F { return new FunctionEmptyArrayDateTime; }); + factory.registerFunction("emptyArrayString", F { return new FunctionEmptyArrayString; }); + #undef F } From 96d8c7484a61af67ef04556eaf0856f57479bf0f Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 28 Oct 2014 18:49:11 +0300 Subject: [PATCH 12/29] dbms: add arrayFirst, arrayFirstIndex functions. [#METR-13151] --- .../DB/Functions/FunctionsHigherOrder.h | 93 +++++++++++++++++++ dbms/src/Functions/FunctionsHigherOrder.cpp | 4 +- 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/dbms/include/DB/Functions/FunctionsHigherOrder.h b/dbms/include/DB/Functions/FunctionsHigherOrder.h index b8696081d08..1d0ff175778 100644 --- a/dbms/include/DB/Functions/FunctionsHigherOrder.h +++ b/dbms/include/DB/Functions/FunctionsHigherOrder.h @@ -288,6 +288,95 @@ struct ArraySumImpl } }; +struct ArrayFirstImpl +{ + static bool needBoolean() { return false; } + static bool needExpression() { return true; } + static bool needOneArray() { return false; } + + static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & array_element) + { + return array_element; + } + + static ColumnPtr execute(const ColumnArray * array, ColumnPtr mapped) + { + auto column_filter = typeid_cast *>(&*mapped); + if (!column_filter) + throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + + const auto & filter = column_filter->getData(); + const auto & offsets = array->getOffsets(); + const auto & data = array->getData(); + ColumnPtr out{data.cloneEmpty()}; + + size_t pos{}; + for (size_t i = 0; i < offsets.size(); ++i) + { + auto exists = false; + for (; pos < offsets[i]; ++pos) + { + if (filter[pos]) + { + out->insert(data[pos]); + exists = true; + pos = offsets[i]; + break; + } + } + + if (!exists) + out->insertDefault(); + } + + return out; + } +}; + +struct ArrayFirstIndexImpl +{ + static bool needBoolean() { return false; } + static bool needExpression() { return true; } + static bool needOneArray() { return false; } + + static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & array_element) + { + return new DataTypeUInt32; + } + + static ColumnPtr execute(const ColumnArray * array, ColumnPtr mapped) + { + auto column_filter = typeid_cast *>(&*mapped); + if (!column_filter) + throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + + const auto & filter = column_filter->getData(); + const auto & offsets = array->getOffsets(); + auto out_column = new ColumnVector{offsets.size()}; + ColumnPtr out_column_ptr{out_column}; + auto & out_index = out_column->getData(); + + size_t pos{}; + for (size_t i = 0; i < offsets.size(); ++i) + { + UInt32 index{}; + for (size_t idx{1}; pos < offsets[i]; ++pos, ++idx) + { + if (filter[pos]) + { + index = idx; + pos = offsets[i]; + break; + } + } + + out_index[i] = index; + } + + return out_column_ptr; + } +}; + template class FunctionArrayMapped : public IFunction { @@ -508,6 +597,8 @@ struct NameArrayCount { static const char * get() { return "arrayCount"; } }; struct NameArrayExists { static const char * get() { return "arrayExists"; } }; struct NameArrayAll { static const char * get() { return "arrayAll"; } }; struct NameArraySum { static const char * get() { return "arraySum"; } }; +struct NameArrayFirst { static const char * get() { return "arrayFirst"; } }; +struct NameArrayFirstIndex { static const char * get() { return "arrayFirstIndex"; } }; typedef FunctionArrayMapped FunctionArrayMap; typedef FunctionArrayMapped FunctionArrayFilter; @@ -515,5 +606,7 @@ typedef FunctionArrayMapped FunctionArrayCount typedef FunctionArrayMapped FunctionArrayExists; typedef FunctionArrayMapped FunctionArrayAll; typedef FunctionArrayMapped FunctionArraySum; +typedef FunctionArrayMapped FunctionArrayFirst; +typedef FunctionArrayMapped FunctionArrayFirstIndex; } diff --git a/dbms/src/Functions/FunctionsHigherOrder.cpp b/dbms/src/Functions/FunctionsHigherOrder.cpp index 90434a870ef..69caed90abc 100644 --- a/dbms/src/Functions/FunctionsHigherOrder.cpp +++ b/dbms/src/Functions/FunctionsHigherOrder.cpp @@ -6,7 +6,7 @@ namespace DB void registerFunctionsHigherOrder(FunctionFactory & factory) { - #define F [](const Context & context) -> IFunction* + #define F [](const Context & context) -> IFunction * factory.registerFunction("arrayMap", F { return new FunctionArrayMap; }); factory.registerFunction("arrayFilter", F { return new FunctionArrayFilter; }); @@ -14,6 +14,8 @@ void registerFunctionsHigherOrder(FunctionFactory & factory) factory.registerFunction("arrayExists", F { return new FunctionArrayExists; }); factory.registerFunction("arrayAll", F { return new FunctionArrayAll; }); factory.registerFunction("arraySum", F { return new FunctionArraySum; }); + factory.registerFunction("arrayFirst", F { return new FunctionArrayFirst; }); + factory.registerFunction("arrayFirstIndex", F { return new FunctionArrayFirstIndex; }); #undef F } From 8f687c604271aaa5917ea686ccf07dbd47def410 Mon Sep 17 00:00:00 2001 From: Pavel Kartavyy Date: Tue, 28 Oct 2014 20:55:07 +0300 Subject: [PATCH 13/29] RecountRequest: fixed wrong formatting, when chunk is empty [#METR-13437] --- libs/libmysqlxx/include/mysqlxx/DateTime.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libs/libmysqlxx/include/mysqlxx/DateTime.h b/libs/libmysqlxx/include/mysqlxx/DateTime.h index 8429d2256a8..46dccd3c38b 100644 --- a/libs/libmysqlxx/include/mysqlxx/DateTime.h +++ b/libs/libmysqlxx/include/mysqlxx/DateTime.h @@ -4,6 +4,7 @@ #include #include +#include namespace mysqlxx @@ -177,12 +178,15 @@ public: inline std::ostream & operator<< (std::ostream & ostr, const DateTime & datetime) { - return ostr << datetime.year() - << '-' << (datetime.month() / 10) << (datetime.month() % 10) + ostr << std::setfill('0') << std::setw(4) << datetime.year(); + + ostr << '-' << (datetime.month() / 10) << (datetime.month() % 10) << '-' << (datetime.day() / 10) << (datetime.day() % 10) << ' ' << (datetime.hour() / 10) << (datetime.hour() % 10) << ':' << (datetime.minute() / 10) << (datetime.minute() % 10) << ':' << (datetime.second() / 10) << (datetime.second() % 10); + + return ostr; } } From 425263970f0e4d4be937efabb3f25f0b314ff4dc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 29 Oct 2014 04:18:50 +0300 Subject: [PATCH 14/29] dbms: improved performance of aggregation by one numeric key (up to: 2x for UInt8, 5x for UInt16, 1.1x for UInt32, UInt64) [#METR-2944]. --- dbms/include/DB/Columns/ColumnVector.h | 2 +- dbms/include/DB/Core/Defines.h | 1 + dbms/include/DB/Interpreters/Aggregator.h | 96 +++++++++++++----- .../DB/Interpreters/SplittingAggregator.h | 3 + dbms/include/DB/Storages/StorageView.h | 5 +- dbms/src/Interpreters/Aggregator.cpp | 79 ++++++++++++--- dbms/src/Interpreters/SplittingAggregator.cpp | 99 +++++++++++-------- dbms/src/Storages/StorageSystemOne.cpp | 4 +- .../00019_quantiles_totals_distributed.sql | 2 +- 9 files changed, 210 insertions(+), 81 deletions(-) diff --git a/dbms/include/DB/Columns/ColumnVector.h b/dbms/include/DB/Columns/ColumnVector.h index 322bbc89b0f..a3299d1da4e 100644 --- a/dbms/include/DB/Columns/ColumnVector.h +++ b/dbms/include/DB/Columns/ColumnVector.h @@ -77,7 +77,7 @@ template <> struct CompareHelper : public FloatCompareHelper { /** Шаблон столбцов, которые используют для хранения простой массив. */ template -class ColumnVector : public IColumn +class ColumnVector final : public IColumn { private: typedef ColumnVector Self; diff --git a/dbms/include/DB/Core/Defines.h b/dbms/include/DB/Core/Defines.h index 950b48fb60d..b89e1e86c87 100644 --- a/dbms/include/DB/Core/Defines.h +++ b/dbms/include/DB/Core/Defines.h @@ -69,3 +69,4 @@ #define DBMS_DISTRIBUTED_DIRECTORY_MONITOR_SLEEP_TIME_MS 100 #define ALWAYS_INLINE __attribute__((__always_inline__)) +#define NO_INLINE __attribute__((__noinline__)) diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 1a2afa54eb6..45ea90ce147 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -22,6 +22,7 @@ #include #include #include +#include @@ -53,25 +54,59 @@ typedef HashMap AggregatedDataWithKeys12 typedef HashMap, UInt128TrivialHash> AggregatedDataHashed; -/// Для случая, когда есть один числовой ключ. -struct AggregationMethodKey64 +/// Специализации для UInt8, UInt16. +struct TrivialHash { - typedef AggregatedDataWithUInt64Key Data; - typedef Data::key_type Key; - typedef Data::mapped_type Mapped; - typedef Data::iterator iterator; - typedef Data::const_iterator const_iterator; + template + size_t operator() (T key) const + { + return key; + } +}; + +/// Превращает хэш-таблицу в что-то типа lookup-таблицы. Остаётся неоптимальность - в ячейках хранятся ключи. +template +struct HashTableFixedGrower +{ + size_t bufSize() const { return 1 << key_bits; } + size_t mask() const { return bufSize() - 1; } + size_t place(size_t x) const { return x; } + size_t next(size_t pos) const { __builtin_unreachable(); return pos; } + bool overflow(size_t elems) const { return false; } + + void increaseSize() { __builtin_unreachable(); } + void set(size_t num_elems) {} + void setBufSize(size_t buf_size_) {} +}; + +typedef HashMap> AggregatedDataWithUInt8Key; +typedef HashMap> AggregatedDataWithUInt16Key; + +template struct AggregatedDataWithUIntKey { using Type = AggregatedDataWithUInt64Key; }; +template <> struct AggregatedDataWithUIntKey { using Type = AggregatedDataWithUInt8Key; }; +template <> struct AggregatedDataWithUIntKey { using Type = AggregatedDataWithUInt16Key; }; + + +/// Для случая, когда есть один числовой ключ. +template /// UInt8/16/32/64 для любых типов соответствующей битности. +struct AggregationMethodOneNumber +{ + typedef typename AggregatedDataWithUIntKey::Type Data; + typedef typename Data::key_type Key; + typedef typename Data::mapped_type Mapped; + typedef typename Data::iterator iterator; + typedef typename Data::const_iterator const_iterator; Data data; - const IColumn * column; + const ColumnVector * column; /** Вызывается в начале обработки каждого блока. * Устанавливает переменные, необходимые для остальных методов, вызываемых во внутренних циклах. */ void init(ConstColumnPlainPtrs & key_columns) { - column = key_columns[0]; + column = static_cast *>(key_columns[0]); } /// Достать из ключевых столбцов ключ для вставки в хэш-таблицу. @@ -99,7 +134,7 @@ struct AggregationMethodKey64 */ static void insertKeyIntoColumns(const_iterator & it, ColumnPlainPtrs & key_columns, size_t keys_size, const Sizes & key_sizes) { - key_columns[0]->insertData(reinterpret_cast(&it->first), sizeof(it->first)); + static_cast *>(key_columns[0])->insertData(reinterpret_cast(&it->first), sizeof(it->first)); } }; @@ -317,7 +352,10 @@ struct AggregatedDataVariants : private boost::noncopyable */ AggregatedDataWithoutKey without_key = nullptr; - std::unique_ptr key64; + std::unique_ptr> key8; + std::unique_ptr> key16; + std::unique_ptr> key32; + std::unique_ptr> key64; std::unique_ptr key_string; std::unique_ptr key_fixed_string; std::unique_ptr keys128; @@ -325,13 +363,16 @@ struct AggregatedDataVariants : private boost::noncopyable enum Type { - EMPTY = 0, - WITHOUT_KEY = 1, - KEY_64 = 2, - KEY_STRING = 3, - KEY_FIXED_STRING = 4, - KEYS_128 = 5, - HASHED = 6, + EMPTY = 0, + WITHOUT_KEY, + KEY_8, + KEY_16, + KEY_32, + KEY_64, + KEY_STRING, + KEY_FIXED_STRING, + KEYS_128, + HASHED, }; Type type = EMPTY; @@ -348,11 +389,14 @@ struct AggregatedDataVariants : private boost::noncopyable { case EMPTY: break; case WITHOUT_KEY: break; - case KEY_64: key64 .reset(new AggregationMethodKey64); break; - case KEY_STRING: key_string .reset(new AggregationMethodString); break; - case KEY_FIXED_STRING: key_fixed_string.reset(new AggregationMethodFixedString); break; - case KEYS_128: keys128 .reset(new AggregationMethodKeys128); break; - case HASHED: hashed .reset(new AggregationMethodHashed); break; + case KEY_8: key8 .reset(new decltype(key8)::element_type); break; + case KEY_16: key16 .reset(new decltype(key16)::element_type); break; + case KEY_32: key32 .reset(new decltype(key32)::element_type); break; + case KEY_64: key64 .reset(new decltype(key64)::element_type); break; + case KEY_STRING: key_string .reset(new decltype(key_string)::element_type); break; + case KEY_FIXED_STRING: key_fixed_string.reset(new decltype(key_fixed_string)::element_type); break; + case KEYS_128: keys128 .reset(new decltype(keys128)::element_type); break; + case HASHED: hashed .reset(new decltype(hashed)::element_type); break; default: throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); @@ -365,6 +409,9 @@ struct AggregatedDataVariants : private boost::noncopyable { case EMPTY: return 0; case WITHOUT_KEY: return 1; + case KEY_8: return key8->data.size() + (without_key != nullptr); + case KEY_16: return key16->data.size() + (without_key != nullptr); + case KEY_32: return key32->data.size() + (without_key != nullptr); case KEY_64: return key64->data.size() + (without_key != nullptr); case KEY_STRING: return key_string->data.size() + (without_key != nullptr); case KEY_FIXED_STRING: return key_fixed_string->data.size() + (without_key != nullptr); @@ -382,6 +429,9 @@ struct AggregatedDataVariants : private boost::noncopyable { case EMPTY: return "EMPTY"; case WITHOUT_KEY: return "WITHOUT_KEY"; + case KEY_8: return "KEY_8"; + case KEY_16: return "KEY_16"; + case KEY_32: return "KEY_32"; case KEY_64: return "KEY_64"; case KEY_STRING: return "KEY_STRING"; case KEY_FIXED_STRING: return "KEY_FIXED_STRING"; diff --git a/dbms/include/DB/Interpreters/SplittingAggregator.h b/dbms/include/DB/Interpreters/SplittingAggregator.h index c52c78509e8..ca2ba4224b3 100644 --- a/dbms/include/DB/Interpreters/SplittingAggregator.h +++ b/dbms/include/DB/Interpreters/SplittingAggregator.h @@ -94,6 +94,9 @@ private: void calculateHashesThread(Block & block, size_t begin, size_t end, ExceptionPtr & exception, MemoryTracker * memory_tracker); void aggregateThread(Block & block, AggregatedDataVariants & result, size_t thread_no, ExceptionPtr & exception, MemoryTracker * memory_tracker); void convertToBlockThread(AggregatedDataVariants & data_variant, Block & block, bool final, ExceptionPtr & exception, MemoryTracker * memory_tracker); + + template + void aggregateOneNumber(AggregatedDataVariants & result, size_t thread_no, bool no_more_keys); }; diff --git a/dbms/include/DB/Storages/StorageView.h b/dbms/include/DB/Storages/StorageView.h index 4a42122c3ce..07397610849 100644 --- a/dbms/include/DB/Storages/StorageView.h +++ b/dbms/include/DB/Storages/StorageView.h @@ -7,7 +7,8 @@ namespace DB { -class StorageView : public IStorage { +class StorageView : public IStorage +{ public: static StoragePtr create(const String & table_name_, const String & database_name_, @@ -16,7 +17,7 @@ public: std::string getName() const override { return "View"; } std::string getTableName() const override { return table_name; } const NamesAndTypesList & getColumnsList() const override { return *columns; } - DB::ASTPtr getInnerQuery() const { return inner_query.clone(); }; + ASTPtr getInnerQuery() const { return inner_query.clone(); }; /// Пробрасывается внутрь запроса и решается на его уровне. bool supportsSampling() const override { return true; } diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 43e56a77f2a..c863d861732 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -125,7 +125,18 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu /// Если есть один числовой ключ, который помещается в 64 бита if (keys_size == 1 && key_columns[0]->isNumeric()) - return AggregatedDataVariants::KEY_64; + { + size_t size_of_field = key_columns[0]->sizeOfField(); + if (size_of_field == 1) + return AggregatedDataVariants::KEY_8; + if (size_of_field == 2) + return AggregatedDataVariants::KEY_16; + if (size_of_field == 4) + return AggregatedDataVariants::KEY_32; + if (size_of_field == 8) + return AggregatedDataVariants::KEY_64; + throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.", ErrorCodes::LOGICAL_ERROR); + } /// Если ключи помещаются в 128 бит, будем использовать хэш-таблицу по упакованным в 128-бит ключам if (keys_fit_128_bits) @@ -167,8 +178,12 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const } +/** Интересно - если убрать noinline, то gcc зачем-то инлайнит эту функцию, и производительность уменьшается (~10%). + * (Возможно из-за того, что после инлайна этой функции, перестают инлайниться более внутренние функции.) + * Инлайнить не имеет смысла, так как внутренний цикл находится целиком внутри этой функции. + */ template -void Aggregator::executeImpl( +void NO_INLINE Aggregator::executeImpl( Method & method, Arena * aggregates_pool, size_t rows, @@ -226,7 +241,7 @@ void Aggregator::executeImpl( template -void Aggregator::convertToBlockImpl( +void NO_INLINE Aggregator::convertToBlockImpl( Method & method, ColumnPlainPtrs & key_columns, AggregateColumnsData & aggregate_columns, @@ -262,7 +277,7 @@ void Aggregator::convertToBlockImpl( template -void Aggregator::mergeDataImpl( +void NO_INLINE Aggregator::mergeDataImpl( Method & method_dst, Method & method_src) const { @@ -294,7 +309,7 @@ void Aggregator::mergeDataImpl( template -void Aggregator::mergeStreamsImpl( +void NO_INLINE Aggregator::mergeStreamsImpl( Method & method, Arena * aggregates_pool, size_t start_row, @@ -336,7 +351,7 @@ void Aggregator::mergeStreamsImpl( template -void Aggregator::destroyImpl( +void NO_INLINE Aggregator::destroyImpl( Method & method) const { for (typename Method::const_iterator it = method.data.begin(); it != method.data.end(); ++it) @@ -372,8 +387,14 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result, /// Запоминаем столбцы, с которыми будем работать for (size_t i = 0; i < keys_size; ++i) + { key_columns[i] = block.getByPosition(keys[i]).column; + if (key_columns[i]->isConst()) + throw Exception("Constants is not allowed as GROUP BY keys" + " (but all of them must be eliminated in ExpressionAnalyzer)", ErrorCodes::ILLEGAL_COLUMN); + } + for (size_t i = 0; i < aggregates_size; ++i) { for (size_t j = 0; j < aggregate_columns[i].size(); ++j) @@ -434,7 +455,16 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result, AggregateDataPtr overflow_row_ptr = overflow_row ? result.without_key : nullptr; - if (result.type == AggregatedDataVariants::KEY_64) + if (result.type == AggregatedDataVariants::KEY_8) + executeImpl(*result.key8, result.aggregates_pool, rows, key_columns, aggregate_columns, + result.key_sizes, key, no_more_keys, overflow_row_ptr); + else if (result.type == AggregatedDataVariants::KEY_16) + executeImpl(*result.key16, result.aggregates_pool, rows, key_columns, aggregate_columns, + result.key_sizes, key, no_more_keys, overflow_row_ptr); + else if (result.type == AggregatedDataVariants::KEY_32) + executeImpl(*result.key32, result.aggregates_pool, rows, key_columns, aggregate_columns, + result.key_sizes, key, no_more_keys, overflow_row_ptr); + else if (result.type == AggregatedDataVariants::KEY_64) executeImpl(*result.key64, result.aggregates_pool, rows, key_columns, aggregate_columns, result.key_sizes, key, no_more_keys, overflow_row_ptr); else if (result.type == AggregatedDataVariants::KEY_STRING) @@ -590,7 +620,16 @@ Block Aggregator::convertToBlock(AggregatedDataVariants & data_variants, bool fi size_t start_row = overflow_row ? 1 : 0; - if (data_variants.type == AggregatedDataVariants::KEY_64) + if (data_variants.type == AggregatedDataVariants::KEY_8) + convertToBlockImpl(*data_variants.key8, key_columns, aggregate_columns, + final_aggregate_columns, data_variants.key_sizes, start_row, final); + else if (data_variants.type == AggregatedDataVariants::KEY_16) + convertToBlockImpl(*data_variants.key16, key_columns, aggregate_columns, + final_aggregate_columns, data_variants.key_sizes, start_row, final); + else if (data_variants.type == AggregatedDataVariants::KEY_32) + convertToBlockImpl(*data_variants.key32, key_columns, aggregate_columns, + final_aggregate_columns, data_variants.key_sizes, start_row, final); + else if (data_variants.type == AggregatedDataVariants::KEY_64) convertToBlockImpl(*data_variants.key64, key_columns, aggregate_columns, final_aggregate_columns, data_variants.key_sizes, start_row, final); else if (data_variants.type == AggregatedDataVariants::KEY_STRING) @@ -694,7 +733,13 @@ AggregatedDataVariantsPtr Aggregator::merge(ManyAggregatedDataVariants & data_va current_data = nullptr; } - if (res->type == AggregatedDataVariants::KEY_64) + if (res->type == AggregatedDataVariants::KEY_8) + mergeDataImpl(*res->key8, *current.key8); + else if (res->type == AggregatedDataVariants::KEY_16) + mergeDataImpl(*res->key16, *current.key16); + else if (res->type == AggregatedDataVariants::KEY_32) + mergeDataImpl(*res->key32, *current.key32); + else if (res->type == AggregatedDataVariants::KEY_64) mergeDataImpl(*res->key64, *current.key64); else if (res->type == AggregatedDataVariants::KEY_STRING) mergeDataImpl(*res->key_string, *current.key_string); @@ -782,7 +827,13 @@ void Aggregator::merge(BlockInputStreamPtr stream, AggregatedDataVariants & resu size_t start_row = overflow_row ? 1 : 0; - if (result.type == AggregatedDataVariants::KEY_64) + if (result.type == AggregatedDataVariants::KEY_8) + mergeStreamsImpl(*result.key8, result.aggregates_pool, start_row, rows, key_columns, aggregate_columns, key_sizes, key); + else if (result.type == AggregatedDataVariants::KEY_16) + mergeStreamsImpl(*result.key16, result.aggregates_pool, start_row, rows, key_columns, aggregate_columns, key_sizes, key); + else if (result.type == AggregatedDataVariants::KEY_32) + mergeStreamsImpl(*result.key32, result.aggregates_pool, start_row, rows, key_columns, aggregate_columns, key_sizes, key); + else if (result.type == AggregatedDataVariants::KEY_64) mergeStreamsImpl(*result.key64, result.aggregates_pool, start_row, rows, key_columns, aggregate_columns, key_sizes, key); else if (result.type == AggregatedDataVariants::KEY_STRING) mergeStreamsImpl(*result.key_string, result.aggregates_pool, start_row, rows, key_columns, aggregate_columns, key_sizes, key); @@ -818,7 +869,13 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) aggregate_functions[i]->destroy(res_data + offsets_of_aggregate_states[i]); } - if (result.type == AggregatedDataVariants::KEY_64) + if (result.type == AggregatedDataVariants::KEY_8) + destroyImpl(*result.key8); + else if (result.type == AggregatedDataVariants::KEY_16) + destroyImpl(*result.key16); + else if (result.type == AggregatedDataVariants::KEY_32) + destroyImpl(*result.key32); + else if (result.type == AggregatedDataVariants::KEY_64) destroyImpl(*result.key64); else if (result.type == AggregatedDataVariants::KEY_STRING) destroyImpl(*result.key_string); diff --git a/dbms/src/Interpreters/SplittingAggregator.cpp b/dbms/src/Interpreters/SplittingAggregator.cpp index 2b97be9e5b7..7850ef5de3b 100644 --- a/dbms/src/Interpreters/SplittingAggregator.cpp +++ b/dbms/src/Interpreters/SplittingAggregator.cpp @@ -48,7 +48,10 @@ void SplittingAggregator::execute(BlockInputStreamPtr stream, ManyAggregatedData method = chooseAggregationMethod(key_columns, key_sizes); /// Подготавливаем массивы, куда будут складываться ключи или хэши от ключей. - if (method == AggregatedDataVariants::KEY_64) + if (method == AggregatedDataVariants::KEY_8 /// TODO не использовать SplittingAggregator для маленьких ключей. + || method == AggregatedDataVariants::KEY_16 + || method == AggregatedDataVariants::KEY_32 + || method == AggregatedDataVariants::KEY_64) { keys64.resize(rows); } @@ -96,7 +99,7 @@ void SplittingAggregator::execute(BlockInputStreamPtr stream, ManyAggregatedData pool.wait(); - rethrowFirstException(exceptions); + rethrowFirstException(exceptions); /// TODO Заменить на future, packaged_task /// Параллельно агрегируем в независимые хэш-таблицы @@ -150,14 +153,17 @@ void SplittingAggregator::calculateHashesThread(Block & block, size_t begin, siz try { - if (method == AggregatedDataVariants::KEY_64) + if (method == AggregatedDataVariants::KEY_8 + || method == AggregatedDataVariants::KEY_16 + || method == AggregatedDataVariants::KEY_32 + || method == AggregatedDataVariants::KEY_64) { const IColumn & column = *key_columns[0]; for (size_t i = begin; i < end; ++i) { - keys64[i] = column.get64(i); - thread_nums[i] = intHash32<0xd1f93e3190506c7cULL>(keys64[i]) % threads; + keys64[i] = column.get64(i); /// TODO Убрать виртуальный вызов + thread_nums[i] = intHash32<0xd1f93e3190506c7cULL>(keys64[i]) % threads; /// TODO более эффективная хэш-функция } } else if (method == AggregatedDataVariants::KEY_STRING) @@ -216,6 +222,45 @@ void SplittingAggregator::calculateHashesThread(Block & block, size_t begin, siz } +template +void SplittingAggregator::aggregateOneNumber(AggregatedDataVariants & result, size_t thread_no, bool no_more_keys) +{ + AggregatedDataWithUInt64Key & res = result.key64->data; + + for (size_t i = 0; i < rows; ++i) + { + if (thread_nums[i] != thread_no) + continue; + + /// Берём ключ + UInt64 key = keys64[i]; + + AggregatedDataWithUInt64Key::iterator it; + bool inserted; + + if (!no_more_keys) + res.emplace(key, it, inserted); + else + { + inserted = false; + it = res.find(key); + if (res.end() == it) + continue; + } + + if (inserted) + { + it->second = result.aggregates_pool->alloc(total_size_of_aggregate_states); + createAggregateStates(it->second); + } + + /// Добавляем значения + for (size_t j = 0; j < aggregates_size; ++j) + aggregate_functions[j]->add(it->second + offsets_of_aggregate_states[j], &aggregate_columns[j][0], i); + } +} + + void SplittingAggregator::aggregateThread( Block & block, AggregatedDataVariants & result, size_t thread_no, ExceptionPtr & exception, MemoryTracker * memory_tracker) { @@ -233,42 +278,14 @@ void SplittingAggregator::aggregateThread( bool no_more_keys = max_rows_to_group_by && size_of_all_results > max_rows_to_group_by; size_t old_result_size = result.size(); - if (method == AggregatedDataVariants::KEY_64) - { - AggregatedDataWithUInt64Key & res = result.key64->data; - - for (size_t i = 0; i < rows; ++i) - { - if (thread_nums[i] != thread_no) - continue; - - /// Берём ключ - UInt64 key = keys64[i]; - - AggregatedDataWithUInt64Key::iterator it; - bool inserted; - - if (!no_more_keys) - res.emplace(key, it, inserted); - else - { - inserted = false; - it = res.find(key); - if (res.end() == it) - continue; - } - - if (inserted) - { - it->second = result.aggregates_pool->alloc(total_size_of_aggregate_states); - createAggregateStates(it->second); - } - - /// Добавляем значения - for (size_t j = 0; j < aggregates_size; ++j) - aggregate_functions[j]->add(it->second + offsets_of_aggregate_states[j], &aggregate_columns[j][0], i); - } - } + if (method == AggregatedDataVariants::KEY_8) + aggregateOneNumber(result, thread_no, no_more_keys); + else if (method == AggregatedDataVariants::KEY_16) + aggregateOneNumber(result, thread_no, no_more_keys); + else if (method == AggregatedDataVariants::KEY_32) + aggregateOneNumber(result, thread_no, no_more_keys); + else if (method == AggregatedDataVariants::KEY_64) + aggregateOneNumber(result, thread_no, no_more_keys); else if (method == AggregatedDataVariants::KEY_STRING) { AggregatedDataWithStringKey & res = result.key_string->data; diff --git a/dbms/src/Storages/StorageSystemOne.cpp b/dbms/src/Storages/StorageSystemOne.cpp index de12e93fa0b..a6d66fed732 100644 --- a/dbms/src/Storages/StorageSystemOne.cpp +++ b/dbms/src/Storages/StorageSystemOne.cpp @@ -34,9 +34,9 @@ BlockInputStreams StorageSystemOne::read( ColumnWithNameAndType col; col.name = "dummy"; col.type = new DataTypeUInt8; - col.column = new ColumnConstUInt8(1, 0); + col.column = ColumnConstUInt8(1, 0).convertToFullColumn(); block.insert(col); - + return BlockInputStreams(1, new OneBlockInputStream(block)); } diff --git a/dbms/tests/queries/0_stateless/00019_quantiles_totals_distributed.sql b/dbms/tests/queries/0_stateless/00019_quantiles_totals_distributed.sql index c075aeecc08..85965f4d8f6 100644 --- a/dbms/tests/queries/0_stateless/00019_quantiles_totals_distributed.sql +++ b/dbms/tests/queries/0_stateless/00019_quantiles_totals_distributed.sql @@ -1 +1 @@ -SELECT quantilesTiming(0.1, 0.5, 0.9)(materialize(dummy)) FROM remote('127.0.0.{1,2}', system, one) GROUP BY 1 WITH TOTALS +SELECT quantilesTiming(0.1, 0.5, 0.9)(dummy) FROM remote('127.0.0.{1,2}', system, one) GROUP BY 1 WITH TOTALS From 37e599934fd28018186ce3f6fa705b3d302a1682 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 29 Oct 2014 05:35:16 +0300 Subject: [PATCH 15/29] dbms: further [#METR-2944]. --- dbms/include/DB/Common/HashTable/HashTable.h | 18 ++++++---- dbms/include/DB/Interpreters/Aggregator.h | 37 +++++++++++++++++--- dbms/src/Interpreters/Aggregator.cpp | 6 ++-- 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index a3f4976eb1b..e67c890180f 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -455,7 +455,7 @@ public: const_iterator begin() const { if (this->hasZero()) - return const_iterator(this, this->zeroValue()); + return iteratorToZero(); const Cell * ptr = buf; while (ptr < buf + grower.bufSize() && ptr->isZero(*this)) @@ -467,7 +467,7 @@ public: iterator begin() { if (this->hasZero()) - return iterator(this, this->zeroValue()); + return iteratorToZero(); Cell * ptr = buf; while (ptr < buf + grower.bufSize() && ptr->isZero(*this)) @@ -481,6 +481,10 @@ public: protected: + const_iterator iteratorToZero() const { return const_iterator(this, this->zeroValue()); } + iterator iteratorToZero() { return iterator(this, this->zeroValue()); } + + /// Если ключ нулевой - вставить его в специальное место и вернуть true. bool emplaceIfZero(Key x, iterator & it, bool & inserted) { @@ -490,17 +494,17 @@ protected: if (Cell::isZero(x, *this)) { + it = iteratorToZero(); if (!this->hasZero()) { ++m_size; this->setHasZero(); + it.ptr->setHash(hash(x)); inserted = true; } else inserted = false; - it = begin(); - it.ptr->setHash(hash(x)); return true; } @@ -545,7 +549,7 @@ public: if (res.second) res.first.ptr->setMapped(x); - + return res; } @@ -583,7 +587,7 @@ public: iterator find(Key x) { if (Cell::isZero(x, *this)) - return this->hasZero() ? begin() : end(); + return this->hasZero() ? iteratorToZero() : end(); size_t place_value = findCell(x, grower.place(hash(x))); @@ -594,7 +598,7 @@ public: const_iterator find(Key x) const { if (Cell::isZero(x, *this)) - return this->hasZero() ? begin() : end(); + return this->hasZero() ? iteratorToZero() : end(); size_t place_value = findCell(x, grower.place(hash(x))); diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 45ea90ce147..60498edb4c7 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -69,9 +69,9 @@ template struct HashTableFixedGrower { size_t bufSize() const { return 1 << key_bits; } - size_t mask() const { return bufSize() - 1; } size_t place(size_t x) const { return x; } - size_t next(size_t pos) const { __builtin_unreachable(); return pos; } + /// Тут можно было бы написать __builtin_unreachable(), но компилятор не до конца всё оптимизирует, и получается менее эффективно. + size_t next(size_t pos) const { return pos + 1; } bool overflow(size_t elems) const { return false; } void increaseSize() { __builtin_unreachable(); } @@ -82,9 +82,26 @@ struct HashTableFixedGrower typedef HashMap> AggregatedDataWithUInt8Key; typedef HashMap> AggregatedDataWithUInt16Key; -template struct AggregatedDataWithUIntKey { using Type = AggregatedDataWithUInt64Key; }; -template <> struct AggregatedDataWithUIntKey { using Type = AggregatedDataWithUInt8Key; }; -template <> struct AggregatedDataWithUIntKey { using Type = AggregatedDataWithUInt16Key; }; +template +struct AggregatedDataWithUIntKey +{ + using Type = AggregatedDataWithUInt64Key; + static constexpr bool never_overflows = false; +}; + +template <> +struct AggregatedDataWithUIntKey +{ + using Type = AggregatedDataWithUInt8Key; + static constexpr bool never_overflows = true; /// Говорит о том, что в результате агрегации не может быть много записей. +}; + +template <> +struct AggregatedDataWithUIntKey +{ + using Type = AggregatedDataWithUInt16Key; + static constexpr bool never_overflows = true; +}; /// Для случая, когда есть один числовой ключ. @@ -97,6 +114,8 @@ struct AggregationMethodOneNumber typedef typename Data::iterator iterator; typedef typename Data::const_iterator const_iterator; + static constexpr bool never_overflows = AggregatedDataWithUIntKey::never_overflows; + Data data; const ColumnVector * column; @@ -148,6 +167,8 @@ struct AggregationMethodString typedef Data::iterator iterator; typedef Data::const_iterator const_iterator; + static constexpr bool never_overflows = false; + Data data; const ColumnString::Offsets_t * offsets; @@ -195,6 +216,8 @@ struct AggregationMethodFixedString typedef Data::iterator iterator; typedef Data::const_iterator const_iterator; + static constexpr bool never_overflows = false; + Data data; size_t n; @@ -242,6 +265,8 @@ struct AggregationMethodKeys128 typedef Data::iterator iterator; typedef Data::const_iterator const_iterator; + static constexpr bool never_overflows = false; + Data data; void init(ConstColumnPlainPtrs & key_columns) @@ -287,6 +312,8 @@ struct AggregationMethodHashed typedef Data::iterator iterator; typedef Data::const_iterator const_iterator; + static constexpr bool never_overflows = false; + Data data; void init(ConstColumnPlainPtrs & key_columns) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index c863d861732..50c873c7a10 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -206,7 +206,7 @@ void NO_INLINE Aggregator::executeImpl( /// Получаем ключ для вставки в хэш-таблицу. typename Method::Key key = method.getKey(key_columns, keys_size, i, key_sizes, keys); - if (!no_more_keys) /// Вставляем. + if (Method::never_overflows || !no_more_keys) /// Вставляем. method.data.emplace(key, it, inserted); else { @@ -218,7 +218,7 @@ void NO_INLINE Aggregator::executeImpl( } /// Если ключ не поместился, и данные не надо агрегировать в отдельную строку, то делать нечего. - if (overflow && !overflow_row) + if (!Method::never_overflows && overflow && !overflow_row) continue; /// Если вставили новый ключ - инициализируем состояния агрегатных функций, и возможно, что-нибудь связанное с ключом. @@ -231,7 +231,7 @@ void NO_INLINE Aggregator::executeImpl( createAggregateStates(aggregate_data); } - AggregateDataPtr value = !overflow ? Method::getAggregateData(it->second) : overflow_row; + AggregateDataPtr value = (Method::never_overflows || !overflow) ? Method::getAggregateData(it->second) : overflow_row; /// Добавляем значения в агрегатные функции. for (size_t j = 0; j < aggregates_size; ++j) From ee34ca31d8fcc7f91b656eb64a5666bdbc0e3746 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 29 Oct 2014 15:25:33 +0300 Subject: [PATCH 16/29] dbms: add MD5, SHA1, SHA224, SHA256, IPv6NumToString. [#METR-13151] --- dbms/include/DB/Columns/ColumnConst.h | 3 + dbms/include/DB/Common/SipHash.h | 13 +- dbms/include/DB/Functions/FunctionsCoding.h | 88 +++++++++++- dbms/include/DB/Functions/FunctionsHashing.h | 140 ++++++++++++++++++- dbms/src/Functions/FunctionsCoding.cpp | 3 +- dbms/src/Functions/FunctionsHashing.cpp | 7 +- 6 files changed, 247 insertions(+), 7 deletions(-) diff --git a/dbms/include/DB/Columns/ColumnConst.h b/dbms/include/DB/Columns/ColumnConst.h index f7f471f945f..c97d4207c9a 100644 --- a/dbms/include/DB/Columns/ColumnConst.h +++ b/dbms/include/DB/Columns/ColumnConst.h @@ -140,6 +140,9 @@ public: max = FieldType(data); } + DataTypePtr & getDataType() { return data_type; } + const DataTypePtr & getDataType() const { return data_type; } + private: size_t s; T data; diff --git a/dbms/include/DB/Common/SipHash.h b/dbms/include/DB/Common/SipHash.h index 267c89c7218..62289f72681 100644 --- a/dbms/include/DB/Common/SipHash.h +++ b/dbms/include/DB/Common/SipHash.h @@ -30,7 +30,7 @@ class SipHash private: typedef uint64_t u64; typedef uint8_t u8; - + /// Состояние. u64 v0; u64 v1; @@ -131,7 +131,7 @@ public: } /// Получить результат в некотором виде. Это можно сделать только один раз! - + void get128(char * out) { finalize(); @@ -158,7 +158,14 @@ public: #undef SIPROUND -inline uint64_t sipHash64(const char * data, size_t size) +inline void sipHash128(const char * data, const size_t size, char * out) +{ + SipHash hash; + hash.update(data, size); + hash.get128(out); +} + +inline uint64_t sipHash64(const char * data, const size_t size) { SipHash hash; hash.update(data, size); diff --git a/dbms/include/DB/Functions/FunctionsCoding.h b/dbms/include/DB/Functions/FunctionsCoding.h index 168f8a10a18..e1345d3f3f1 100644 --- a/dbms/include/DB/Functions/FunctionsCoding.h +++ b/dbms/include/DB/Functions/FunctionsCoding.h @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB @@ -35,6 +36,91 @@ namespace DB /// Включая нулевой символ в конце. #define MAX_UINT_HEX_LENGTH 20 +const auto ipv6_fixed_string_length = 16; + +class FunctionIPv6NumToString : public IFunction +{ +public: + String getName() const { return "IPv6NumToString"; } + + DataTypePtr getReturnType(const DataTypes & arguments) const + { + if (arguments.size() != 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto ptr = typeid_cast(arguments[0].get()); + if (!ptr || ptr->getN() != ipv6_fixed_string_length) + throw Exception("Illegal type " + arguments[0]->getName() + + " of argument of function " + getName() + + ", expected FixedString(" + toString(ipv6_fixed_string_length) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return new DataTypeString; + } + + void execute(Block & block, const ColumnNumbers & arguments, const size_t result) + { + const auto & col_name_type = block.getByPosition(arguments[0]); + const ColumnPtr & column = col_name_type.column; + + if (const auto col_in = typeid_cast(column.get())) + { + if (col_in->getN() != ipv6_fixed_string_length) + throw Exception("Illegal type " + col_name_type.type->getName() + + " of column " + col_in->getName() + + " argument of function " + getName() + + ", expected FixedString(" + toString(ipv6_fixed_string_length) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto size = col_in->size(); + const auto & vec_in = col_in->getChars(); + + auto col_res = new ColumnString; + block.getByPosition(result).column = col_res; + + ColumnString::Chars_t & vec_res = col_res->getChars(); + ColumnString::Offsets_t & offsets_res = col_res->getOffsets(); + vec_res.resize(size * INET6_ADDRSTRLEN); + offsets_res.resize(size); + + auto begin = reinterpret_cast(&vec_res[0]); + auto pos = begin; + + for (size_t i = 0; i < vec_in.size(); i += ipv6_fixed_string_length) + { + inet_ntop(AF_INET6, &vec_in[i], pos, INET6_ADDRSTRLEN); + pos = static_cast(memchr(pos, 0, INET6_ADDRSTRLEN)) + 1; + offsets_res[i] = pos - begin; + } + + vec_res.resize(pos - begin); + } + else if (const auto col_in = typeid_cast *>(column.get())) + { + const auto data_type_fixed_string = typeid_cast(col_in->getDataType().get()); + if (!data_type_fixed_string || data_type_fixed_string->getN() != ipv6_fixed_string_length) + throw Exception("Illegal type " + col_name_type.type->getName() + + " of column " + col_in->getName() + + " argument of function " + getName() + + ", expected FixedString(" + toString(ipv6_fixed_string_length) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto & data_in = col_in->getData(); + + char buf[INET6_ADDRSTRLEN]; + inet_ntop(AF_INET6, data_in.data(), buf, sizeof(buf)); + + block.getByPosition(result).column = new ColumnConstString{col_in->size(), buf}; + } + else + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + class FunctionIPv4NumToString : public IFunction { public: @@ -108,7 +194,7 @@ public: ColumnString::Chars_t & vec_res = col_res->getChars(); ColumnString::Offsets_t & offsets_res = col_res->getOffsets(); - vec_res.resize(vec_in.size() * 16); /// самое длинное значение: 255.255.255.255\0 + vec_res.resize(vec_in.size() * INET_ADDRSTRLEN); /// самое длинное значение: 255.255.255.255\0 offsets_res.resize(vec_in.size()); char * begin = reinterpret_cast(&vec_res[0]); char * pos = begin; diff --git a/dbms/include/DB/Functions/FunctionsHashing.h b/dbms/include/DB/Functions/FunctionsHashing.h index 8664f22a66a..d64c32ba197 100644 --- a/dbms/include/DB/Functions/FunctionsHashing.h +++ b/dbms/include/DB/Functions/FunctionsHashing.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -64,6 +65,62 @@ struct HalfMD5Impl } }; +struct MD5Impl +{ + static constexpr auto name = "MD5"; + static constexpr auto length = 16; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + MD5_CTX ctx; + MD5_Init(&ctx); + MD5_Update(&ctx, reinterpret_cast(begin), size); + MD5_Final(out_char_data, &ctx); + } +}; + +struct SHA1Impl +{ + static constexpr auto name = "SHA1"; + static constexpr auto length = 20; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA_CTX ctx; + SHA1_Init(&ctx); + SHA1_Update(&ctx, reinterpret_cast(begin), size); + SHA1_Final(out_char_data, &ctx); + } +}; + +struct SHA224Impl +{ + static constexpr auto name = "SHA224"; + static constexpr auto length = 28; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA256_CTX ctx; + SHA224_Init(&ctx); + SHA224_Update(&ctx, reinterpret_cast(begin), size); + SHA224_Final(out_char_data, &ctx); + } +}; + +struct SHA256Impl +{ + static constexpr auto name = "SHA256"; + static constexpr auto length = 32; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + SHA256_CTX ctx; + SHA256_Init(&ctx); + SHA256_Update(&ctx, reinterpret_cast(begin), size); + SHA256_Final(out_char_data, &ctx); + } +}; + struct SipHash64Impl { static UInt64 apply(const char * begin, size_t size) @@ -72,6 +129,17 @@ struct SipHash64Impl } }; +struct SipHash128Impl +{ + static constexpr auto name = "SipHash128"; + static constexpr auto length = 16; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + sipHash128(begin, size, reinterpret_cast(out_char_data)); + } +}; + struct IntHash32Impl { typedef UInt32 ReturnType; @@ -152,6 +220,72 @@ public: }; +template +class FunctionStringHashFixedString : public IFunction +{ +public: + /// Получить имя функции. + String getName() const + { + return Impl::name; + } + + /// Получить тип результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение. + DataTypePtr getReturnType(const DataTypes & arguments) const + { + if (arguments.size() != 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (!typeid_cast(&*arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return new DataTypeFixedString{Impl::length}; + } + + /// Выполнить функцию над блоком. + void execute(Block & block, const ColumnNumbers & arguments, size_t result) + { + if (const ColumnString * col_from = typeid_cast(&*block.getByPosition(arguments[0]).column)) + { + auto col_to = new ColumnFixedString{Impl::length}; + block.getByPosition(result).column = col_to; + + const typename ColumnString::Chars_t & data = col_from->getChars(); + const typename ColumnString::Offsets_t & offsets = col_from->getOffsets(); + auto & chars_to = col_to->getChars(); + const auto size = offsets.size(); + chars_to.resize(size * Impl::length); + + for (size_t i = 0; i < size; ++i) + Impl::apply( + reinterpret_cast(&data[i == 0 ? 0 : offsets[i - 1]]), + i == 0 ? offsets[i] - 1 : (offsets[i] - 1 - offsets[i - 1]), + &chars_to[i * Impl::length]); + } + else if (const ColumnConstString * col_from = typeid_cast(&*block.getByPosition(arguments[0]).column)) + { + const auto & data = col_from->getData(); + + String hash(Impl::length, 0); + Impl::apply(data.data(), data.size(), reinterpret_cast(&hash[0])); + + block.getByPosition(result).column = new ColumnConst{ + col_from->size(), + hash, + new DataTypeFixedString{Impl::length} + }; + } + else + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + template class FunctionIntHash : public IFunction { @@ -465,6 +599,10 @@ typedef FunctionStringHash64 FunctionHalfMD5; typedef FunctionStringHash64 FunctionSipHash64; typedef FunctionIntHash FunctionIntHash32; typedef FunctionIntHash FunctionIntHash64; - +typedef FunctionStringHashFixedString FunctionMD5; +typedef FunctionStringHashFixedString FunctionSHA1; +typedef FunctionStringHashFixedString FunctionSHA224; +typedef FunctionStringHashFixedString FunctionSHA256; +typedef FunctionStringHashFixedString FunctionSipHash128; } diff --git a/dbms/src/Functions/FunctionsCoding.cpp b/dbms/src/Functions/FunctionsCoding.cpp index 8d9bcaa0a5a..1ee0c094220 100644 --- a/dbms/src/Functions/FunctionsCoding.cpp +++ b/dbms/src/Functions/FunctionsCoding.cpp @@ -6,9 +6,10 @@ namespace DB void registerFunctionsCoding(FunctionFactory & factory) { - #define F [](const Context & context) -> IFunction* + #define F [](const Context & context) -> IFunction * factory.registerFunction("toStringCutToZero", F { return new FunctionToStringCutToZero; }); + factory.registerFunction("IPv6NumToString", F { return new FunctionIPv6NumToString; }); factory.registerFunction("IPv4NumToString", F { return new FunctionIPv4NumToString; }); factory.registerFunction("IPv4StringToNum", F { return new FunctionIPv4StringToNum; }); factory.registerFunction("hex", F { return new FunctionHex; }); diff --git a/dbms/src/Functions/FunctionsHashing.cpp b/dbms/src/Functions/FunctionsHashing.cpp index e2aa12dabf3..5d0703ff013 100644 --- a/dbms/src/Functions/FunctionsHashing.cpp +++ b/dbms/src/Functions/FunctionsHashing.cpp @@ -7,10 +7,15 @@ namespace DB void registerFunctionsHashing(FunctionFactory & factory) { - #define F [](const Context & context) -> IFunction* + #define F [](const Context & context) -> IFunction * factory.registerFunction("halfMD5", F { return new FunctionHalfMD5; }); + factory.registerFunction("MD5", F { return new FunctionMD5; }); + factory.registerFunction("SHA1", F { return new FunctionSHA1; }); + factory.registerFunction("SHA224", F { return new FunctionSHA224; }); + factory.registerFunction("SHA256", F { return new FunctionSHA256; }); factory.registerFunction("sipHash64", F { return new FunctionSipHash64; }); + factory.registerFunction("sipHash128", F { return new FunctionSipHash128; }); factory.registerFunction("cityHash64", F { return new FunctionCityHash64; }); factory.registerFunction("intHash32", F { return new FunctionIntHash32; }); factory.registerFunction("intHash64", F { return new FunctionIntHash64; }); From a5905c608ac5b60a7e130d67798180e88f5783cd Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 29 Oct 2014 15:49:19 +0300 Subject: [PATCH 17/29] dbms: IPv6StringToNum. [#METR-13151] --- dbms/include/DB/Functions/FunctionsCoding.h | 64 ++++++++++++++++++++- dbms/src/Functions/FunctionsCoding.cpp | 1 + 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/dbms/include/DB/Functions/FunctionsCoding.h b/dbms/include/DB/Functions/FunctionsCoding.h index e1345d3f3f1..eb7c55b4623 100644 --- a/dbms/include/DB/Functions/FunctionsCoding.h +++ b/dbms/include/DB/Functions/FunctionsCoding.h @@ -121,6 +121,68 @@ public: } }; +class FunctionIPv6StringToNum : public IFunction +{ +public: + String getName() const { return "IPv6StringToNum"; } + + DataTypePtr getReturnType(const DataTypes & arguments) const + { + if (arguments.size() != 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (!typeid_cast(&*arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return new DataTypeFixedString{ipv6_fixed_string_length}; + } + + void execute(Block & block, const ColumnNumbers & arguments, size_t result) + { + const ColumnPtr & column = block.getByPosition(arguments[0]).column; + + if (const auto col_in = typeid_cast(&*column)) + { + const auto col_res = new ColumnFixedString{ipv6_fixed_string_length}; + block.getByPosition(result).column = col_res; + + auto & vec_res = col_res->getChars(); + vec_res.resize(col_in->size() * ipv6_fixed_string_length); + + const ColumnString::Chars_t & vec_src = col_in->getChars(); + const ColumnString::Offsets_t & offsets_src = col_in->getOffsets(); + size_t src_offset = 0; + + for (size_t out_offset = 0, i = 0; + out_offset < vec_res.size(); + out_offset += ipv6_fixed_string_length, ++i) + { + inet_pton(AF_INET6, reinterpret_cast(&vec_src[src_offset]), &vec_res[out_offset]); + src_offset = offsets_src[i]; + } + } + else if (const auto col_in = typeid_cast(&*column)) + { + String out(ipv6_fixed_string_length, 0); + inet_pton(AF_INET6, col_in->getData().data(), &out[0]); + + block.getByPosition(result).column = new ColumnConst{ + col_in->size(), + out, + new DataTypeFixedString{ipv6_fixed_string_length} + }; + } + else + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + class FunctionIPv4NumToString : public IFunction { public: @@ -247,7 +309,7 @@ public: return new DataTypeUInt32; } - static inline bool isDigit(char c) + static bool isDigit(char c) { return c >= '0' && c <= '9'; } diff --git a/dbms/src/Functions/FunctionsCoding.cpp b/dbms/src/Functions/FunctionsCoding.cpp index 1ee0c094220..90f2bfa8063 100644 --- a/dbms/src/Functions/FunctionsCoding.cpp +++ b/dbms/src/Functions/FunctionsCoding.cpp @@ -10,6 +10,7 @@ void registerFunctionsCoding(FunctionFactory & factory) factory.registerFunction("toStringCutToZero", F { return new FunctionToStringCutToZero; }); factory.registerFunction("IPv6NumToString", F { return new FunctionIPv6NumToString; }); + factory.registerFunction("IPv6StringToNum", F { return new FunctionIPv6StringToNum; }); factory.registerFunction("IPv4NumToString", F { return new FunctionIPv4NumToString; }); factory.registerFunction("IPv4StringToNum", F { return new FunctionIPv4StringToNum; }); factory.registerFunction("hex", F { return new FunctionHex; }); From 74b2f521d0a5c9c8fe6412ec4502cfef27279087 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Wed, 29 Oct 2014 16:52:39 +0300 Subject: [PATCH 18/29] dbms: fix IPv6NumToString out of bounds access. [#METR-13151] --- dbms/include/DB/Functions/FunctionsCoding.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsCoding.h b/dbms/include/DB/Functions/FunctionsCoding.h index eb7c55b4623..aa8e1bfb69d 100644 --- a/dbms/include/DB/Functions/FunctionsCoding.h +++ b/dbms/include/DB/Functions/FunctionsCoding.h @@ -88,9 +88,9 @@ public: auto begin = reinterpret_cast(&vec_res[0]); auto pos = begin; - for (size_t i = 0; i < vec_in.size(); i += ipv6_fixed_string_length) + for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += ipv6_fixed_string_length, ++i) { - inet_ntop(AF_INET6, &vec_in[i], pos, INET6_ADDRSTRLEN); + inet_ntop(AF_INET6, &vec_in[offset], pos, INET6_ADDRSTRLEN); pos = static_cast(memchr(pos, 0, INET6_ADDRSTRLEN)) + 1; offsets_res[i] = pos - begin; } From d93b251fa4279b7e54d28147d96a4ee23ade36fe Mon Sep 17 00:00:00 2001 From: Pavel Kartavyy Date: Thu, 30 Oct 2014 16:28:45 +0300 Subject: [PATCH 19/29] =?UTF-8?q?=E2=96=88=E2=96=88=E2=96=88=E2=96=88?= =?UTF-8?q?=E2=96=88=E2=96=88=E2=96=88=E2=96=88=E2=96=88=E2=96=88=E2=96=88?= =?UTF-8?q?:=20restart=20if=20unrecoverable=20error=20happened=20[#METR-10?= =?UTF-8?q?969]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/libzkutil/include/zkutil/KeeperException.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libs/libzkutil/include/zkutil/KeeperException.h b/libs/libzkutil/include/zkutil/KeeperException.h index 4ac9f12c7eb..cc3b097eb6a 100644 --- a/libs/libzkutil/include/zkutil/KeeperException.h +++ b/libs/libzkutil/include/zkutil/KeeperException.h @@ -23,6 +23,11 @@ public: const char * className() const throw() { return "zkutil::KeeperException"; } KeeperException * clone() const { return new KeeperException(*this); } + /// при этих ошибках надо переинициализировать сессию с zookeeper + bool isUnrecoverable() const + { + return code == ZINVALIDSTATE || code == ZSESSIONEXPIRED; + } int32_t code; private: @@ -30,6 +35,7 @@ private: { ProfileEvents::increment(ProfileEvents::ZooKeeperExceptions); } + }; }; From a88c5e74aba29637ab0112b2d07df2e17d1d6abc Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Thu, 30 Oct 2014 17:43:21 +0300 Subject: [PATCH 20/29] dbms: improve performance of IPv6NumToString. [#METR-13151] --- dbms/include/DB/Functions/FunctionsCoding.h | 156 +++++++++++++++++--- 1 file changed, 139 insertions(+), 17 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsCoding.h b/dbms/include/DB/Functions/FunctionsCoding.h index aa8e1bfb69d..26ff0feadde 100644 --- a/dbms/include/DB/Functions/FunctionsCoding.h +++ b/dbms/include/DB/Functions/FunctionsCoding.h @@ -12,7 +12,10 @@ #include #include #include + #include +#include +#include namespace DB @@ -36,7 +39,7 @@ namespace DB /// Включая нулевой символ в конце. #define MAX_UINT_HEX_LENGTH 20 -const auto ipv6_fixed_string_length = 16; +const auto ipv6_string_length = 16; class FunctionIPv6NumToString : public IFunction { @@ -51,15 +54,134 @@ public: ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const auto ptr = typeid_cast(arguments[0].get()); - if (!ptr || ptr->getN() != ipv6_fixed_string_length) + if (!ptr || ptr->getN() != ipv6_string_length) throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName() + - ", expected FixedString(" + toString(ipv6_fixed_string_length) + ")", + ", expected FixedString(" + toString(ipv6_string_length) + ")", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return new DataTypeString; } + /// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base) + static constexpr uint32_t int_log(const uint32_t value, const uint32_t base, const bool carry = false) + { + return value >= base ? 1 + int_log(value / base, base, value % base || carry) : value % base > 1 || carry; + } + + /// mapping of digits up to base 16 + static constexpr auto && digits = "0123456789abcdef"; + + /// print integer in desired base, faster than sprintf + template + static void print_integer(char *& out, T value) + { + if (value == 0) + *out++ = '0'; + else + { + char buf[buffer_size]; + auto ptr = buf; + + while (value > 0) + { + *ptr++ = digits[value % base]; + value /= base; + } + + while (ptr != buf) + *out++ = *--ptr; + } + } + + /// print IPv4 address as %u.%u.%u.%u + static void ipv4_format(const unsigned char * src, char *& out) + { + constexpr auto size = sizeof(UInt32); + + for (const auto i : ext::range(0, size)) + { + print_integer<10, UInt8>(out, src[i]); + + if (i != size - 1) + *out++ = '.'; + } + } + + /** rewritten inet_ntop6 from http://www.cs.cmu.edu/afs/cs/academic/class/15213-f00/unpv12e/libfree/inet_ntop.c + * performs significantly faster than the reference implementation due to the absence of sprintf calls, + * bounds checking, unnecessary string copying and length calculation */ + static const void ipv6_format(const unsigned char * src, char *& out) + { + struct { int base, len; } best{-1}, cur{-1}; + std::array words{}; + + /** Preprocess: + * Copy the input (bytewise) array into a wordwise array. + * Find the longest run of 0x00's in src[] for :: shorthanding. */ + for (const auto i : ext::range(0, ipv6_string_length)) + words[i / 2] |= src[i] << ((1 - (i % 2)) << 3); + + for (const auto i : ext::range(0, words.size())) + { + if (words[i] == 0) { + if (cur.base == -1) + cur.base = i, cur.len = 1; + else + cur.len++; + } + else + { + if (cur.base != -1) + { + if (best.base == -1 || cur.len > best.len) + best = cur; + cur.base = -1; + } + } + } + + if (cur.base != -1) + { + if (best.base == -1 || cur.len > best.len) + best = cur; + } + + if (best.base != -1 && best.len < 2) + best.base = -1; + + /// Format the result. + for (const int i : ext::range(0, words.size())) + { + /// Are we inside the best run of 0x00's? + if (best.base != -1 && i >= best.base && i < (best.base + best.len)) + { + if (i == best.base) + *out++ = ':'; + continue; + } + + /// Are we following an initial run of 0x00s or any real hex? + if (i != 0) + *out++ = ':'; + + /// Is this address an encapsulated IPv4? + if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) + { + ipv4_format(src + 12, out); + break; + } + + print_integer<16>(out, words[i]); + } + + /// Was it a trailing run of 0x00's? + if (best.base != -1 && (best.base + best.len) == words.size()) + *out++ = ':'; + + *out++ = '\0'; + } + void execute(Block & block, const ColumnNumbers & arguments, const size_t result) { const auto & col_name_type = block.getByPosition(arguments[0]); @@ -67,11 +189,11 @@ public: if (const auto col_in = typeid_cast(column.get())) { - if (col_in->getN() != ipv6_fixed_string_length) + if (col_in->getN() != ipv6_string_length) throw Exception("Illegal type " + col_name_type.type->getName() + " of column " + col_in->getName() + " argument of function " + getName() + - ", expected FixedString(" + toString(ipv6_fixed_string_length) + ")", + ", expected FixedString(" + toString(ipv6_string_length) + ")", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); const auto size = col_in->size(); @@ -88,10 +210,9 @@ public: auto begin = reinterpret_cast(&vec_res[0]); auto pos = begin; - for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += ipv6_fixed_string_length, ++i) + for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += ipv6_string_length, ++i) { - inet_ntop(AF_INET6, &vec_in[offset], pos, INET6_ADDRSTRLEN); - pos = static_cast(memchr(pos, 0, INET6_ADDRSTRLEN)) + 1; + ipv6_format(&vec_in[offset], pos); offsets_res[i] = pos - begin; } @@ -100,17 +221,18 @@ public: else if (const auto col_in = typeid_cast *>(column.get())) { const auto data_type_fixed_string = typeid_cast(col_in->getDataType().get()); - if (!data_type_fixed_string || data_type_fixed_string->getN() != ipv6_fixed_string_length) + if (!data_type_fixed_string || data_type_fixed_string->getN() != ipv6_string_length) throw Exception("Illegal type " + col_name_type.type->getName() + " of column " + col_in->getName() + " argument of function " + getName() + - ", expected FixedString(" + toString(ipv6_fixed_string_length) + ")", + ", expected FixedString(" + toString(ipv6_string_length) + ")", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); const auto & data_in = col_in->getData(); char buf[INET6_ADDRSTRLEN]; - inet_ntop(AF_INET6, data_in.data(), buf, sizeof(buf)); + char * dst = buf; + ipv6_format(reinterpret_cast(data_in.data()), dst); block.getByPosition(result).column = new ColumnConstString{col_in->size(), buf}; } @@ -137,7 +259,7 @@ public: throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return new DataTypeFixedString{ipv6_fixed_string_length}; + return new DataTypeFixedString{ipv6_string_length}; } void execute(Block & block, const ColumnNumbers & arguments, size_t result) @@ -146,11 +268,11 @@ public: if (const auto col_in = typeid_cast(&*column)) { - const auto col_res = new ColumnFixedString{ipv6_fixed_string_length}; + const auto col_res = new ColumnFixedString{ipv6_string_length}; block.getByPosition(result).column = col_res; auto & vec_res = col_res->getChars(); - vec_res.resize(col_in->size() * ipv6_fixed_string_length); + vec_res.resize(col_in->size() * ipv6_string_length); const ColumnString::Chars_t & vec_src = col_in->getChars(); const ColumnString::Offsets_t & offsets_src = col_in->getOffsets(); @@ -158,7 +280,7 @@ public: for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); - out_offset += ipv6_fixed_string_length, ++i) + out_offset += ipv6_string_length, ++i) { inet_pton(AF_INET6, reinterpret_cast(&vec_src[src_offset]), &vec_res[out_offset]); src_offset = offsets_src[i]; @@ -166,13 +288,13 @@ public: } else if (const auto col_in = typeid_cast(&*column)) { - String out(ipv6_fixed_string_length, 0); + String out(ipv6_string_length, 0); inet_pton(AF_INET6, col_in->getData().data(), &out[0]); block.getByPosition(result).column = new ColumnConst{ col_in->size(), out, - new DataTypeFixedString{ipv6_fixed_string_length} + new DataTypeFixedString{ipv6_string_length} }; } else From 152ca18a214273c0d03a83ff110943a15dd99826 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 31 Oct 2014 13:50:20 +0300 Subject: [PATCH 21/29] dbms: drastic increase in IPv6StringToNum performance. [#METR-13151] --- dbms/include/DB/Functions/FunctionsCoding.h | 212 +++++++++++++++++--- 1 file changed, 183 insertions(+), 29 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsCoding.h b/dbms/include/DB/Functions/FunctionsCoding.h index 26ff0feadde..800a252aec3 100644 --- a/dbms/include/DB/Functions/FunctionsCoding.h +++ b/dbms/include/DB/Functions/FunctionsCoding.h @@ -39,7 +39,8 @@ namespace DB /// Включая нулевой символ в конце. #define MAX_UINT_HEX_LENGTH 20 -const auto ipv6_string_length = 16; +const auto ipv4_bytes_length = 4; +const auto ipv6_bytes_length = 16; class FunctionIPv6NumToString : public IFunction { @@ -54,10 +55,10 @@ public: ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const auto ptr = typeid_cast(arguments[0].get()); - if (!ptr || ptr->getN() != ipv6_string_length) + if (!ptr || ptr->getN() != ipv6_bytes_length) throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName() + - ", expected FixedString(" + toString(ipv6_string_length) + ")", + ", expected FixedString(" + toString(ipv6_bytes_length) + ")", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return new DataTypeString; @@ -95,31 +96,31 @@ public: } /// print IPv4 address as %u.%u.%u.%u - static void ipv4_format(const unsigned char * src, char *& out) + static void ipv4_format(const unsigned char * src, char *& dst) { constexpr auto size = sizeof(UInt32); for (const auto i : ext::range(0, size)) { - print_integer<10, UInt8>(out, src[i]); + print_integer<10, UInt8>(dst, src[i]); if (i != size - 1) - *out++ = '.'; + *dst++ = '.'; } } - /** rewritten inet_ntop6 from http://www.cs.cmu.edu/afs/cs/academic/class/15213-f00/unpv12e/libfree/inet_ntop.c + /** rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c * performs significantly faster than the reference implementation due to the absence of sprintf calls, * bounds checking, unnecessary string copying and length calculation */ - static const void ipv6_format(const unsigned char * src, char *& out) + static const void ipv6_format(const unsigned char * src, char *& dst) { struct { int base, len; } best{-1}, cur{-1}; - std::array words{}; + std::array words{}; /** Preprocess: * Copy the input (bytewise) array into a wordwise array. * Find the longest run of 0x00's in src[] for :: shorthanding. */ - for (const auto i : ext::range(0, ipv6_string_length)) + for (const auto i : ext::range(0, ipv6_bytes_length)) words[i / 2] |= src[i] << ((1 - (i % 2)) << 3); for (const auto i : ext::range(0, words.size())) @@ -157,29 +158,29 @@ public: if (best.base != -1 && i >= best.base && i < (best.base + best.len)) { if (i == best.base) - *out++ = ':'; + *dst++ = ':'; continue; } /// Are we following an initial run of 0x00s or any real hex? if (i != 0) - *out++ = ':'; + *dst++ = ':'; /// Is this address an encapsulated IPv4? if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) { - ipv4_format(src + 12, out); + ipv4_format(src + 12, dst); break; } - print_integer<16>(out, words[i]); + print_integer<16>(dst, words[i]); } /// Was it a trailing run of 0x00's? if (best.base != -1 && (best.base + best.len) == words.size()) - *out++ = ':'; + *dst++ = ':'; - *out++ = '\0'; + *dst++ = '\0'; } void execute(Block & block, const ColumnNumbers & arguments, const size_t result) @@ -189,11 +190,11 @@ public: if (const auto col_in = typeid_cast(column.get())) { - if (col_in->getN() != ipv6_string_length) + if (col_in->getN() != ipv6_bytes_length) throw Exception("Illegal type " + col_name_type.type->getName() + " of column " + col_in->getName() + " argument of function " + getName() + - ", expected FixedString(" + toString(ipv6_string_length) + ")", + ", expected FixedString(" + toString(ipv6_bytes_length) + ")", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); const auto size = col_in->size(); @@ -210,7 +211,7 @@ public: auto begin = reinterpret_cast(&vec_res[0]); auto pos = begin; - for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += ipv6_string_length, ++i) + for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += ipv6_bytes_length, ++i) { ipv6_format(&vec_in[offset], pos); offsets_res[i] = pos - begin; @@ -221,11 +222,11 @@ public: else if (const auto col_in = typeid_cast *>(column.get())) { const auto data_type_fixed_string = typeid_cast(col_in->getDataType().get()); - if (!data_type_fixed_string || data_type_fixed_string->getN() != ipv6_string_length) + if (!data_type_fixed_string || data_type_fixed_string->getN() != ipv6_bytes_length) throw Exception("Illegal type " + col_name_type.type->getName() + " of column " + col_in->getName() + " argument of function " + getName() + - ", expected FixedString(" + toString(ipv6_string_length) + ")", + ", expected FixedString(" + toString(ipv6_bytes_length) + ")", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); const auto & data_in = col_in->getData(); @@ -259,7 +260,160 @@ public: throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return new DataTypeFixedString{ipv6_string_length}; + return new DataTypeFixedString{ipv6_bytes_length}; + } + + + static bool isDigit(char c) { return c >= '0' && c <= '9'; } + + static bool ipv4_scan(const char * src, unsigned char * dst) + { + constexpr auto size = sizeof(UInt32); + char bytes[size]{}; + + for (const auto i : ext::range(0, size)) + { + UInt32 value = 0; + size_t len = 0; + while (isDigit(*src) && len <= 3) + { + value = value * 10 + (*src - '0'); + ++len; + ++src; + } + + if (len == 0 || value > 255 || (i < size - 1 && *src != '.')) + { + memset(dst, 0, size); + return false; + } + bytes[i] = value; + ++src; + } + + if (src[-1] != '\0') + { + memset(dst, 0, size); + return false; + } + + memcpy(dst, bytes, sizeof(bytes)); + return true; + } + + /// slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c + static void ipv6_scan(const char * src, unsigned char * dst) + { + const auto clear_dst = [dst] { + memset(dst, '\0', ipv6_bytes_length); + }; + + /// Leading :: requires some special handling. + if (*src == ':') + if (*++src != ':') + return clear_dst(); + + /// get integer value for a hexademical char digit, or -1 + const auto number_by_char = [] (const char ch) { + if ('A' <= ch && ch <= 'F') + return 10 + ch - 'A'; + + if ('a' <= ch && ch <= 'f') + return 10 + ch - 'a'; + + if ('0' <= ch && ch <= '9') + return ch - '0'; + + return -1; + }; + + unsigned char tmp[ipv6_bytes_length]{}; + auto tp = tmp; + auto endp = tp + ipv6_bytes_length; + auto curtok = src; + auto saw_xdigit = false; + uint16_t val{}; + unsigned char * colonp = nullptr; + + while (const auto ch = *src++) + { + const auto num = number_by_char(ch); + + if (num != -1) + { + val <<= 4; + val |= num; + if (val > 0xffffu) + return clear_dst(); + + saw_xdigit = 1; + continue; + } + + if (ch == ':') + { + curtok = src; + if (!saw_xdigit) + { + if (colonp) + return clear_dst(); + + colonp = tp; + continue; + } + + if (tp + sizeof(uint16_t) > endp) + return clear_dst(); + + *tp++ = static_cast((val >> 8) & 0xffu); + *tp++ = static_cast(val & 0xffu); + saw_xdigit = false; + val = 0; + continue; + } + + if (ch == '.' && (tp + ipv4_bytes_length) <= endp) + { + if (!ipv4_scan(curtok, tp)) + return clear_dst(); + + tp += ipv4_bytes_length; + saw_xdigit = false; + break; /* '\0' was seen by ipv4_scan(). */ + } + + return clear_dst(); + } + + if (saw_xdigit) + { + if (tp + sizeof(uint16_t) > endp) + return clear_dst(); + + *tp++ = static_cast((val >> 8) & 0xffu); + *tp++ = static_cast(val & 0xffu); + } + + if (colonp) + { + /* + * Since some memmove()'s erroneously fail to handle + * overlapping regions, we'll do the shift by hand. + */ + const auto n = tp - colonp; + + for (int i = 1; i <= n; i++) + { + endp[- i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + + if (tp != endp) + return clear_dst(); + + memcpy(dst, tmp, sizeof(tmp)); } void execute(Block & block, const ColumnNumbers & arguments, size_t result) @@ -268,11 +422,11 @@ public: if (const auto col_in = typeid_cast(&*column)) { - const auto col_res = new ColumnFixedString{ipv6_string_length}; + const auto col_res = new ColumnFixedString{ipv6_bytes_length}; block.getByPosition(result).column = col_res; auto & vec_res = col_res->getChars(); - vec_res.resize(col_in->size() * ipv6_string_length); + vec_res.resize(col_in->size() * ipv6_bytes_length); const ColumnString::Chars_t & vec_src = col_in->getChars(); const ColumnString::Offsets_t & offsets_src = col_in->getOffsets(); @@ -280,21 +434,21 @@ public: for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); - out_offset += ipv6_string_length, ++i) + out_offset += ipv6_bytes_length, ++i) { - inet_pton(AF_INET6, reinterpret_cast(&vec_src[src_offset]), &vec_res[out_offset]); + ipv6_scan(reinterpret_cast(&vec_src[src_offset]), &vec_res[out_offset]); src_offset = offsets_src[i]; } } else if (const auto col_in = typeid_cast(&*column)) { - String out(ipv6_string_length, 0); - inet_pton(AF_INET6, col_in->getData().data(), &out[0]); + String out(ipv6_bytes_length, 0); + ipv6_scan(col_in->getData().data(), reinterpret_cast(&out[0])); block.getByPosition(result).column = new ColumnConst{ col_in->size(), out, - new DataTypeFixedString{ipv6_string_length} + new DataTypeFixedString{ipv6_bytes_length} }; } else From 5465bb3d509b607ab9e8dea64a716b000c8a049d Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 31 Oct 2014 15:03:13 +0300 Subject: [PATCH 22/29] dbms: add tests for IPv4/6 coding functions. [#METR-13151] --- .../0_stateless/00076_ip_coding_functions.reference | 9 +++++++++ .../queries/0_stateless/00076_ip_coding_functions.sql | 10 ++++++++++ 2 files changed, 19 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00076_ip_coding_functions.reference create mode 100644 dbms/tests/queries/0_stateless/00076_ip_coding_functions.sql diff --git a/dbms/tests/queries/0_stateless/00076_ip_coding_functions.reference b/dbms/tests/queries/0_stateless/00076_ip_coding_functions.reference new file mode 100644 index 00000000000..a9b85cf5320 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00076_ip_coding_functions.reference @@ -0,0 +1,9 @@ +1 +1 +1 1 +1 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00076_ip_coding_functions.sql b/dbms/tests/queries/0_stateless/00076_ip_coding_functions.sql new file mode 100644 index 00000000000..d139ded3ebb --- /dev/null +++ b/dbms/tests/queries/0_stateless/00076_ip_coding_functions.sql @@ -0,0 +1,10 @@ +select IPv4StringToNum('') == 0; +select IPv4StringToNum('not an ip string') == 0; +select IPv4StringToNum('127.0.0.1' as p) == (0x7f000001 as n), IPv4NumToString(n) == p; +select IPv4NumToString(toUInt32(0)) == '0.0.0.0'; + +select IPv6NumToString(toFixedString('', 16)) == '::'; +select IPv6NumToString(IPv6StringToNum('::ffff:127.0.0.1' as p) as n) == p; +select IPv6NumToString(toFixedString(unhex('20010DB800000003000001FF0000002E'), 16)) == '2001:db8:0:3:0:1ff:0:2e'; +select IPv6StringToNum('') == toFixedString(materialize(''), 16); +select IPv6StringToNum('not an ip string') == toFixedString(materialize(''), 16); From 70f376b8e68543128cc4a220e039fe4f1f79c191 Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 31 Oct 2014 15:46:57 +0300 Subject: [PATCH 23/29] dbms: remove redundant memset(0) from IPv6StringToNum. [#METR-13151] --- dbms/include/DB/Functions/FunctionsCoding.h | 27 +++++++-------------- 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsCoding.h b/dbms/include/DB/Functions/FunctionsCoding.h index 800a252aec3..107d26c7480 100644 --- a/dbms/include/DB/Functions/FunctionsCoding.h +++ b/dbms/include/DB/Functions/FunctionsCoding.h @@ -283,19 +283,14 @@ public: } if (len == 0 || value > 255 || (i < size - 1 && *src != '.')) - { - memset(dst, 0, size); return false; - } + bytes[i] = value; ++src; } if (src[-1] != '\0') - { - memset(dst, 0, size); return false; - } memcpy(dst, bytes, sizeof(bytes)); return true; @@ -304,14 +299,10 @@ public: /// slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c static void ipv6_scan(const char * src, unsigned char * dst) { - const auto clear_dst = [dst] { - memset(dst, '\0', ipv6_bytes_length); - }; - /// Leading :: requires some special handling. if (*src == ':') if (*++src != ':') - return clear_dst(); + return; /// get integer value for a hexademical char digit, or -1 const auto number_by_char = [] (const char ch) { @@ -344,7 +335,7 @@ public: val <<= 4; val |= num; if (val > 0xffffu) - return clear_dst(); + return; saw_xdigit = 1; continue; @@ -356,14 +347,14 @@ public: if (!saw_xdigit) { if (colonp) - return clear_dst(); + return; colonp = tp; continue; } if (tp + sizeof(uint16_t) > endp) - return clear_dst(); + return; *tp++ = static_cast((val >> 8) & 0xffu); *tp++ = static_cast(val & 0xffu); @@ -375,20 +366,20 @@ public: if (ch == '.' && (tp + ipv4_bytes_length) <= endp) { if (!ipv4_scan(curtok, tp)) - return clear_dst(); + return; tp += ipv4_bytes_length; saw_xdigit = false; break; /* '\0' was seen by ipv4_scan(). */ } - return clear_dst(); + return; } if (saw_xdigit) { if (tp + sizeof(uint16_t) > endp) - return clear_dst(); + return; *tp++ = static_cast((val >> 8) & 0xffu); *tp++ = static_cast(val & 0xffu); @@ -411,7 +402,7 @@ public: } if (tp != endp) - return clear_dst(); + return; memcpy(dst, tmp, sizeof(tmp)); } From c62bcdcd181430909f47ed0a844354ec98f72ecc Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 31 Oct 2014 15:57:30 +0300 Subject: [PATCH 24/29] Revert "dbms: remove redundant memset(0) from IPv6StringToNum. [#METR-13151]" This reverts commit 37cc08166507494428c8b858a4670156de356c32. --- dbms/include/DB/Functions/FunctionsCoding.h | 27 ++++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsCoding.h b/dbms/include/DB/Functions/FunctionsCoding.h index 107d26c7480..800a252aec3 100644 --- a/dbms/include/DB/Functions/FunctionsCoding.h +++ b/dbms/include/DB/Functions/FunctionsCoding.h @@ -283,14 +283,19 @@ public: } if (len == 0 || value > 255 || (i < size - 1 && *src != '.')) + { + memset(dst, 0, size); return false; - + } bytes[i] = value; ++src; } if (src[-1] != '\0') + { + memset(dst, 0, size); return false; + } memcpy(dst, bytes, sizeof(bytes)); return true; @@ -299,10 +304,14 @@ public: /// slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c static void ipv6_scan(const char * src, unsigned char * dst) { + const auto clear_dst = [dst] { + memset(dst, '\0', ipv6_bytes_length); + }; + /// Leading :: requires some special handling. if (*src == ':') if (*++src != ':') - return; + return clear_dst(); /// get integer value for a hexademical char digit, or -1 const auto number_by_char = [] (const char ch) { @@ -335,7 +344,7 @@ public: val <<= 4; val |= num; if (val > 0xffffu) - return; + return clear_dst(); saw_xdigit = 1; continue; @@ -347,14 +356,14 @@ public: if (!saw_xdigit) { if (colonp) - return; + return clear_dst(); colonp = tp; continue; } if (tp + sizeof(uint16_t) > endp) - return; + return clear_dst(); *tp++ = static_cast((val >> 8) & 0xffu); *tp++ = static_cast(val & 0xffu); @@ -366,20 +375,20 @@ public: if (ch == '.' && (tp + ipv4_bytes_length) <= endp) { if (!ipv4_scan(curtok, tp)) - return; + return clear_dst(); tp += ipv4_bytes_length; saw_xdigit = false; break; /* '\0' was seen by ipv4_scan(). */ } - return; + return clear_dst(); } if (saw_xdigit) { if (tp + sizeof(uint16_t) > endp) - return; + return clear_dst(); *tp++ = static_cast((val >> 8) & 0xffu); *tp++ = static_cast(val & 0xffu); @@ -402,7 +411,7 @@ public: } if (tp != endp) - return; + return clear_dst(); memcpy(dst, tmp, sizeof(tmp)); } From 11a7b05f484e5d2b1d66d2dc237ff4beddd9a32e Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 31 Oct 2014 15:57:41 +0300 Subject: [PATCH 25/29] dbms: add tests for ip coding functions with non-const arguments. [#METR-13151] --- .../0_stateless/00076_ip_coding_functions.reference | 9 +++++++++ .../queries/0_stateless/00076_ip_coding_functions.sql | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00076_ip_coding_functions.reference b/dbms/tests/queries/0_stateless/00076_ip_coding_functions.reference index a9b85cf5320..788f414fa87 100644 --- a/dbms/tests/queries/0_stateless/00076_ip_coding_functions.reference +++ b/dbms/tests/queries/0_stateless/00076_ip_coding_functions.reference @@ -1,5 +1,8 @@ 1 1 +1 +1 +1 1 1 1 1 1 @@ -7,3 +10,9 @@ 1 1 1 +1 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00076_ip_coding_functions.sql b/dbms/tests/queries/0_stateless/00076_ip_coding_functions.sql index d139ded3ebb..36bf6b3712b 100644 --- a/dbms/tests/queries/0_stateless/00076_ip_coding_functions.sql +++ b/dbms/tests/queries/0_stateless/00076_ip_coding_functions.sql @@ -1,10 +1,19 @@ select IPv4StringToNum('') == 0; +select IPv4StringToNum(materialize('')) == 0; select IPv4StringToNum('not an ip string') == 0; +select IPv4StringToNum(materialize('not an ip string')) == 0; select IPv4StringToNum('127.0.0.1' as p) == (0x7f000001 as n), IPv4NumToString(n) == p; +select IPv4StringToNum(materialize('127.0.0.1') as p) == (materialize(0x7f000001) as n), IPv4NumToString(n) == p; select IPv4NumToString(toUInt32(0)) == '0.0.0.0'; +select IPv4NumToString(materialize(toUInt32(0))) == materialize('0.0.0.0'); select IPv6NumToString(toFixedString('', 16)) == '::'; +select IPv6NumToString(toFixedString(materialize(''), 16)) == materialize('::'); select IPv6NumToString(IPv6StringToNum('::ffff:127.0.0.1' as p) as n) == p; +select IPv6NumToString(IPv6StringToNum(materialize('::ffff:127.0.0.1') as p) as n) == p; select IPv6NumToString(toFixedString(unhex('20010DB800000003000001FF0000002E'), 16)) == '2001:db8:0:3:0:1ff:0:2e'; +select IPv6NumToString(toFixedString(unhex(materialize('20010DB800000003000001FF0000002E')), 16)) == materialize('2001:db8:0:3:0:1ff:0:2e'); select IPv6StringToNum('') == toFixedString(materialize(''), 16); +select IPv6StringToNum(materialize('')) == toFixedString(materialize(''), 16); select IPv6StringToNum('not an ip string') == toFixedString(materialize(''), 16); +select IPv6StringToNum(materialize('not an ip string')) == toFixedString(materialize(''), 16); From 8e801e716329a1d7a1dd785d41dca09160e8f0fc Mon Sep 17 00:00:00 2001 From: Pavel Kartavyy Date: Fri, 31 Oct 2014 15:59:31 +0300 Subject: [PATCH 26/29] zkutil: fixed bug --- libs/libzkutil/src/ZooKeeper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/libzkutil/src/ZooKeeper.cpp b/libs/libzkutil/src/ZooKeeper.cpp index 844d11b4410..cf4e999d3d8 100644 --- a/libs/libzkutil/src/ZooKeeper.cpp +++ b/libs/libzkutil/src/ZooKeeper.cpp @@ -246,7 +246,7 @@ int32_t ZooKeeper::tryCreate(const std::string & path, const std::string & data, int32_t ZooKeeper::tryCreateWithRetries(const std::string & path, const std::string & data, int32_t mode, std::string & pathCreated, size_t* attempt) { - return retry([&path, &data, mode, &pathCreated, this] { return tryCreate(path, data, mode, pathCreated); }); + return retry([&path, &data, mode, &pathCreated, this] { return tryCreate(path, data, mode, pathCreated); }, attempt); } From 23bf89d490e9319cb269ab048eac745fcf819daa Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Fri, 31 Oct 2014 16:48:30 +0300 Subject: [PATCH 27/29] dbms: properly resize const strings passed into toFixedString. --- dbms/include/DB/Functions/FunctionsConversion.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/include/DB/Functions/FunctionsConversion.h b/dbms/include/DB/Functions/FunctionsConversion.h index e15d6eafa31..60145e18029 100644 --- a/dbms/include/DB/Functions/FunctionsConversion.h +++ b/dbms/include/DB/Functions/FunctionsConversion.h @@ -453,7 +453,11 @@ public: if (column_const->getData().size() > n) throw Exception("String too long for type FixedString(" + toString(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE); - block.getByPosition(result).column = new ColumnConst(column_const->size(), column_const->getData(), new DataTypeFixedString(n)); + + auto resized_string = column_const->getData(); + resized_string.resize(n); + + block.getByPosition(result).column = new ColumnConst(column_const->size(), std::move(resized_string), new DataTypeFixedString(n)); } else if(const ColumnString * column_string = typeid_cast(&*column)) { From bc454aec71cab0de7e7d0350243428c98f93fbed Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 1 Nov 2014 01:15:17 +0300 Subject: [PATCH 28/29] dbms: added override to all columns [#METR-2944]. --- .../DB/Columns/ColumnAggregateFunction.h | 40 +++++++------- dbms/include/DB/Columns/ColumnArray.h | 43 ++++++++------- dbms/include/DB/Columns/ColumnConst.h | 50 ++++++++--------- dbms/include/DB/Columns/ColumnExpression.h | 20 +++---- dbms/include/DB/Columns/ColumnFixedString.h | 40 +++++++------- dbms/include/DB/Columns/ColumnNested.h | 41 +++++++------- dbms/include/DB/Columns/ColumnReplicated.h | 6 +-- dbms/include/DB/Columns/ColumnSet.h | 8 +-- dbms/include/DB/Columns/ColumnString.h | 42 +++++++-------- dbms/include/DB/Columns/ColumnTuple.h | 54 +++++++++---------- dbms/include/DB/Columns/ColumnVector.h | 46 ++++++++-------- dbms/include/DB/Columns/IColumnDummy.h | 34 ++++++------ 12 files changed, 211 insertions(+), 213 deletions(-) diff --git a/dbms/include/DB/Columns/ColumnAggregateFunction.h b/dbms/include/DB/Columns/ColumnAggregateFunction.h index 983a20dfdb6..16a6649c651 100644 --- a/dbms/include/DB/Columns/ColumnAggregateFunction.h +++ b/dbms/include/DB/Columns/ColumnAggregateFunction.h @@ -113,21 +113,21 @@ public: return res; } - std::string getName() const { return "ColumnAggregateFunction"; } + std::string getName() const override { return "ColumnAggregateFunction"; } - size_t sizeOfField() const { return sizeof(getData()[0]); } + size_t sizeOfField() const override { return sizeof(getData()[0]); } - size_t size() const + size_t size() const override { return getData().size(); } - ColumnPtr cloneEmpty() const - { + ColumnPtr cloneEmpty() const override + { return new ColumnAggregateFunction(holder->func, Arenas(1, new Arena)); }; - Field operator[](size_t n) const + Field operator[](size_t n) const override { Field field = String(); { @@ -137,7 +137,7 @@ public: return field; } - void get(size_t n, Field & res) const + void get(size_t n, Field & res) const override { res = String(); { @@ -146,17 +146,17 @@ public: } } - StringRef getDataAt(size_t n) const + StringRef getDataAt(size_t n) const override { return StringRef(reinterpret_cast(&getData()[n]), sizeof(getData()[n])); } - void insertData(const char * pos, size_t length) + void insertData(const char * pos, size_t length) override { getData().push_back(*reinterpret_cast(pos)); } - void insertFrom(const IColumn & src, size_t n) + void insertFrom(const IColumn & src, size_t n) override { getData().push_back(static_cast(src).getData()[n]); } @@ -167,7 +167,7 @@ public: holder.get()->func.get()->merge(getData().back(), static_cast(src).getData()[n]); } - void insert(const Field & x) + void insert(const Field & x) override { IAggregateFunction * function = holder.get()->func; @@ -177,17 +177,17 @@ public: function->deserializeMerge(getData().back(), read_buffer); } - void insertDefault() + void insertDefault() override { throw Exception("Method insertDefault is not supported for ColumnAggregateFunction.", ErrorCodes::NOT_IMPLEMENTED); } - size_t byteSize() const + size_t byteSize() const override { return getData().size() * sizeof(getData()[0]); } - ColumnPtr cut(size_t start, size_t length) const + ColumnPtr cut(size_t start, size_t length) const override { if (start + length > getData().size()) throw Exception("Parameters start = " @@ -205,7 +205,7 @@ public: return res; } - ColumnPtr filter(const Filter & filter) const + ColumnPtr filter(const Filter & filter) const override { size_t size = getData().size(); if (size != filter.size()) @@ -225,7 +225,7 @@ public: return res; } - ColumnPtr permute(const Permutation & perm, size_t limit) const + ColumnPtr permute(const Permutation & perm, size_t limit) const override { size_t size = getData().size(); @@ -247,22 +247,22 @@ public: return res; } - ColumnPtr replicate(const Offsets_t & offsets) const + ColumnPtr replicate(const Offsets_t & offsets) const override { throw Exception("Method replicate is not supported for ColumnAggregateFunction.", ErrorCodes::NOT_IMPLEMENTED); } - void getExtremes(Field & min, Field & max) const + void getExtremes(Field & min, Field & max) const override { throw Exception("Method getExtremes is not supported for ColumnAggregateFunction.", ErrorCodes::NOT_IMPLEMENTED); } - int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const + int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { return 0; } - void getPermutation(bool reverse, size_t limit, Permutation & res) const + void getPermutation(bool reverse, size_t limit, Permutation & res) const override { size_t s = getData().size(); res.resize(s); diff --git a/dbms/include/DB/Columns/ColumnArray.h b/dbms/include/DB/Columns/ColumnArray.h index 93d349a5e3f..b71a0efed63 100644 --- a/dbms/include/DB/Columns/ColumnArray.h +++ b/dbms/include/DB/Columns/ColumnArray.h @@ -42,19 +42,19 @@ public: } } - std::string getName() const { return "ColumnArray(" + data->getName() + ")"; } + std::string getName() const override { return "ColumnArray(" + data->getName() + ")"; } - ColumnPtr cloneEmpty() const + ColumnPtr cloneEmpty() const override { return new ColumnArray(data->cloneEmpty()); } - size_t size() const + size_t size() const override { return getOffsets().size(); } - Field operator[](size_t n) const + Field operator[](size_t n) const override { size_t offset = offsetAt(n); size_t size = sizeAt(n); @@ -66,7 +66,7 @@ public: return res; } - void get(size_t n, Field & res) const + void get(size_t n, Field & res) const override { size_t offset = offsetAt(n); size_t size = sizeAt(n); @@ -77,7 +77,7 @@ public: data->get(offset + i, res_arr[i]); } - StringRef getDataAt(size_t n) const + StringRef getDataAt(size_t n) const override { /** Работает для массивов значений фиксированной длины. * Для массивов строк и массивов массивов полученный кусок памяти может не взаимно-однозначно соответствовать элементам. @@ -87,7 +87,7 @@ public: return StringRef(begin.data, end.data - begin.data); } - void insertData(const char * pos, size_t length) + void insertData(const char * pos, size_t length) override { /** Аналогично - только для массивов значений фиксированной длины. */ @@ -98,14 +98,17 @@ public: size_t field_size = data_->sizeOfField(); const char * end = pos + length; - for (; pos + field_size <= end; pos += field_size) + size_t elems = 0; + for (; pos + field_size <= end; pos += field_size, ++elems) data_->insertData(pos, field_size); if (pos != end) throw Exception("Incorrect length argument for method ColumnArray::insertData", ErrorCodes::BAD_ARGUMENTS); + + getOffsets().push_back((getOffsets().size() == 0 ? 0 : getOffsets().back()) + elems); } - ColumnPtr cut(size_t start, size_t length) const + ColumnPtr cut(size_t start, size_t length) const override { if (length == 0) return new ColumnArray(data); @@ -138,7 +141,7 @@ public: return res; } - void insert(const Field & x) + void insert(const Field & x) override { const Array & array = DB::get(x); size_t size = array.size(); @@ -147,7 +150,7 @@ public: getOffsets().push_back((getOffsets().size() == 0 ? 0 : getOffsets().back()) + size); } - void insertFrom(const IColumn & src_, size_t n) + void insertFrom(const IColumn & src_, size_t n) override { const ColumnArray & src = static_cast(src_); size_t size = src.sizeAt(n); @@ -159,12 +162,12 @@ public: getOffsets().push_back((getOffsets().size() == 0 ? 0 : getOffsets().back()) + size); } - void insertDefault() + void insertDefault() override { getOffsets().push_back(getOffsets().size() == 0 ? 0 : getOffsets().back()); } - ColumnPtr filter(const Filter & filt) const + ColumnPtr filter(const Filter & filt) const override { size_t size = getOffsets().size(); if (size != filt.size()) @@ -203,7 +206,7 @@ public: return res; } - ColumnPtr permute(const Permutation & perm, size_t limit) const + ColumnPtr permute(const Permutation & perm, size_t limit) const override { size_t size = getOffsets().size(); @@ -241,7 +244,7 @@ public: return res; } - int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const final + int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { const ColumnArray & rhs = static_cast(rhs_); @@ -276,7 +279,7 @@ public: } }; - void getPermutation(bool reverse, size_t limit, Permutation & res) const + void getPermutation(bool reverse, size_t limit, Permutation & res) const override { size_t s = size(); if (limit >= s) @@ -302,18 +305,18 @@ public: } } - void reserve(size_t n) + void reserve(size_t n) override { getOffsets().reserve(n); getData().reserve(n); /// Средний размер массивов тут никак не учитывается. Или считается, что он не больше единицы. } - size_t byteSize() const + size_t byteSize() const override { return data->byteSize() + getOffsets().size() * sizeof(getOffsets()[0]); } - void getExtremes(Field & min, Field & max) const + void getExtremes(Field & min, Field & max) const override { min = Array(); max = Array(); @@ -350,7 +353,7 @@ public: const ColumnPtr & getOffsetsColumn() const { return offsets; } - ColumnPtr replicate(const Offsets_t & replicate_offsets) const + ColumnPtr replicate(const Offsets_t & replicate_offsets) const override { /// Не получается реализовать в общем случае. diff --git a/dbms/include/DB/Columns/ColumnConst.h b/dbms/include/DB/Columns/ColumnConst.h index c97d4207c9a..4d599448b63 100644 --- a/dbms/include/DB/Columns/ColumnConst.h +++ b/dbms/include/DB/Columns/ColumnConst.h @@ -19,7 +19,7 @@ using Poco::SharedPtr; class IColumnConst : public IColumn { public: - bool isConst() const { return true; } + bool isConst() const override { return true; } virtual ColumnPtr convertToFullColumn() const = 0; }; @@ -37,21 +37,21 @@ public: /// Для ColumnConst data_type_ должен быть ненулевым, если тип данных FixedString. ColumnConst(size_t s_, const T & data_, DataTypePtr data_type_ = DataTypePtr()) : s(s_), data(data_), data_type(data_type_) {} - std::string getName() const { return "ColumnConst<" + TypeName::get() + ">"; } - bool isNumeric() const { return IsNumber::value; } - bool isFixed() const { return IsNumber::value; } - size_t sizeOfField() const { return sizeof(T); } - ColumnPtr cloneResized(size_t s_) const { return new ColumnConst(s_, data); } - size_t size() const { return s; } - Field operator[](size_t n) const { return FieldType(data); } - void get(size_t n, Field & res) const { res = FieldType(data); } + std::string getName() const override { return "ColumnConst<" + TypeName::get() + ">"; } + bool isNumeric() const override { return IsNumber::value; } + bool isFixed() const override { return IsNumber::value; } + size_t sizeOfField() const override { return sizeof(T); } + ColumnPtr cloneResized(size_t s_) const override { return new ColumnConst(s_, data); } + size_t size() const override { return s; } + Field operator[](size_t n) const override { return FieldType(data); } + void get(size_t n, Field & res) const override { res = FieldType(data); } - ColumnPtr cut(size_t start, size_t length) const + ColumnPtr cut(size_t start, size_t length) const override { return new ColumnConst(length, data, data_type); } - void insert(const Field & x) + void insert(const Field & x) override { if (x.get() != FieldType(data)) throw Exception("Cannot insert different element into constant column " + getName(), @@ -59,12 +59,12 @@ public: ++s; } - void insertData(const char * pos, size_t length) + void insertData(const char * pos, size_t length) override { throw Exception("Cannot insert element into constant column " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - void insertFrom(const IColumn & src, size_t n) + void insertFrom(const IColumn & src, size_t n) override { if (data != static_cast &>(src).data) throw Exception("Cannot insert different element into constant column " + getName(), @@ -72,9 +72,9 @@ public: ++s; } - void insertDefault() { ++s; } + void insertDefault() override { ++s; } - ColumnPtr filter(const Filter & filt) const + ColumnPtr filter(const Filter & filt) const override { if (s != filt.size()) throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); @@ -82,7 +82,7 @@ public: return new ColumnConst(countBytesInFilter(filt), data, data_type); } - ColumnPtr replicate(const Offsets_t & offsets) const + ColumnPtr replicate(const Offsets_t & offsets) const override { if (s != offsets.size()) throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); @@ -91,9 +91,9 @@ public: return new ColumnConst(replicated_size, data, data_type); } - size_t byteSize() const { return sizeof(data) + sizeof(s); } + size_t byteSize() const override { return sizeof(data) + sizeof(s); } - ColumnPtr permute(const Permutation & perm, size_t limit) const + ColumnPtr permute(const Permutation & perm, size_t limit) const override { if (limit == 0) limit = s; @@ -106,7 +106,7 @@ public: return new ColumnConst(limit, data, data_type); } - int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const + int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { const ColumnConst & rhs = static_cast &>(rhs_); return data < rhs.data /// TODO: правильное сравнение NaN-ов в константных столбцах. @@ -116,25 +116,25 @@ public: : 1); } - void getPermutation(bool reverse, size_t limit, Permutation & res) const + void getPermutation(bool reverse, size_t limit, Permutation & res) const override { res.resize(s); for (size_t i = 0; i < s; ++i) res[i] = i; } - StringRef getDataAt(size_t n) const; - StringRef getDataAtWithTerminatingZero(size_t n) const; - UInt64 get64(size_t n) const; + StringRef getDataAt(size_t n) const override; + StringRef getDataAtWithTerminatingZero(size_t n) const override; + UInt64 get64(size_t n) const override; /** Более эффективные методы манипуляции */ T & getData() { return data; } const T & getData() const { return data; } /** Преобразование из константы в полноценный столбец */ - ColumnPtr convertToFullColumn() const; + ColumnPtr convertToFullColumn() const override; - void getExtremes(Field & min, Field & max) const + void getExtremes(Field & min, Field & max) const override { min = FieldType(data); max = FieldType(data); diff --git a/dbms/include/DB/Columns/ColumnExpression.h b/dbms/include/DB/Columns/ColumnExpression.h index 535a3af37a7..712229b094a 100644 --- a/dbms/include/DB/Columns/ColumnExpression.h +++ b/dbms/include/DB/Columns/ColumnExpression.h @@ -6,7 +6,7 @@ namespace DB { - + /** Столбец, содержащий лямбда-выражение. * Ведёт себя как столбец-константа. Содержит выражение, но не входные или выходные данные. */ @@ -15,19 +15,19 @@ class ColumnExpression final : public IColumnDummy public: ColumnExpression(size_t s_, ExpressionActionsPtr expression_, const NamesAndTypes & arguments_, DataTypePtr return_type_, std::string return_name_) : IColumnDummy(s_), expression(expression_), arguments(arguments_), return_type(return_type_), return_name(return_name_) {} - + ColumnExpression(size_t s_, ExpressionActionsPtr expression_, const NamesAndTypesList & arguments_, DataTypePtr return_type_, std::string return_name_) : IColumnDummy(s_), expression(expression_), arguments(arguments_.begin(), arguments_.end()), return_type(return_type_), return_name(return_name_) {} - - std::string getName() const { return "ColumnExpression"; } - ColumnPtr cloneDummy(size_t s_) const { return new ColumnExpression(s_, expression, arguments, return_type, return_name); } - + + std::string getName() const override { return "ColumnExpression"; } + ColumnPtr cloneDummy(size_t s_) const override { return new ColumnExpression(s_, expression, arguments, return_type, return_name); } + const ExpressionActionsPtr & getExpression() const { return expression; } const DataTypePtr & getReturnType() const { return return_type; } const std::string & getReturnName() const { return return_name; } - + const NamesAndTypes & getArguments() const { return arguments; } - + Names getArgumentNames() const { Names res(arguments.size()); @@ -35,12 +35,12 @@ public: res[i] = arguments[i].name; return res; } - + private: ExpressionActionsPtr expression; NamesAndTypes arguments; DataTypePtr return_type; std::string return_name; }; - + } diff --git a/dbms/include/DB/Columns/ColumnFixedString.h b/dbms/include/DB/Columns/ColumnFixedString.h index c18af6e21da..d362adee5ef 100644 --- a/dbms/include/DB/Columns/ColumnFixedString.h +++ b/dbms/include/DB/Columns/ColumnFixedString.h @@ -27,49 +27,49 @@ public: /** Создать пустой столбец строк фиксированной длины n */ ColumnFixedString(size_t n_) : n(n_) {} - std::string getName() const { return "ColumnFixedString"; } + std::string getName() const override { return "ColumnFixedString"; } - ColumnPtr cloneEmpty() const + ColumnPtr cloneEmpty() const override { return new ColumnFixedString(n); } - size_t size() const + size_t size() const override { return chars.size() / n; } - size_t sizeOfField() const + size_t sizeOfField() const override { return n; } - bool isFixed() const + bool isFixed() const override { return true; } - size_t byteSize() const + size_t byteSize() const override { return chars.size() + sizeof(n); } - Field operator[](size_t index) const + Field operator[](size_t index) const override { return String(reinterpret_cast(&chars[n * index]), n); } - void get(size_t index, Field & res) const + void get(size_t index, Field & res) const override { res.assignString(reinterpret_cast(&chars[n * index]), n); } - StringRef getDataAt(size_t index) const + StringRef getDataAt(size_t index) const override { return StringRef(&chars[n * index], n); } - void insert(const Field & x) + void insert(const Field & x) override { const String & s = DB::get(x); @@ -81,7 +81,7 @@ public: memcpy(&chars[old_size], s.data(), s.size()); } - void insertFrom(const IColumn & src_, size_t index) + void insertFrom(const IColumn & src_, size_t index) override { const ColumnFixedString & src = static_cast(src_); @@ -93,7 +93,7 @@ public: memcpy(&chars[old_size], &src.chars[n * index], n); } - void insertData(const char * pos, size_t length) + void insertData(const char * pos, size_t length) override { if (length > n) throw Exception("Too large string for FixedString column", ErrorCodes::TOO_LARGE_STRING_SIZE); @@ -103,12 +103,12 @@ public: memcpy(&chars[old_size], pos, length); } - void insertDefault() + void insertDefault() override { chars.resize_fill(chars.size() + n); } - int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int nan_direction_hint) const + int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int nan_direction_hint) const override { const ColumnFixedString & rhs = static_cast(rhs_); return memcmp(&chars[p1 * n], &rhs.chars[p2 * n], n); @@ -127,7 +127,7 @@ public: } }; - void getPermutation(bool reverse, size_t limit, Permutation & res) const + void getPermutation(bool reverse, size_t limit, Permutation & res) const override { size_t s = size(); res.resize(s); @@ -153,7 +153,7 @@ public: } } - ColumnPtr cut(size_t start, size_t length) const + ColumnPtr cut(size_t start, size_t length) const override { ColumnFixedString * res_ = new ColumnFixedString(n); ColumnPtr res = res_; @@ -162,7 +162,7 @@ public: return res; } - ColumnPtr filter(const IColumn::Filter & filt) const + ColumnPtr filter(const IColumn::Filter & filt) const override { size_t col_size = size(); if (col_size != filt.size()) @@ -185,7 +185,7 @@ public: return res; } - ColumnPtr permute(const Permutation & perm, size_t limit) const + ColumnPtr permute(const Permutation & perm, size_t limit) const override { size_t col_size = size(); @@ -214,7 +214,7 @@ public: return res; } - ColumnPtr replicate(const Offsets_t & offsets) const + ColumnPtr replicate(const Offsets_t & offsets) const override { size_t col_size = size(); if (col_size != offsets.size()) @@ -243,7 +243,7 @@ public: return res; } - void getExtremes(Field & min, Field & max) const + void getExtremes(Field & min, Field & max) const override { min = String(); max = String(); diff --git a/dbms/include/DB/Columns/ColumnNested.h b/dbms/include/DB/Columns/ColumnNested.h index 4551150c4fe..401ac076f3f 100644 --- a/dbms/include/DB/Columns/ColumnNested.h +++ b/dbms/include/DB/Columns/ColumnNested.h @@ -47,7 +47,7 @@ public: } } - std::string getName() const + std::string getName() const override { std::string res; { @@ -63,7 +63,7 @@ public: return "ColumnNested(" + res + ")"; } - ColumnPtr cloneEmpty() const + ColumnPtr cloneEmpty() const override { Columns res(data.size()); for (size_t i = 0; i < data.size(); ++i) @@ -71,32 +71,32 @@ public: return new ColumnNested(res); } - size_t size() const + size_t size() const override { return getOffsets().size(); } - Field operator[](size_t n) const + Field operator[](size_t n) const override { throw Exception("Method operator[] is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - void get(size_t n, Field & res) const + void get(size_t n, Field & res) const override { throw Exception("Method get is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - StringRef getDataAt(size_t n) const + StringRef getDataAt(size_t n) const override { throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - void insertData(const char * pos, size_t length) + void insertData(const char * pos, size_t length) override { throw Exception("Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - ColumnPtr cut(size_t start, size_t length) const + ColumnPtr cut(size_t start, size_t length) const override { if (length == 0) return new ColumnNested(data); @@ -131,12 +131,12 @@ public: return res; } - void insert(const Field & x) + void insert(const Field & x) override { throw Exception("Method insert is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - void insertFrom(const IColumn & src_, size_t n) + void insertFrom(const IColumn & src_, size_t n) override { const ColumnNested & src = static_cast(src_); @@ -158,14 +158,14 @@ public: getOffsets().push_back((getOffsets().size() == 0 ? 0 : getOffsets().back()) + size); } - void insertDefault() + void insertDefault() override { for (size_t i = 0; i < data.size(); ++i) data[i]->insertDefault(); getOffsets().push_back(getOffsets().size() == 0 ? 1 : (getOffsets().back() + 1)); } - ColumnPtr filter(const Filter & filt) const + ColumnPtr filter(const Filter & filt) const override { size_t size = getOffsets().size(); if (size != filt.size()) @@ -205,12 +205,12 @@ public: return res; } - ColumnPtr replicate(const Offsets_t & offsets) const + ColumnPtr replicate(const Offsets_t & offsets) const override { throw Exception("Replication of ColumnNested is not implemented.", ErrorCodes::NOT_IMPLEMENTED); } - ColumnPtr permute(const Permutation & perm, size_t limit) const + ColumnPtr permute(const Permutation & perm, size_t limit) const override { size_t size = getOffsets().size(); if (size != perm.size()) @@ -255,24 +255,24 @@ public: return res; } - int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const + int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { throw Exception("Method compareAt is not supported for ColumnNested.", ErrorCodes::NOT_IMPLEMENTED); } - void getPermutation(bool reverse, size_t limit, Permutation & res) const + void getPermutation(bool reverse, size_t limit, Permutation & res) const override { throw Exception("Method getPermutation is not supported for ColumnNested.", ErrorCodes::NOT_IMPLEMENTED); } - void reserve(size_t n) + void reserve(size_t n) override { getOffsets().reserve(n); for (Columns::iterator it = data.begin(); it != data.end(); ++it) (*it)->reserve(n); } - size_t byteSize() const + size_t byteSize() const override { size_t size = getOffsets().size() * sizeof(getOffsets()[0]); for (Columns::const_iterator it = data.begin(); it != data.end(); ++it) @@ -280,7 +280,7 @@ public: return size; } - void getExtremes(Field & min, Field & max) const + void getExtremes(Field & min, Field & max) const override { throw Exception("Method getExtremes is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } @@ -289,9 +289,6 @@ public: Columns & getData() { return data; } const Columns & getData() const { return data; } -// ColumnPtr & getDataPtr() { return data; } -// const ColumnPtr & getDataPtr() const { return data; } - Offsets_t & ALWAYS_INLINE getOffsets() { return static_cast(*offsets.get()).getData(); diff --git a/dbms/include/DB/Columns/ColumnReplicated.h b/dbms/include/DB/Columns/ColumnReplicated.h index 6fb9fc5f2c4..32abf953c67 100644 --- a/dbms/include/DB/Columns/ColumnReplicated.h +++ b/dbms/include/DB/Columns/ColumnReplicated.h @@ -5,7 +5,7 @@ namespace DB { - + /** Содержит промежуточные данные для вычисления выражений в функциях высшего порядка. * Это - вложенный столбец произвольного размера. * Сам ColumnReplicated притворяется, как столбец указанного в конструкторе размера. @@ -14,8 +14,8 @@ class ColumnReplicated final : public IColumnDummy { public: ColumnReplicated(size_t s_, ColumnPtr nested_) : IColumnDummy(s_), nested(nested_) {} - std::string getName() const { return "ColumnReplicated"; } - ColumnPtr cloneDummy(size_t s_) const { return new ColumnReplicated(s_, nested); } + std::string getName() const override { return "ColumnReplicated"; } + ColumnPtr cloneDummy(size_t s_) const override { return new ColumnReplicated(s_, nested); } ColumnPtr & getData() { return nested; } private: diff --git a/dbms/include/DB/Columns/ColumnSet.h b/dbms/include/DB/Columns/ColumnSet.h index 30b31eabe35..22b701815fb 100644 --- a/dbms/include/DB/Columns/ColumnSet.h +++ b/dbms/include/DB/Columns/ColumnSet.h @@ -17,11 +17,11 @@ public: ColumnSet(size_t s_, SetPtr data_) : IColumnDummy(s_), data(data_) {} /// Столбец не константный. Иначе столбец будет использоваться в вычислениях в ExpressionActions::prepare, когда множество из подзапроса ещё не готово. - bool isConst() const { return false; } + bool isConst() const override { return false; } + + std::string getName() const override { return "ColumnSet"; } + ColumnPtr cloneDummy(size_t s_) const override { return new ColumnSet(s_, data); } - std::string getName() const { return "ColumnSet"; } - ColumnPtr cloneDummy(size_t s_) const { return new ColumnSet(s_, data); } - SetPtr & getData() { return data; } const SetPtr & getData() const { return data; } diff --git a/dbms/include/DB/Columns/ColumnString.h b/dbms/include/DB/Columns/ColumnString.h index 990df17baf9..d5ff0e05a78 100644 --- a/dbms/include/DB/Columns/ColumnString.h +++ b/dbms/include/DB/Columns/ColumnString.h @@ -36,44 +36,44 @@ public: /** Создать пустой столбец строк */ ColumnString() {} - std::string getName() const { return "ColumnString"; } + std::string getName() const override { return "ColumnString"; } - size_t size() const + size_t size() const override { return offsets.size(); } - size_t byteSize() const + size_t byteSize() const override { return chars.size() + offsets.size() * sizeof(offsets[0]); } - ColumnPtr cloneEmpty() const + ColumnPtr cloneEmpty() const override { return new ColumnString; } - Field operator[](size_t n) const + Field operator[](size_t n) const override { return Field(&chars[offsetAt(n)], sizeAt(n) - 1); } - void get(size_t n, Field & res) const + void get(size_t n, Field & res) const override { res.assignString(&chars[offsetAt(n)], sizeAt(n) - 1); } - StringRef getDataAt(size_t n) const + StringRef getDataAt(size_t n) const override { return StringRef(&chars[offsetAt(n)], sizeAt(n) - 1); } - StringRef getDataAtWithTerminatingZero(size_t n) const + StringRef getDataAtWithTerminatingZero(size_t n) const override { return StringRef(&chars[offsetAt(n)], sizeAt(n)); } - void insert(const Field & x) + void insert(const Field & x) override { const String & s = DB::get(x); size_t old_size = chars.size(); @@ -84,7 +84,7 @@ public: offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + size_to_append); } - void insertFrom(const IColumn & src_, size_t n) + void insertFrom(const IColumn & src_, size_t n) override { const ColumnString & src = static_cast(src_); size_t old_size = chars.size(); @@ -96,7 +96,7 @@ public: offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + size_to_append); } - void insertData(const char * pos, size_t length) + void insertData(const char * pos, size_t length) override { size_t old_size = chars.size(); @@ -106,7 +106,7 @@ public: offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + length + 1); } - void insertDataWithTerminatingZero(const char * pos, size_t length) + void insertDataWithTerminatingZero(const char * pos, size_t length) override { size_t old_size = chars.size(); @@ -115,7 +115,7 @@ public: offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + length); } - ColumnPtr cut(size_t start, size_t length) const + ColumnPtr cut(size_t start, size_t length) const override { if (length == 0) return new ColumnString; @@ -150,7 +150,7 @@ public: return res; } - ColumnPtr filter(const Filter & filt) const + ColumnPtr filter(const Filter & filt) const override { const size_t size = offsets.size(); if (size != filt.size()) @@ -257,7 +257,7 @@ public: return res_; } - ColumnPtr permute(const Permutation & perm, size_t limit) const + ColumnPtr permute(const Permutation & perm, size_t limit) const override { size_t size = offsets.size(); @@ -300,13 +300,13 @@ public: return res; } - void insertDefault() + void insertDefault() override { chars.push_back(0); offsets.push_back(offsets.size() == 0 ? 1 : (offsets.back() + 1)); } - int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const + int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { const ColumnString & rhs = static_cast(rhs_); @@ -344,7 +344,7 @@ public: } }; - void getPermutation(bool reverse, size_t limit, Permutation & res) const + void getPermutation(bool reverse, size_t limit, Permutation & res) const override { size_t s = offsets.size(); res.resize(s); @@ -415,7 +415,7 @@ public: } } - ColumnPtr replicate(const Offsets_t & replicate_offsets) const + ColumnPtr replicate(const Offsets_t & replicate_offsets) const override { size_t col_size = size(); if (col_size != replicate_offsets.size()) @@ -457,13 +457,13 @@ public: return res; } - void reserve(size_t n) + void reserve(size_t n) override { offsets.reserve(n); chars.reserve(n * DBMS_APPROX_STRING_SIZE); } - void getExtremes(Field & min, Field & max) const + void getExtremes(Field & min, Field & max) const override { min = String(); max = String(); diff --git a/dbms/include/DB/Columns/ColumnTuple.h b/dbms/include/DB/Columns/ColumnTuple.h index 911fd7b695d..4a85f036180 100644 --- a/dbms/include/DB/Columns/ColumnTuple.h +++ b/dbms/include/DB/Columns/ColumnTuple.h @@ -25,22 +25,20 @@ public: for (size_t i = 0; i < size; ++i) columns[i] = data.getByPosition(i).column; } - - std::string getName() const { return "Tuple"; } - - SharedPtr cloneEmpty() const + + std::string getName() const override { return "Tuple"; } + + SharedPtr cloneEmpty() const override { return new ColumnTuple(data.cloneEmpty()); } - size_t size() const + size_t size() const override { return data.rows(); } - bool empty() const { return size() == 0; } - - Field operator[](size_t n) const + Field operator[](size_t n) const override { Array res; @@ -50,7 +48,7 @@ public: return res; } - void get(size_t n, Field & res) const + void get(size_t n, Field & res) const override { size_t size = columns.size(); res = Array(size); @@ -59,17 +57,17 @@ public: columns[i]->get(n, res_arr[i]); } - StringRef getDataAt(size_t n) const + StringRef getDataAt(size_t n) const override { throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - void insertData(const char * pos, size_t length) + void insertData(const char * pos, size_t length) override { throw Exception("Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - void insert(const Field & x) + void insert(const Field & x) override { const Array & arr = DB::get(x); @@ -81,36 +79,36 @@ public: columns[i]->insert(arr[i]); } - void insertFrom(const IColumn & src_, size_t n) + void insertFrom(const IColumn & src_, size_t n) override { const ColumnTuple & src = static_cast(src_); - + size_t size = columns.size(); if (src.columns.size() != size) throw Exception("Cannot insert value of different size into tuple", ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE); - + for (size_t i = 0; i < size; ++i) columns[i]->insertFrom(*src.columns[i], n); } - void insertDefault() + void insertDefault() override { for (Columns::iterator it = columns.begin(); it != columns.end(); ++it) (*it)->insertDefault(); } - ColumnPtr cut(size_t start, size_t length) const + ColumnPtr cut(size_t start, size_t length) const override { Block res_block = data.cloneEmpty(); - + for (size_t i = 0; i < columns.size(); ++i) res_block.getByPosition(i).column = data.getByPosition(i).column->cut(start, length); return new ColumnTuple(res_block); } - ColumnPtr filter(const Filter & filt) const + ColumnPtr filter(const Filter & filt) const override { Block res_block = data.cloneEmpty(); @@ -120,7 +118,7 @@ public: return new ColumnTuple(res_block); } - ColumnPtr permute(const Permutation & perm, size_t limit) const + ColumnPtr permute(const Permutation & perm, size_t limit) const override { Block res_block = data.cloneEmpty(); @@ -129,8 +127,8 @@ public: return new ColumnTuple(res_block); } - - ColumnPtr replicate(const Offsets_t & offsets) const + + ColumnPtr replicate(const Offsets_t & offsets) const override { Block res_block = data.cloneEmpty(); @@ -140,13 +138,13 @@ public: return new ColumnTuple(res_block); } - int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const + int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override { size_t size = columns.size(); for (size_t i = 0; i < size; ++i) if (int res = columns[i]->compareAt(n, m, *static_cast(rhs).columns[i], nan_direction_hint)) return res; - + return 0; } @@ -175,7 +173,7 @@ public: } }; - void getPermutation(bool reverse, size_t limit, Permutation & res) const + void getPermutation(bool reverse, size_t limit, Permutation & res) const override { size_t rows = size(); res.resize(rows); @@ -201,13 +199,13 @@ public: } } - void reserve(size_t n) + void reserve(size_t n) override { for (Columns::iterator it = columns.begin(); it != columns.end(); ++it) (*it)->reserve(n); } - size_t byteSize() const + size_t byteSize() const override { size_t res = 0; for (Columns::const_iterator it = columns.begin(); it != columns.end(); ++it) @@ -215,7 +213,7 @@ public: return res; } - void getExtremes(Field & min, Field & max) const + void getExtremes(Field & min, Field & max) const override { throw Exception("Method getExtremes is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/dbms/include/DB/Columns/ColumnVector.h b/dbms/include/DB/Columns/ColumnVector.h index a3299d1da4e..bccc6ab831b 100644 --- a/dbms/include/DB/Columns/ColumnVector.h +++ b/dbms/include/DB/Columns/ColumnVector.h @@ -89,42 +89,42 @@ public: ColumnVector(const size_t n) : data{n} {} ColumnVector(const size_t n, const value_type x) : data{n, x} {} - bool isNumeric() const { return IsNumber::value; } - bool isFixed() const { return IsNumber::value; } + bool isNumeric() const override { return IsNumber::value; } + bool isFixed() const override { return IsNumber::value; } - size_t sizeOfField() const { return sizeof(T); } + size_t sizeOfField() const override { return sizeof(T); } - size_t size() const + size_t size() const override { return data.size(); } - StringRef getDataAt(size_t n) const + StringRef getDataAt(size_t n) const override { return StringRef(reinterpret_cast(&data[n]), sizeof(data[n])); } - void insertFrom(const IColumn & src, size_t n) + void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } - void insertData(const char * pos, size_t length) + void insertData(const char * pos, size_t length) override { data.push_back(*reinterpret_cast(pos)); } - void insertDefault() + void insertDefault() override { data.push_back(T()); } - size_t byteSize() const + size_t byteSize() const override { return data.size() * sizeof(data[0]); } - int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const + int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { return CompareHelper::compare(data[n], static_cast(rhs_).data[m], nan_direction_hint); } @@ -143,7 +143,7 @@ public: bool operator()(size_t lhs, size_t rhs) const { return CompareHelper::greater(parent.data[lhs], parent.data[rhs]); } }; - void getPermutation(bool reverse, size_t limit, Permutation & res) const + void getPermutation(bool reverse, size_t limit, Permutation & res) const override { size_t s = data.size(); res.resize(s); @@ -169,36 +169,36 @@ public: } } - void reserve(size_t n) + void reserve(size_t n) override { data.reserve(n); } - std::string getName() const { return "ColumnVector<" + TypeName::get() + ">"; } + std::string getName() const override { return "ColumnVector<" + TypeName::get() + ">"; } - ColumnPtr cloneEmpty() const + ColumnPtr cloneEmpty() const override { return new ColumnVector; } - Field operator[](size_t n) const + Field operator[](size_t n) const override { return typename NearestFieldType::Type(data[n]); } - void get(size_t n, Field & res) const + void get(size_t n, Field & res) const override { res = typename NearestFieldType::Type(data[n]); } - UInt64 get64(size_t n) const; + UInt64 get64(size_t n) const override; - void insert(const Field & x) + void insert(const Field & x) override { data.push_back(DB::get::Type>(x)); } - ColumnPtr cut(size_t start, size_t length) const + ColumnPtr cut(size_t start, size_t length) const override { if (start + length > data.size()) throw Exception("Parameters start = " @@ -212,7 +212,7 @@ public: return res; } - ColumnPtr filter(const IColumn::Filter & filt) const + ColumnPtr filter(const IColumn::Filter & filt) const override { size_t size = data.size(); if (size != filt.size()) @@ -270,7 +270,7 @@ public: return res; } - ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const + ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override { size_t size = data.size(); @@ -291,7 +291,7 @@ public: return res; } - ColumnPtr replicate(const IColumn::Offsets_t & offsets) const + ColumnPtr replicate(const IColumn::Offsets_t & offsets) const override { size_t size = data.size(); if (size != offsets.size()) @@ -318,7 +318,7 @@ public: return res; } - void getExtremes(Field & min, Field & max) const + void getExtremes(Field & min, Field & max) const override { size_t size = data.size(); diff --git a/dbms/include/DB/Columns/IColumnDummy.h b/dbms/include/DB/Columns/IColumnDummy.h index 1ab49024fc8..ab12d3e11d0 100644 --- a/dbms/include/DB/Columns/IColumnDummy.h +++ b/dbms/include/DB/Columns/IColumnDummy.h @@ -17,30 +17,30 @@ public: virtual ColumnPtr cloneDummy(size_t s_) const = 0; - ColumnPtr cloneResized(size_t s_) const { return cloneDummy(s_); } - bool isConst() const { return true; } - size_t size() const { return s; } - void insertDefault() { ++s; } - size_t byteSize() const { return 0; } - int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const { return 0; } + ColumnPtr cloneResized(size_t s_) const override { return cloneDummy(s_); } + bool isConst() const override { return true; } + size_t size() const override { return s; } + void insertDefault() override { ++s; } + size_t byteSize() const override { return 0; } + int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { return 0; } - Field operator[](size_t n) const { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - void get(size_t n, Field & res) const { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); }; - void insert(const Field & x) { throw Exception("Cannot insert element into " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - StringRef getDataAt(size_t n) const { throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - void insertData(const char * pos, size_t length) { throw Exception("Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + Field operator[](size_t n) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + void get(size_t n, Field & res) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); }; + void insert(const Field & x) override { throw Exception("Cannot insert element into " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + StringRef getDataAt(size_t n) const override { throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + void insertData(const char * pos, size_t length) override { throw Exception("Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - void getExtremes(Field & min, Field & max) const + void getExtremes(Field & min, Field & max) const override { throw Exception("Method getExtremes is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - ColumnPtr cut(size_t start, size_t length) const + ColumnPtr cut(size_t start, size_t length) const override { return cloneDummy(length); } - ColumnPtr filter(const Filter & filt) const + ColumnPtr filter(const Filter & filt) const override { size_t new_size = 0; for (Filter::const_iterator it = filt.begin(); it != filt.end(); ++it) @@ -50,7 +50,7 @@ public: return cloneDummy(new_size); } - ColumnPtr permute(const Permutation & perm, size_t limit) const + ColumnPtr permute(const Permutation & perm, size_t limit) const override { if (s != perm.size()) throw Exception("Size of permutation doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); @@ -58,14 +58,14 @@ public: return cloneDummy(limit ? std::min(s, limit) : s); } - void getPermutation(bool reverse, size_t limit, Permutation & res) const + void getPermutation(bool reverse, size_t limit, Permutation & res) const override { res.resize(s); for (size_t i = 0; i < s; ++i) res[i] = i; } - ColumnPtr replicate(const Offsets_t & offsets) const + ColumnPtr replicate(const Offsets_t & offsets) const override { if (s != offsets.size()) throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); From ec08b393dec6f513f13bcc8654055dea7069dc8b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 1 Nov 2014 02:20:18 +0300 Subject: [PATCH 29/29] dbms: tiny improvement [#METR-2944]. --- dbms/include/DB/Interpreters/Aggregator.h | 39 +++++++++++++++++++++-- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 60498edb4c7..7530c74c1cb 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -118,14 +118,14 @@ struct AggregationMethodOneNumber Data data; - const ColumnVector * column; + const FieldType * column; /** Вызывается в начале обработки каждого блока. * Устанавливает переменные, необходимые для остальных методов, вызываемых во внутренних циклах. */ void init(ConstColumnPlainPtrs & key_columns) { - column = static_cast *>(key_columns[0]); + column = &static_cast *>(key_columns[0])->getData()[0]; } /// Достать из ключевых столбцов ключ для вставки в хэш-таблицу. @@ -136,7 +136,7 @@ struct AggregationMethodOneNumber const Sizes & key_sizes, /// Если ключи фиксированной длины - их длины. Не используется в методах агрегации по ключам переменной длины. StringRefs & keys) const /// Сюда могут быть записаны ссылки на данные ключей в столбцах. Они могут быть использованы в дальнейшем. { - return column->get64(i); + return get64(column[i]); } /// Из значения в хэш-таблице получить AggregateDataPtr. @@ -155,8 +155,41 @@ struct AggregationMethodOneNumber { static_cast *>(key_columns[0])->insertData(reinterpret_cast(&it->first), sizeof(it->first)); } + +private: + UInt64 get64(FieldType x) const + { + return x; + } }; +template <> +inline UInt64 AggregationMethodOneNumber::get64(Float64 x) const +{ + union + { + Float64 src; + UInt64 res; + }; + + src = x; + return res; +} + +template <> +inline UInt64 AggregationMethodOneNumber::get64(Float32 x) const +{ + union + { + Float32 src; + UInt64 res; + }; + + res = 0; + src = x; + return res; +} + /// Для случая, когда есть один строковый ключ. struct AggregationMethodString