From fd957f332e0fa0764aa782c3fe4065f2a149d137 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 12 Jul 2021 10:06:24 +0000 Subject: [PATCH 01/10] Add column sizes for Log tables --- src/Storages/StorageLog.cpp | 18 ++++++++++++++++++ src/Storages/StorageLog.h | 1 + src/Storages/System/StorageSystemColumns.cpp | 2 +- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 61fbbbc3086..87168148faa 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -719,6 +719,24 @@ CheckResults StorageLog::checkData(const ASTPtr & /* query */, ContextPtr contex } +IStorage::ColumnSizeByName StorageLog::getColumnSizes() const +{ + std::shared_lock lock(rwlock); + ColumnSizeByName column_sizes; + for (const auto & it : files) + { + const String & name = column_names_by_idx[it.second.column_index]; + if (!it.second.marks.empty()) + { + ColumnSize & size = column_sizes[name]; + size.data_compressed += it.second.marks.back().offset; + size.marks += it.second.marks.size() * sizeof(Mark); + } + + } + return column_sizes; +} + void registerStorageLog(StorageFactory & factory) { StorageFactory::StorageFeatures features{ diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 6fea00edefd..7002dc7c6cb 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -45,6 +45,7 @@ public: bool storesDataOnDisk() const override { return true; } Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } bool supportsSubcolumns() const override { return true; } + ColumnSizeByName getColumnSizes() const override; protected: /** Attach the table with the appropriate name, along the appropriate path (with / at the end), diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 8f65147bb11..0058b58f537 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -98,7 +98,7 @@ protected: Names cols_required_for_sorting_key; Names cols_required_for_primary_key; Names cols_required_for_sampling; - MergeTreeData::ColumnSizeByName column_sizes; + IStorage::ColumnSizeByName column_sizes; { StoragePtr storage = storages.at(std::make_pair(database_name, table_name)); From dc18f68b7236fa245215b0bbf1540b32ee58419f Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 12 Jul 2021 10:21:17 +0000 Subject: [PATCH 02/10] Minor fix --- src/Storages/StorageLog.cpp | 2 +- src/Storages/StorageLog.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 87168148faa..7c99c28f4e3 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -721,7 +721,7 @@ CheckResults StorageLog::checkData(const ASTPtr & /* query */, ContextPtr contex IStorage::ColumnSizeByName StorageLog::getColumnSizes() const { - std::shared_lock lock(rwlock); + std::shared_lock lock(rwlock, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC); ColumnSizeByName column_sizes; for (const auto & it : files) { diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 7002dc7c6cb..799bad26c7c 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -88,7 +88,7 @@ private: DiskPtr disk; String table_path; - std::shared_timed_mutex rwlock; + mutable std::shared_timed_mutex rwlock; Files files; From 7a8361c562a928dc288f8b957b77739ac5cc8aed Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 12 Jul 2021 10:51:05 +0000 Subject: [PATCH 03/10] Add columns size for Tiny Log --- src/Storages/StorageTinyLog.cpp | 23 +++++++++++++++++++++++ src/Storages/StorageTinyLog.h | 1 + 2 files changed, 24 insertions(+) diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 689b1307f4d..cbec3f964b8 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -523,6 +523,29 @@ CheckResults StorageTinyLog::checkData(const ASTPtr & /* query */, ContextPtr co return file_checker.check(); } +IStorage::ColumnSizeByName StorageTinyLog::getColumnSizes() const +{ + std::shared_lock lock(rwlock, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC); + ColumnSizeByName column_sizes; + FileChecker::Map file_sizes = file_checker.getFileSizes(); + + for (const auto & column : getInMemoryMetadata().getColumns().getAllPhysical()) + { + ISerialization::StreamCallback stream_callback = [&] (const ISerialization::SubstreamPath & substream_path) + { + String stream_name = ISerialization::getFileNameForStream(column, substream_path); + ColumnSize & size = column_sizes[column.name]; + size.data_compressed += file_sizes[stream_name]; + }; + + ISerialization::SubstreamPath substream_path; + auto serialization = type->getDefaultSerialization(); + serialization->enumerateStreams(stream_callback, substream_path); + } + + return column_sizes; +} + void StorageTinyLog::truncate( const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) { diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index 71763a6403e..0a8814450d4 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -45,6 +45,7 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) override; + ColumnSizeByName getColumnSizes() const override; protected: StorageTinyLog( DiskPtr disk_, From fb661bc38d954c37c5d0ed39bf61b02a28a8d02b Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 12 Jul 2021 10:58:53 +0000 Subject: [PATCH 04/10] Minor fixes --- src/Storages/StorageLog.cpp | 24 +++++++++++++++--------- src/Storages/StorageTinyLog.cpp | 5 +++-- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 7c99c28f4e3..080ab5edaee 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -28,6 +28,7 @@ #include #include +#include #define DBMS_STORAGE_LOG_DATA_FILE_EXTENSION ".bin" @@ -721,19 +722,24 @@ CheckResults StorageLog::checkData(const ASTPtr & /* query */, ContextPtr contex IStorage::ColumnSizeByName StorageLog::getColumnSizes() const { - std::shared_lock lock(rwlock, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC); + std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); ColumnSizeByName column_sizes; - for (const auto & it : files) + FileChecker::Map file_sizes = file_checker.getFileSizes(); + + for (const auto & column : getInMemoryMetadata().getColumns().getAllPhysical()) { - const String & name = column_names_by_idx[it.second.column_index]; - if (!it.second.marks.empty()) + ISerialization::StreamCallback stream_callback = [&] (const ISerialization::SubstreamPath & substream_path) { - ColumnSize & size = column_sizes[name]; - size.data_compressed += it.second.marks.back().offset; - size.marks += it.second.marks.size() * sizeof(Mark); - } - + String stream_name = ISerialization::getFileNameForStream(column, substream_path); + ColumnSize & size = column_sizes[column.name]; + size.data_compressed += file_sizes[stream_name]; + }; + + ISerialization::SubstreamPath substream_path; + auto serialization = column.type->getDefaultSerialization(); + serialization->enumerateStreams(stream_callback, substream_path); } + return column_sizes; } diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index cbec3f964b8..8ebc63fdc81 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -4,6 +4,7 @@ #include #include +#include #include @@ -525,7 +526,7 @@ CheckResults StorageTinyLog::checkData(const ASTPtr & /* query */, ContextPtr co IStorage::ColumnSizeByName StorageTinyLog::getColumnSizes() const { - std::shared_lock lock(rwlock, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC); + std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); ColumnSizeByName column_sizes; FileChecker::Map file_sizes = file_checker.getFileSizes(); @@ -539,7 +540,7 @@ IStorage::ColumnSizeByName StorageTinyLog::getColumnSizes() const }; ISerialization::SubstreamPath substream_path; - auto serialization = type->getDefaultSerialization(); + auto serialization = column.type->getDefaultSerialization(); serialization->enumerateStreams(stream_callback, substream_path); } From ab3ab9fa41f803aa066a19da684a7ca16bcd5d8c Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 12 Jul 2021 11:00:26 +0000 Subject: [PATCH 05/10] Fix mutable --- src/Storages/StorageTinyLog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index 0a8814450d4..849b0731a47 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -72,7 +72,7 @@ private: Files files; FileChecker file_checker; - std::shared_timed_mutex rwlock; + mutable std::shared_timed_mutex rwlock; Poco::Logger * log; From 8bdd1e93ae75f72eba58f50a56bc5b9cd72eb013 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 12 Jul 2021 13:40:22 +0000 Subject: [PATCH 06/10] Fix --- src/Storages/StorageLog.cpp | 8 +++++--- src/Storages/StorageTinyLog.cpp | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 080ab5edaee..874485dd3ca 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -725,14 +725,16 @@ IStorage::ColumnSizeByName StorageLog::getColumnSizes() const std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); ColumnSizeByName column_sizes; FileChecker::Map file_sizes = file_checker.getFileSizes(); - + for (const auto & column : getInMemoryMetadata().getColumns().getAllPhysical()) { - ISerialization::StreamCallback stream_callback = [&] (const ISerialization::SubstreamPath & substream_path) + ISerialization::StreamCallback stream_callback = [&, this] (const ISerialization::SubstreamPath & substream_path) { String stream_name = ISerialization::getFileNameForStream(column, substream_path); ColumnSize & size = column_sizes[column.name]; - size.data_compressed += file_sizes[stream_name]; + auto it = files.find(stream_name); + if (it != files.end()) + size.data_compressed += file_sizes[fileName(it->second.data_file_path)]; }; ISerialization::SubstreamPath substream_path; diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 8ebc63fdc81..97716063664 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -529,14 +529,16 @@ IStorage::ColumnSizeByName StorageTinyLog::getColumnSizes() const std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); ColumnSizeByName column_sizes; FileChecker::Map file_sizes = file_checker.getFileSizes(); - + for (const auto & column : getInMemoryMetadata().getColumns().getAllPhysical()) { - ISerialization::StreamCallback stream_callback = [&] (const ISerialization::SubstreamPath & substream_path) + ISerialization::StreamCallback stream_callback = [&, this] (const ISerialization::SubstreamPath & substream_path) { String stream_name = ISerialization::getFileNameForStream(column, substream_path); ColumnSize & size = column_sizes[column.name]; - size.data_compressed += file_sizes[stream_name]; + auto it = files.find(stream_name); + if (it != files.end()) + size.data_compressed += file_sizes[fileName(it->second.data_file_path)]; }; ISerialization::SubstreamPath substream_path; From f0ddbb1ca5f81726df1a3ceaabc002fc419b93af Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 12 Jul 2021 13:41:10 +0000 Subject: [PATCH 07/10] Add functional test --- .../0_stateless/01943_log_column_sizes.reference | 6 ++++++ .../queries/0_stateless/01943_log_column_sizes.sql | 14 ++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tests/queries/0_stateless/01943_log_column_sizes.reference create mode 100644 tests/queries/0_stateless/01943_log_column_sizes.sql diff --git a/tests/queries/0_stateless/01943_log_column_sizes.reference b/tests/queries/0_stateless/01943_log_column_sizes.reference new file mode 100644 index 00000000000..91ae12e38ce --- /dev/null +++ b/tests/queries/0_stateless/01943_log_column_sizes.reference @@ -0,0 +1,6 @@ +27 +33 +105 +27 +33 +105 diff --git a/tests/queries/0_stateless/01943_log_column_sizes.sql b/tests/queries/0_stateless/01943_log_column_sizes.sql new file mode 100644 index 00000000000..f3fceafb727 --- /dev/null +++ b/tests/queries/0_stateless/01943_log_column_sizes.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test_log; +DROP TABLE IF EXISTS test_tiny_log; + +CREATE TABLE test_log (x UInt8, s String, a Array(Nullable(String))) ENGINE = Log; +CREATE TABLE test_tiny_log (x UInt8, s String, a Array(Nullable(String))) ENGINE = TinyLog; + +INSERT INTO test_log VALUES (64, 'Value1', ['Value2', 'Value3', NULL]); +INSERT INTO test_tiny_log VALUES (64, 'Value1', ['Value2', 'Value3', NULL]); + +SELECT data_compressed_bytes FROM system.columns WHERE table = 'test_log'; +SELECT data_compressed_bytes FROM system.columns WHERE table = 'test_tiny_log'; + +DROP TABLE test_log; +DROP TABLE test_tiny_log; \ No newline at end of file From e0ae04297a860cd6ab5e43ebedff1e54baebf096 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Jul 2021 18:19:49 +0300 Subject: [PATCH 08/10] Fix fast test --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d820cbbae45..bd9d7516f0f 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -399,7 +399,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( view = nullptr; } - if (try_move_to_prewhere && storage && query.where() && !query.prewhere()) + if (try_move_to_prewhere && storage && storage->supportsPrewhere() && query.where() && !query.prewhere()) { /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty()) From 8d2da5933c6bdca8bdabe138eed4197803e9f160 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 12 Jul 2021 19:04:53 +0000 Subject: [PATCH 09/10] Minor fixes --- src/Storages/StorageLog.cpp | 3 +++ src/Storages/StorageTinyLog.cpp | 3 +++ tests/queries/0_stateless/01943_log_column_sizes.sql | 4 ++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 874485dd3ca..b2072180dfd 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -723,6 +723,9 @@ CheckResults StorageLog::checkData(const ASTPtr & /* query */, ContextPtr contex IStorage::ColumnSizeByName StorageLog::getColumnSizes() const { std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + ColumnSizeByName column_sizes; FileChecker::Map file_sizes = file_checker.getFileSizes(); diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 97716063664..26721037058 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -527,6 +527,9 @@ CheckResults StorageTinyLog::checkData(const ASTPtr & /* query */, ContextPtr co IStorage::ColumnSizeByName StorageTinyLog::getColumnSizes() const { std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + ColumnSizeByName column_sizes; FileChecker::Map file_sizes = file_checker.getFileSizes(); diff --git a/tests/queries/0_stateless/01943_log_column_sizes.sql b/tests/queries/0_stateless/01943_log_column_sizes.sql index f3fceafb727..c6cd48c33d9 100644 --- a/tests/queries/0_stateless/01943_log_column_sizes.sql +++ b/tests/queries/0_stateless/01943_log_column_sizes.sql @@ -7,8 +7,8 @@ CREATE TABLE test_tiny_log (x UInt8, s String, a Array(Nullable(String))) ENGINE INSERT INTO test_log VALUES (64, 'Value1', ['Value2', 'Value3', NULL]); INSERT INTO test_tiny_log VALUES (64, 'Value1', ['Value2', 'Value3', NULL]); -SELECT data_compressed_bytes FROM system.columns WHERE table = 'test_log'; -SELECT data_compressed_bytes FROM system.columns WHERE table = 'test_tiny_log'; +SELECT data_compressed_bytes FROM system.columns WHERE table = 'test_log' AND database = currentDatabase(); +SELECT data_compressed_bytes FROM system.columns WHERE table = 'test_tiny_log' AND database = currentDatabase(); DROP TABLE test_log; DROP TABLE test_tiny_log; \ No newline at end of file From 75be358250c1fc18f1efe48e7bd034a7bf43456f Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 12 Jul 2021 19:47:13 +0000 Subject: [PATCH 10/10] Fix style --- src/Storages/StorageLog.cpp | 2 +- src/Storages/StorageTinyLog.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index b2072180dfd..b43cb6d71a0 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -725,7 +725,7 @@ IStorage::ColumnSizeByName StorageLog::getColumnSizes() const std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); if (!lock) throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); - + ColumnSizeByName column_sizes; FileChecker::Map file_sizes = file_checker.getFileSizes(); diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 26721037058..342101d91cc 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -529,7 +529,7 @@ IStorage::ColumnSizeByName StorageTinyLog::getColumnSizes() const std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); if (!lock) throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); - + ColumnSizeByName column_sizes; FileChecker::Map file_sizes = file_checker.getFileSizes();