From 8200bab8592c2b8ea9b5922c5168d805f5304346 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 13 Oct 2020 17:54:52 +0300 Subject: [PATCH 01/37] Add setting do_not_merge_across_partitions --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 288 ++++++++++-------- src/Storages/MergeTree/MergeTreeSettings.h | 2 + 2 files changed, 167 insertions(+), 123 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 8c1dc845d26..dc210d7bc33 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1237,144 +1237,186 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (sum_marks > max_marks_to_use_cache) use_uncompressed_cache = false; - Pipe pipe; - - { - Pipes pipes; - - for (const auto & part : parts) - { - auto source_processor = std::make_shared( - data, metadata_snapshot, part.data_part, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, - use_uncompressed_cache, - query_info.prewhere_info, true, reader_settings, - virt_columns, part.part_index_in_query); - - pipes.emplace_back(std::move(source_processor)); - } - - pipe = Pipe::unitePipes(std::move(pipes)); - } - - /// Drop temporary columns, added by 'sorting_key_expr' - if (!out_projection) - out_projection = createProjection(pipe, data); - - pipe.addSimpleTransform([&metadata_snapshot](const Block & header) - { - return std::make_shared(header, metadata_snapshot->getSortingKey().expression); - }); - - Names sort_columns = metadata_snapshot->getSortingKeyColumns(); - SortDescription sort_description; - size_t sort_columns_size = sort_columns.size(); - sort_description.reserve(sort_columns_size); - - Names partition_key_columns = metadata_snapshot->getPartitionKey().column_names; - - Block header = pipe.getHeader(); - for (size_t i = 0; i < sort_columns_size; ++i) - sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); - - auto get_merging_processor = [&]() -> MergingTransformPtr - { - switch (data.merging_params.mode) - { - case MergeTreeData::MergingParams::Ordinary: - { - return std::make_shared(header, pipe.numOutputPorts(), - sort_description, max_block_size); - } - - case MergeTreeData::MergingParams::Collapsing: - return std::make_shared(header, pipe.numOutputPorts(), - sort_description, data.merging_params.sign_column, true, max_block_size); - - case MergeTreeData::MergingParams::Summing: - return std::make_shared(header, pipe.numOutputPorts(), - sort_description, data.merging_params.columns_to_sum, partition_key_columns, max_block_size); - - case MergeTreeData::MergingParams::Aggregating: - return std::make_shared(header, pipe.numOutputPorts(), - sort_description, max_block_size); - - case MergeTreeData::MergingParams::Replacing: - return std::make_shared(header, pipe.numOutputPorts(), - sort_description, data.merging_params.version_column, max_block_size); - - case MergeTreeData::MergingParams::VersionedCollapsing: - return std::make_shared(header, pipe.numOutputPorts(), - sort_description, data.merging_params.sign_column, max_block_size); - - case MergeTreeData::MergingParams::Graphite: - throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); - } - - __builtin_unreachable(); - }; - if (num_streams > settings.max_final_threads) num_streams = settings.max_final_threads; - if (num_streams <= 1 || sort_description.empty()) + std::vector parts_to_merge_ranges; + auto it = parts.begin(); + parts_to_merge_ranges.push_back(it); + + if (data_settings->do_not_merge_across_partitions_select_final) { - pipe.addTransform(get_merging_processor()); - return pipe; - } - - ColumnNumbers key_columns; - key_columns.reserve(sort_description.size()); - - for (auto & desc : sort_description) - { - if (!desc.column_name.empty()) - key_columns.push_back(header.getPositionByName(desc.column_name)); - else - key_columns.emplace_back(desc.column_number); - } - - pipe.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header, num_streams, key_columns); - }); - - pipe.transform([&](OutputPortRawPtrs ports) - { - Processors processors; - std::vector output_ports; - processors.reserve(ports.size() + num_streams); - output_ports.reserve(ports.size()); - - for (auto & port : ports) + while (it != parts.end()) { - auto copier = std::make_shared(header, num_streams); - connect(*port, copier->getInputPort()); - output_ports.emplace_back(copier->getOutputs().begin()); - processors.emplace_back(std::move(copier)); + it = std::find_if( + it, parts.end(), [&it](auto & part) { return it->data_part->info.partition_id != part.data_part->info.partition_id; }); + parts_to_merge_ranges.push_back(it); } + num_streams /= (parts_to_merge_ranges.size() - 1); + } + else + { + parts_to_merge_ranges.push_back(parts.end()); + } + + Pipes select_and_merge_pipes; + + for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index) + { + Pipe pipe; - for (size_t i = 0; i < num_streams; ++i) { - auto merge = get_merging_processor(); - merge->setSelectorPosition(i); - auto input = merge->getInputs().begin(); + Pipes pipes; - /// Connect i-th merge with i-th input port of every copier. - for (size_t j = 0; j < ports.size(); ++j) + for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it) { - connect(*output_ports[j], *input); - ++output_ports[j]; - ++input; + LOG_DEBUG(log, "Partition id: {}", part_it->data_part->info.partition_id); + auto source_processor = std::make_shared( + data, + metadata_snapshot, + part_it->data_part, + max_block_size, + settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, + column_names, + part_it->ranges, + use_uncompressed_cache, + query_info.prewhere_info, + true, + reader_settings, + virt_columns, + part_it->part_index_in_query); + + pipes.emplace_back(std::move(source_processor)); } - processors.emplace_back(std::move(merge)); + pipe = Pipe::unitePipes(std::move(pipes)); } - return processors; - }); + /// Drop temporary columns, added by 'sorting_key_expr' + if (!out_projection) + out_projection = createProjection(pipe, data); - return pipe; + if (std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && parts_to_merge_ranges[range_index]->data_part->info.level > 0) + { + select_and_merge_pipes.emplace_back(std::move(pipe)); + continue; + } + + pipe.addSimpleTransform([&metadata_snapshot](const Block & header) { + return std::make_shared(header, metadata_snapshot->getSortingKey().expression); + }); + + Names sort_columns = metadata_snapshot->getSortingKeyColumns(); + SortDescription sort_description; + size_t sort_columns_size = sort_columns.size(); + sort_description.reserve(sort_columns_size); + + Names partition_key_columns = metadata_snapshot->getPartitionKey().column_names; + + Block header = pipe.getHeader(); + for (size_t i = 0; i < sort_columns_size; ++i) + sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); + + auto get_merging_processor = [&]() -> MergingTransformPtr { + switch (data.merging_params.mode) + { + case MergeTreeData::MergingParams::Ordinary: { + return std::make_shared(header, pipe.numOutputPorts(), sort_description, max_block_size); + } + + case MergeTreeData::MergingParams::Collapsing: + return std::make_shared( + header, pipe.numOutputPorts(), sort_description, data.merging_params.sign_column, true, max_block_size); + + case MergeTreeData::MergingParams::Summing: + return std::make_shared( + header, + pipe.numOutputPorts(), + sort_description, + data.merging_params.columns_to_sum, + partition_key_columns, + max_block_size); + + case MergeTreeData::MergingParams::Aggregating: + return std::make_shared(header, pipe.numOutputPorts(), sort_description, max_block_size); + + case MergeTreeData::MergingParams::Replacing: + return std::make_shared( + header, pipe.numOutputPorts(), sort_description, data.merging_params.version_column, max_block_size); + + case MergeTreeData::MergingParams::VersionedCollapsing: + return std::make_shared( + header, pipe.numOutputPorts(), sort_description, data.merging_params.sign_column, max_block_size); + + case MergeTreeData::MergingParams::Graphite: + throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); + } + + __builtin_unreachable(); + }; + + if (num_streams <= 1 || sort_description.empty()) + { + pipe.addTransform(get_merging_processor()); + select_and_merge_pipes.emplace_back(std::move(pipe)); + continue; + } + + ColumnNumbers key_columns; + key_columns.reserve(sort_description.size()); + + for (auto & desc : sort_description) + { + if (!desc.column_name.empty()) + key_columns.push_back(header.getPositionByName(desc.column_name)); + else + key_columns.emplace_back(desc.column_number); + } + + pipe.addSimpleTransform([&](const Block & stream_header) { + return std::make_shared(stream_header, num_streams, key_columns); + }); + + pipe.transform([&](OutputPortRawPtrs ports) { + Processors processors; + std::vector output_ports; + processors.reserve(ports.size() + num_streams); + output_ports.reserve(ports.size()); + + LOG_DEBUG(log, "Output ports size: {}", ports.size()); + + for (auto & port : ports) + { + auto copier = std::make_shared(header, num_streams); + connect(*port, copier->getInputPort()); + output_ports.emplace_back(copier->getOutputs().begin()); + processors.emplace_back(std::move(copier)); + } + + for (size_t i = 0; i < num_streams; ++i) + { + auto merge = get_merging_processor(); + merge->setSelectorPosition(i); + auto input = merge->getInputs().begin(); + + /// Connect i-th merge with i-th input port of every copier. + for (size_t j = 0; j < ports.size(); ++j) + { + connect(*output_ports[j], *input); + ++output_ports[j]; + ++input; + } + + processors.emplace_back(std::move(merge)); + } + + return processors; + }); + select_and_merge_pipes.emplace_back(std::move(pipe)); + } + + return Pipe::unitePipes(std::move(select_and_merge_pipes)); } /// Calculates a set of mark ranges, that could possibly contain keys, required by condition. diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 97bc73caf5b..a9bd4248259 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -112,6 +112,8 @@ struct Settings; /** Obsolete settings. Kept for backward compatibility only. */ \ M(UInt64, min_relative_delay_to_yield_leadership, 120, "Obsolete setting, does nothing.", 0) \ M(UInt64, check_delay_period, 60, "Obsolete setting, does nothing.", 0) \ + /** Select settings */ \ + M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \ /// Settings that should not change after the creation of a table. #define APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(M) \ From be0cb31d211a7f98bc765720ece209cacdfd3030 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 13 Oct 2020 21:55:03 +0300 Subject: [PATCH 02/37] Add tests and comments --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 25 +++++++++++++------ ...e_across_partitions_select_final.reference | 6 +++++ ...t_merge_across_partitions_select_final.sql | 15 +++++++++++ 3 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference create mode 100644 tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index dc210d7bc33..77559c52c4e 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1240,6 +1240,11 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (num_streams > settings.max_final_threads) num_streams = settings.max_final_threads; + /// If setting do_not_merge_across_partitions_select_final is true than we won't merge parts from different partitions. + /// We have all parts in parts vector, where parts with same partition are nerby. + /// So we will store iterators pointed to the beginning of each partition range (and parts.end()), + /// then we will create a pipe for each partition that will run selecting processor and merging processor + /// for the parts with this partition. In the end we will unite all the pipes. std::vector parts_to_merge_ranges; auto it = parts.begin(); parts_to_merge_ranges.push_back(it); @@ -1252,14 +1257,17 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( it, parts.end(), [&it](auto & part) { return it->data_part->info.partition_id != part.data_part->info.partition_id; }); parts_to_merge_ranges.push_back(it); } + /// We divide threads for each partition equally. But we will create at least the number of partitions threads. + /// (So, the total number of threads could be more than initial num_streams. num_streams /= (parts_to_merge_ranges.size() - 1); } else { + /// If do_not_merge_across_partitions_select_final is false we just merge all the parts. parts_to_merge_ranges.push_back(parts.end()); } - Pipes select_and_merge_pipes; + Pipes partition_pipes; for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index) { @@ -1270,7 +1278,6 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it) { - LOG_DEBUG(log, "Partition id: {}", part_it->data_part->info.partition_id); auto source_processor = std::make_shared( data, metadata_snapshot, @@ -1297,9 +1304,13 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (!out_projection) out_projection = createProjection(pipe, data); - if (std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && parts_to_merge_ranges[range_index]->data_part->info.level > 0) + /// If do_not_merge_across_partitions_select_final is true, there is only one part in partition and it's level > 0 + /// then we won't merge this part. + if (data_settings->do_not_merge_across_partitions_select_final && + std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && + parts_to_merge_ranges[range_index]->data_part->info.level > 0) { - select_and_merge_pipes.emplace_back(std::move(pipe)); + partition_pipes.emplace_back(std::move(pipe)); continue; } @@ -1359,7 +1370,7 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (num_streams <= 1 || sort_description.empty()) { pipe.addTransform(get_merging_processor()); - select_and_merge_pipes.emplace_back(std::move(pipe)); + partition_pipes.emplace_back(std::move(pipe)); continue; } @@ -1413,10 +1424,10 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( return processors; }); - select_and_merge_pipes.emplace_back(std::move(pipe)); + partition_pipes.emplace_back(std::move(pipe)); } - return Pipe::unitePipes(std::move(select_and_merge_pipes)); + return Pipe::unitePipes(std::move(partition_pipes)); } /// Calculates a set of mark ranges, that could possibly contain keys, required by condition. diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference new file mode 100644 index 00000000000..4c85a1d418a --- /dev/null +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference @@ -0,0 +1,6 @@ +2000-01-01 00:00:00 0 +2020-01-01 00:00:00 0 +2000-01-01 00:00:00 1 +2020-01-01 00:00:00 1 +2000-01-01 00:00:00 2 +2020-01-01 00:00:00 2 diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql new file mode 100644 index 00000000000..d670cf8594c --- /dev/null +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS select_final; + +CREATE TABLE select_final (t DateTime, x Int32) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY x SETTINGS do_not_merge_across_partitions_select_final = 1; + +INSERT INTO select_final SELECT toDate('2000-01-01'), number FROM numbers(2); +INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1 FROM numbers(2); + +INSERT INTO select_final SELECT toDate('2020-01-01'), number FROM numbers(2); +INSERT INTO select_final SELECT toDate('2020-01-01'), number + 1 FROM numbers(2); + + +SELECT * FROM select_final FINAL ORDER BY x; + +DROP TABLE select_final; + From 44c2b138f32a732f60730bcaebf19c60dbe9e01f Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 13 Oct 2020 22:53:36 +0300 Subject: [PATCH 03/37] Fix style --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 77559c52c4e..027f982ac24 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1314,7 +1314,8 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( continue; } - pipe.addSimpleTransform([&metadata_snapshot](const Block & header) { + pipe.addSimpleTransform([&metadata_snapshot](const Block & header) + { return std::make_shared(header, metadata_snapshot->getSortingKey().expression); }); @@ -1329,7 +1330,8 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( for (size_t i = 0; i < sort_columns_size; ++i) sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); - auto get_merging_processor = [&]() -> MergingTransformPtr { + auto get_merging_processor = [&]() -> MergingTransformPtr + { switch (data.merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: { @@ -1385,11 +1387,13 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( key_columns.emplace_back(desc.column_number); } - pipe.addSimpleTransform([&](const Block & stream_header) { + pipe.addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, num_streams, key_columns); }); - pipe.transform([&](OutputPortRawPtrs ports) { + pipe.transform([&](OutputPortRawPtrs ports) + { Processors processors; std::vector output_ports; processors.reserve(ports.size() + num_streams); From f5fac575f4825755da549567d8bbca4e16a1693b Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Thu, 15 Oct 2020 15:22:41 +0300 Subject: [PATCH 04/37] don't postprocess single parts --- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 027f982ac24..faa9b80e3c8 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1304,11 +1304,10 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (!out_projection) out_projection = createProjection(pipe, data); - /// If do_not_merge_across_partitions_select_final is true, there is only one part in partition and it's level > 0 - /// then we won't merge this part. + /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition + /// then we won't postprocess this part if (data_settings->do_not_merge_across_partitions_select_final && - std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && - parts_to_merge_ranges[range_index]->data_part->info.level > 0) + std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1) { partition_pipes.emplace_back(std::move(pipe)); continue; From 89fdeb4e15ba8f4900b2dd07dd61fb2df727f6e5 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Wed, 21 Oct 2020 20:35:31 +0300 Subject: [PATCH 05/37] Fix style, move setting and add checking level>0 --- src/Core/Settings.h | 4 +++- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 14 +++++++------- src/Storages/MergeTree/MergeTreeSettings.h | 3 --- ...do_not_merge_across_partitions_select_final.sql | 4 ++-- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a1a7a690e40..770ac68aba1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -398,7 +398,9 @@ class IColumn; M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \ M(Bool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \ M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \ - M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0) + M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0) \ + \ + M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \ #define FORMAT_FACTORY_SETTINGS(M) \ M(Char, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index faa9b80e3c8..5acbec9d5b3 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1249,7 +1249,7 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( auto it = parts.begin(); parts_to_merge_ranges.push_back(it); - if (data_settings->do_not_merge_across_partitions_select_final) + if (settings.do_not_merge_across_partitions_select_final) { while (it != parts.end()) { @@ -1305,9 +1305,10 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( out_projection = createProjection(pipe, data); /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition - /// then we won't postprocess this part - if (data_settings->do_not_merge_across_partitions_select_final && - std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1) + /// with level > 0 then we won't postprocess this part + if (settings.do_not_merge_across_partitions_select_final && + std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && + parts_to_merge_ranges[range_index]->data_part->info.level > 0) { partition_pipes.emplace_back(std::move(pipe)); continue; @@ -1333,7 +1334,8 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( { switch (data.merging_params.mode) { - case MergeTreeData::MergingParams::Ordinary: { + case MergeTreeData::MergingParams::Ordinary: + { return std::make_shared(header, pipe.numOutputPorts(), sort_description, max_block_size); } @@ -1398,8 +1400,6 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( processors.reserve(ports.size() + num_streams); output_ports.reserve(ports.size()); - LOG_DEBUG(log, "Output ports size: {}", ports.size()); - for (auto & port : ports) { auto copier = std::make_shared(header, num_streams); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index a9bd4248259..fe97298fe75 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -112,9 +112,6 @@ struct Settings; /** Obsolete settings. Kept for backward compatibility only. */ \ M(UInt64, min_relative_delay_to_yield_leadership, 120, "Obsolete setting, does nothing.", 0) \ M(UInt64, check_delay_period, 60, "Obsolete setting, does nothing.", 0) \ - /** Select settings */ \ - M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \ - /// Settings that should not change after the creation of a table. #define APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(M) \ M(index_granularity) diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index d670cf8594c..08ac9a6aa1a 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS select_final; -CREATE TABLE select_final (t DateTime, x Int32) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY x SETTINGS do_not_merge_across_partitions_select_final = 1; +CREATE TABLE select_final (t DateTime, x Int32) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY x; INSERT INTO select_final SELECT toDate('2000-01-01'), number FROM numbers(2); INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1 FROM numbers(2); @@ -9,7 +9,7 @@ INSERT INTO select_final SELECT toDate('2020-01-01'), number FROM numbers(2); INSERT INTO select_final SELECT toDate('2020-01-01'), number + 1 FROM numbers(2); -SELECT * FROM select_final FINAL ORDER BY x; +SELECT * FROM select_final FINAL ORDER BY x SETTINGS do_not_merge_across_partitions_select_final = 1;; DROP TABLE select_final; From 10fb8624db2bcbed2c659793bc4c28b272671cd7 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Wed, 21 Oct 2020 20:44:35 +0300 Subject: [PATCH 06/37] Remove double simbol in test --- .../01524_do_not_merge_across_partitions_select_final.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index 08ac9a6aa1a..d332946605d 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -9,7 +9,7 @@ INSERT INTO select_final SELECT toDate('2020-01-01'), number FROM numbers(2); INSERT INTO select_final SELECT toDate('2020-01-01'), number + 1 FROM numbers(2); -SELECT * FROM select_final FINAL ORDER BY x SETTINGS do_not_merge_across_partitions_select_final = 1;; +SELECT * FROM select_final FINAL ORDER BY x SETTINGS do_not_merge_across_partitions_select_final = 1; DROP TABLE select_final; From 0274c9e9eb0664bd6bd6a40f452707312e692b35 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Fri, 23 Oct 2020 12:05:57 +0300 Subject: [PATCH 07/37] Add perf test --- tests/performance/optimized_select_final.xml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 tests/performance/optimized_select_final.xml diff --git a/tests/performance/optimized_select_final.xml b/tests/performance/optimized_select_final.xml new file mode 100644 index 00000000000..e8f39239f9b --- /dev/null +++ b/tests/performance/optimized_select_final.xml @@ -0,0 +1,20 @@ + + + 1 + + + + CREATE TABLE optimized_select_final (t DateTime, x Int32) + ENGINE = ReplacingMergeTree() + PARTITION BY toYYYYMM(t) ORDER BY x + + + INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), number FROM numbers(1000000) + + INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), number FROM numbers(1000000) + + SELECT * FROM optimized_select_final final + + DROP TABLE IF EXISTS optimized_select_final + + From 4592c5e59b1915bfa472bf8561722bbf173d21c0 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 27 Oct 2020 14:42:26 +0300 Subject: [PATCH 08/37] Add OPTIMIZE in perf test --- tests/performance/optimized_select_final.xml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/performance/optimized_select_final.xml b/tests/performance/optimized_select_final.xml index e8f39239f9b..8bdaa2a2e70 100644 --- a/tests/performance/optimized_select_final.xml +++ b/tests/performance/optimized_select_final.xml @@ -13,7 +13,9 @@ INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), number FROM numbers(1000000) - SELECT * FROM optimized_select_final final + OPTIMIZE TABLE optimized_select_final + + SELECT * FROM optimized_select_final FINAL DROP TABLE IF EXISTS optimized_select_final From 9f0f981642cc9c7bf651585b26bf5f6b9559552f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 29 Oct 2020 14:03:50 +0300 Subject: [PATCH 09/37] Update optimized_select_final.xml --- tests/performance/optimized_select_final.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/optimized_select_final.xml b/tests/performance/optimized_select_final.xml index 8bdaa2a2e70..c904e552120 100644 --- a/tests/performance/optimized_select_final.xml +++ b/tests/performance/optimized_select_final.xml @@ -15,7 +15,7 @@ OPTIMIZE TABLE optimized_select_final - SELECT * FROM optimized_select_final FINAL + SELECT * FROM optimized_select_final FINAL FORMAT Null DROP TABLE IF EXISTS optimized_select_final From 48185d437ac70b33819ad3d95297efb374a61e8c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 30 Oct 2020 17:48:59 +0300 Subject: [PATCH 10/37] Update optimized_select_final.xml --- tests/performance/optimized_select_final.xml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/performance/optimized_select_final.xml b/tests/performance/optimized_select_final.xml index c904e552120..46b408d0cb4 100644 --- a/tests/performance/optimized_select_final.xml +++ b/tests/performance/optimized_select_final.xml @@ -9,9 +9,10 @@ PARTITION BY toYYYYMM(t) ORDER BY x - INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), number FROM numbers(1000000) - - INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), number FROM numbers(1000000) + INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), number FROM numbers(5000000) + INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), number FROM numbers(5000000) + INSERT INTO optimized_select_final SELECT toDate('2021-01-01'), number FROM numbers(5000000) + INSERT INTO optimized_select_final SELECT toDate('2022-01-01'), number FROM numbers(5000000) OPTIMIZE TABLE optimized_select_final From 5481fcdf4211eec9751cd792e9d862c38328a942 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 30 Oct 2020 20:56:14 +0300 Subject: [PATCH 11/37] Fix script --- utils/make_changelog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/make_changelog.py b/utils/make_changelog.py index 5a5c82e5ab6..9120267e3b0 100755 --- a/utils/make_changelog.py +++ b/utils/make_changelog.py @@ -262,7 +262,7 @@ def process_unknown_commits(commits, commits_info, users): # Returns False if the PR should not be mentioned changelog. def parse_one_pull_request(item): description = item['description'] - lines = [line for line in [x.strip() for x in description.split('\n') if description else []] if line] + lines = [line for line in [x.strip() for x in description.split('\n') if description] if line] lines = [re.sub(r'\s+', ' ', l) for l in lines] cat_pos = None From 9828ed95504fb1a78380d47c8590a07bf8d4c884 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Fri, 30 Oct 2020 21:16:50 +0300 Subject: [PATCH 12/37] Update utils/make_changelog.py Co-authored-by: Azat Khuzhin --- utils/make_changelog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/make_changelog.py b/utils/make_changelog.py index 9120267e3b0..4f703108d38 100755 --- a/utils/make_changelog.py +++ b/utils/make_changelog.py @@ -262,7 +262,7 @@ def process_unknown_commits(commits, commits_info, users): # Returns False if the PR should not be mentioned changelog. def parse_one_pull_request(item): description = item['description'] - lines = [line for line in [x.strip() for x in description.split('\n') if description] if line] + lines = [line for line in [x.strip() for x in description.split('\n')] if line] if description else [] lines = [re.sub(r'\s+', ' ', l) for l in lines] cat_pos = None From 2832255164979b72fcb1fabd4592a613abb74bd4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 30 Oct 2020 22:02:02 +0300 Subject: [PATCH 13/37] Increase asynchronous_metrics_update_period_s to avoid syncing MemoryTracking with RSS --- .../configs/asynchronous_metrics_update_period_s.xml | 4 ++++ .../test.py | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_input_format_parallel_parsing_memory_tracking/configs/asynchronous_metrics_update_period_s.xml diff --git a/tests/integration/test_input_format_parallel_parsing_memory_tracking/configs/asynchronous_metrics_update_period_s.xml b/tests/integration/test_input_format_parallel_parsing_memory_tracking/configs/asynchronous_metrics_update_period_s.xml new file mode 100644 index 00000000000..ed131f41ede --- /dev/null +++ b/tests/integration/test_input_format_parallel_parsing_memory_tracking/configs/asynchronous_metrics_update_period_s.xml @@ -0,0 +1,4 @@ + + + 86400 + diff --git a/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py b/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py index 69c7a5821fd..e8866d3a235 100644 --- a/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py +++ b/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py @@ -8,7 +8,10 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', main_configs=['configs/conf.xml']) +instance = cluster.add_instance('instance', main_configs=[ + 'configs/conf.xml', + 'configs/asynchronous_metrics_update_period_s.xml', +]) @pytest.fixture(scope='module', autouse=True) From 831208e2cf90db994fca0055241e496e21a8281c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 30 Oct 2020 22:02:02 +0300 Subject: [PATCH 14/37] Log the difference between process RSS and MemoryTracking metric --- src/Interpreters/AsynchronousMetrics.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index e1a9a820ebb..4e052349b6b 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -207,8 +207,22 @@ void AsynchronousMetrics::update() /// We must update the value of total_memory_tracker periodically. /// Otherwise it might be calculated incorrectly - it can include a "drift" of memory amount. /// See https://github.com/ClickHouse/ClickHouse/issues/10293 - total_memory_tracker.set(data.resident); - CurrentMetrics::set(CurrentMetrics::MemoryTracking, data.resident); + { + Int64 amount = total_memory_tracker.get(); + Int64 peak = total_memory_tracker.getPeak(); + Int64 new_peak = data.resident; + + LOG_DEBUG(&Poco::Logger::get("AsynchronousMetrics"), + "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}", + ReadableSize(amount), + ReadableSize(peak), + ReadableSize(new_peak), + ReadableSize(new_peak - peak) + ); + + total_memory_tracker.set(new_peak); + CurrentMetrics::set(CurrentMetrics::MemoryTracking, new_peak); + } } #endif From 33fa54cce6bec85940864e85e44b39b307f5f9b6 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 1 Nov 2020 20:37:54 +0300 Subject: [PATCH 15/37] better scheduling of drop table task --- src/Interpreters/DatabaseCatalog.cpp | 42 ++++++++++++++----- src/Interpreters/DatabaseCatalog.h | 1 + .../0_stateless/01193_metadata_loading.sh | 2 +- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 906863f3f44..9de938ae184 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -134,7 +134,10 @@ void DatabaseCatalog::loadDatabases() loadMarkedAsDroppedTables(); auto task_holder = global_context->getSchedulePool().createTask("DatabaseCatalog", [this](){ this->dropTableDataTask(); }); drop_task = std::make_unique(std::move(task_holder)); - (*drop_task)->activateAndSchedule(); + (*drop_task)->activate(); + std::lock_guard lock{tables_marked_dropped_mutex}; + if (!tables_marked_dropped.empty()) + (*drop_task)->schedule(); } void DatabaseCatalog::shutdownImpl() @@ -760,14 +763,15 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr std::lock_guard lock(tables_marked_dropped_mutex); if (ignore_delay) - tables_marked_dropped.push_front({table_id, table, dropped_metadata_path, 0}); + tables_marked_dropped.push_front({table_id, table, dropped_metadata_path, drop_time}); else - tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time}); + tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time + drop_delay_sec}); tables_marked_dropped_ids.insert(table_id.uuid); CurrentMetrics::add(CurrentMetrics::TablesToDropQueueSize, 1); - /// If list of dropped tables was empty, start a drop task - if (drop_task && tables_marked_dropped.size() == 1) + /// If list of dropped tables was empty, start a drop task. + /// If ignore_delay is set, schedule drop task as soon as possible. + if (drop_task && (tables_marked_dropped.size() == 1 || ignore_delay)) (*drop_task)->schedule(); } @@ -777,26 +781,40 @@ void DatabaseCatalog::dropTableDataTask() /// Table can be removed when it's not used by queries and drop_delay_sec elapsed since it was marked as dropped. bool need_reschedule = true; + /// Default reschedule time for the case when we are waiting for reference count to become 1. + size_t schedule_after_ms = reschedule_time_ms; TableMarkedAsDropped table; try { std::lock_guard lock(tables_marked_dropped_mutex); + assert(!tables_marked_dropped.empty()); time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + time_t min_drop_time = std::numeric_limits::max(); + size_t tables_in_use_count = 0; auto it = std::find_if(tables_marked_dropped.begin(), tables_marked_dropped.end(), [&](const auto & elem) { bool not_in_use = !elem.table || elem.table.unique(); - bool old_enough = elem.drop_time + drop_delay_sec < current_time; + bool old_enough = elem.drop_time <= current_time; + min_drop_time = std::min(min_drop_time, elem.drop_time); + tables_in_use_count += !not_in_use; return not_in_use && old_enough; }); if (it != tables_marked_dropped.end()) { table = std::move(*it); - LOG_INFO(log, "Will try drop {}", table.table_id.getNameForLogs()); + LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {}", + tables_marked_dropped.size(), tables_in_use_count, table.table_id.getNameForLogs()); tables_marked_dropped.erase(it); + /// Schedule the task as soon as possible, while there are suitable tables to drop. + schedule_after_ms = 0; } - else + else if (current_time < min_drop_time) { - LOG_TRACE(log, "Not found any suitable tables to drop, still have {} tables in drop queue", tables_marked_dropped.size()); + /// We are waiting for drop_delay_sec to exceed, no sense to wakeup until min_drop_time. + /// If new table is added to the queue with ignore_delay flag, schedule() is called to wakeup the task earlier. + schedule_after_ms = (min_drop_time - current_time) * 1000; + LOG_TRACE(log, "Not found any suitable tables to drop, still have {} tables in drop queue ({} of them are in use). " + "Will check again after {} seconds", tables_marked_dropped.size(), tables_in_use_count, min_drop_time - current_time); } need_reschedule = !tables_marked_dropped.empty(); } @@ -820,11 +838,15 @@ void DatabaseCatalog::dropTableDataTask() tryLogCurrentException(log, "Cannot drop table " + table.table_id.getNameForLogs() + ". Will retry later."); { + table.drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; std::lock_guard lock(tables_marked_dropped_mutex); tables_marked_dropped.emplace_back(std::move(table)); /// If list of dropped tables was empty, schedule a task to retry deletion. if (tables_marked_dropped.size() == 1) + { need_reschedule = true; + schedule_after_ms = drop_error_cooldown_sec * 1000; + } } } @@ -833,7 +855,7 @@ void DatabaseCatalog::dropTableDataTask() /// Do not schedule a task if there is no tables to drop if (need_reschedule) - (*drop_task)->scheduleAfter(reschedule_time_ms); + (*drop_task)->scheduleAfter(schedule_after_ms); } void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index d26307a3bc3..2fd5c8d2be8 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -234,6 +234,7 @@ private: void dropTableFinally(const TableMarkedAsDropped & table); static constexpr size_t reschedule_time_ms = 100; + static constexpr time_t drop_error_cooldown_sec = 5; private: using UUIDToDatabaseMap = std::unordered_map; diff --git a/tests/queries/0_stateless/01193_metadata_loading.sh b/tests/queries/0_stateless/01193_metadata_loading.sh index 319b537e84b..0ee583a7265 100755 --- a/tests/queries/0_stateless/01193_metadata_loading.sh +++ b/tests/queries/0_stateless/01193_metadata_loading.sh @@ -49,4 +49,4 @@ $CLICKHOUSE_CLIENT -q "SELECT if(quantile(0.5)(query_duration_ms) < $max_time_ms $CLICKHOUSE_CLIENT -q "SELECT count() * $count_multiplier, i, d, s, n.i, n.f FROM $db.table_merge GROUP BY i, d, s, n.i, n.f ORDER BY i" -$CLICKHOUSE_CLIENT -q "DROP DATABASE $db" --database_atomic_wait_for_drop_and_detach_synchronously=0 +$CLICKHOUSE_CLIENT -q "DROP DATABASE $db" From 3f21ae55c92da3cc32318c3888174404a9c28491 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Nov 2020 14:20:43 +0300 Subject: [PATCH 16/37] Add a test for #5995 --- .../0_stateless/01548_lzy305.reference | 0 tests/queries/0_stateless/01548_lzy305.sql | 148 ++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 tests/queries/0_stateless/01548_lzy305.reference create mode 100644 tests/queries/0_stateless/01548_lzy305.sql diff --git a/tests/queries/0_stateless/01548_lzy305.reference b/tests/queries/0_stateless/01548_lzy305.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01548_lzy305.sql b/tests/queries/0_stateless/01548_lzy305.sql new file mode 100644 index 00000000000..e2929b1bb1f --- /dev/null +++ b/tests/queries/0_stateless/01548_lzy305.sql @@ -0,0 +1,148 @@ +DROP TABLE IF EXISTS fct_rt_dc_shop_sku_vender_day; + +create table fct_rt_dc_shop_sku_vender_day +( + stat_year UInt16, + stat_month UInt32, + stat_day Date, + out_buid UInt8, + out_shop_id String, + in_shop_id LowCardinality(String), + datasource UInt8, + venderid String, + categorytreeid UInt8, + categoryid String, + goodsid LowCardinality(String), + logistics UInt8, + buntype UInt8, + dctype UInt8, + shopformid UInt8, + rt_qty Decimal(18,4), + rt_cost Decimal(18,4), + rt_taxcost Decimal(18,4), + rt_boxes Decimal(18,4), + rt_shops Nullable(String), + rt_drygood_qty Decimal(18,4), + rt_drygood_cost Decimal(18,4), + rt_drygood_boxes Decimal(18,4), + rt_drygood_shops LowCardinality(Nullable(String)), + rt_fresh_qty Decimal(18,4), + rt_fresh_cost Decimal(18,4), + rt_fresh_shops LowCardinality(Nullable(String)), + rt_supshop_cost Decimal(18,4), + rt_supshop_qty Decimal(18,4), + rt_supshop_boxes Decimal(18,4), + rt_supshop_shops LowCardinality(Nullable(String)), + rt_smallshop_cost Decimal(18,4), + rt_smallshop_qty Decimal(18,4), + rt_smallshop_boxes Decimal(18,4), + rt_smallshop_shops LowCardinality(Nullable(String)), + rt_dc_cost Decimal(18,4), + rt_dc_qty Decimal(18,4), + rt_dc_boxes Decimal(18,4), + rt_dc_shops LowCardinality(Nullable(String)), + rt_drygood_supshop_cost Decimal(18,4), + rt_drygood_supshop_qty Decimal(18,4), + rt_drygood_supshop_boxes Decimal(18,4), + rt_drygood_supshop_shops LowCardinality(Nullable(String)), + rt_drygood_smallshop_cost Decimal(18,4), + rt_drygood_smallshop_qty Decimal(18,4), + rt_drygood_smallshop_boxes Decimal(18,4), + rt_drygood_smallshop_shops LowCardinality(Nullable(String)), + rt_drygood_dc_cost Decimal(18,4), + rt_drygood_dc_qty Decimal(18,4), + rt_drygood_dc_boxes Decimal(18,4), + rt_drygood_dc_shops LowCardinality(Nullable(String)), + rt_fresh_supshop_cost Decimal(18,4), + rt_fresh_supshop_qty Decimal(18,4), + rt_fresh_supshop_shops LowCardinality(Nullable(String)), + rt_fresh_smallshop_cost Decimal(18,4), + rt_fresh_smallshop_qty Decimal(18,4), + rt_fresh_smallshop_shops LowCardinality(Nullable(String)), + rt_fresh_dc_cost Decimal(18,4), + rt_fresh_dc_qty Decimal(18,4), + rt_fresh_dc_shops LowCardinality(Nullable(String)), + stat_day_num String default formatDateTime(stat_day, '%F') +) +engine = MergeTree PARTITION BY toYYYYMM(stat_day) ORDER BY (stat_day, out_shop_id) SETTINGS index_granularity = 8192 +; + + +select stat_year, + stat_month, + out_buid, + out_shop_id, + in_shop_id, + datasource, + venderid, + categorytreeid, + categoryid, + goodsid, + logistics, + buntype, + dctype, + shopformid, + sum(rt_qty), + sum(rt_cost), + sum(rt_taxcost), + sum(rt_boxes), + max(rt_shops), + sum(rt_drygood_qty), + sum(rt_drygood_cost), + sum(rt_drygood_boxes), + max(rt_drygood_shops), + sum(rt_fresh_qty), + sum(rt_fresh_cost), + max(rt_fresh_shops), + sum(rt_supshop_cost), + sum(rt_supshop_qty), + sum(rt_supshop_boxes), + max(rt_supshop_shops), + sum(rt_smallshop_cost), + sum(rt_smallshop_qty), + sum(rt_smallshop_boxes), + max(rt_smallshop_shops), + sum(rt_dc_cost), + sum(rt_dc_qty), + sum(rt_dc_boxes), + max(rt_dc_shops), + sum(rt_drygood_supshop_cost), + sum(rt_drygood_supshop_qty), + sum(rt_drygood_supshop_boxes), + max(rt_drygood_supshop_shops), + sum(rt_drygood_smallshop_cost), + sum(rt_drygood_smallshop_qty), + sum(rt_drygood_smallshop_boxes), + max(rt_drygood_smallshop_shops), + sum(rt_drygood_dc_cost), + sum(rt_drygood_dc_qty), + sum(rt_drygood_dc_boxes), + max(rt_drygood_dc_shops), + sum(rt_fresh_supshop_cost), + sum(rt_fresh_supshop_qty), + max(rt_fresh_supshop_shops), + sum(rt_fresh_smallshop_cost), + sum(rt_fresh_smallshop_qty), + max(rt_fresh_smallshop_shops), + sum(rt_fresh_dc_cost), + sum(rt_fresh_dc_qty), + max(rt_fresh_dc_shops) +from fct_rt_dc_shop_sku_vender_day frdssvd +where stat_day >= toDate('2016-01-01') + and stat_day < addMonths(toDate('2016-01-01'), 1) +group by stat_year, + stat_month, + out_buid, + out_shop_id, + in_shop_id, + datasource, + venderid, + categorytreeid, + categoryid, + goodsid, + logistics, + buntype, + dctype, + shopformid; + +DROP TABLE fct_rt_dc_shop_sku_vender_day; From b0a553e3fdf5e9ff11dbcfc8181150ae62503df7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Nov 2020 14:27:49 +0300 Subject: [PATCH 17/37] Add a test for #5954 --- ...01549_low_cardinality_materialized_view.reference | 3 +++ .../01549_low_cardinality_materialized_view.sql | 12 ++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/01549_low_cardinality_materialized_view.reference create mode 100644 tests/queries/0_stateless/01549_low_cardinality_materialized_view.sql diff --git a/tests/queries/0_stateless/01549_low_cardinality_materialized_view.reference b/tests/queries/0_stateless/01549_low_cardinality_materialized_view.reference new file mode 100644 index 00000000000..43841425c7a --- /dev/null +++ b/tests/queries/0_stateless/01549_low_cardinality_materialized_view.reference @@ -0,0 +1,3 @@ +5754696928334414137 test +HASH_VAL UInt64 +STR_VAL LowCardinality(String) diff --git a/tests/queries/0_stateless/01549_low_cardinality_materialized_view.sql b/tests/queries/0_stateless/01549_low_cardinality_materialized_view.sql new file mode 100644 index 00000000000..a522748b04c --- /dev/null +++ b/tests/queries/0_stateless/01549_low_cardinality_materialized_view.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS HASH_MV; +DROP TABLE IF EXISTS HASH_TEST_INSERT; + +CREATE TABLE HASH_TEST_INSERT (STR_VAL String) ENGINE = Null; +CREATE MATERIALIZED VIEW HASH_MV (HASH_VAL UInt64, STR_VAL LowCardinality(String)) ENGINE = ReplacingMergeTree ORDER BY HASH_VAL AS SELECT xxHash64(STR_VAL) AS HASH_VAL, toLowCardinality(STR_VAL) AS STR_VAL FROM HASH_TEST_INSERT; +INSERT INTO HASH_TEST_INSERT VALUES ('test'); + +SELECT * FROM HASH_MV; +DESC (SELECT * FROM HASH_MV); + +DROP TABLE HASH_MV; +DROP TABLE HASH_TEST_INSERT; From e7ba98fc47061d9f90429b3cd187971f3168eb23 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Nov 2020 14:53:18 +0300 Subject: [PATCH 18/37] Add a test for #7039 --- .../0_stateless/01550_mutation_subquery.reference | 2 ++ tests/queries/0_stateless/01550_mutation_subquery.sql | 11 +++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/01550_mutation_subquery.reference create mode 100644 tests/queries/0_stateless/01550_mutation_subquery.sql diff --git a/tests/queries/0_stateless/01550_mutation_subquery.reference b/tests/queries/0_stateless/01550_mutation_subquery.reference new file mode 100644 index 00000000000..9b4140c39e0 --- /dev/null +++ b/tests/queries/0_stateless/01550_mutation_subquery.reference @@ -0,0 +1,2 @@ +1 2 +--- diff --git a/tests/queries/0_stateless/01550_mutation_subquery.sql b/tests/queries/0_stateless/01550_mutation_subquery.sql new file mode 100644 index 00000000000..811c5eb4d0b --- /dev/null +++ b/tests/queries/0_stateless/01550_mutation_subquery.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS t; + +CREATE TABLE t(`id` String, `dealer_id` String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 8192; +insert into t(id, dealer_id) values('1','2'); +SELECT * FROM t; +SET mutations_sync = 1; +ALTER TABLE t DELETE WHERE id in (select id from t as tmp); +SELECT '---'; +SELECT * FROM t; + +DROP TABLE t; From eea28e5ca4d8dcff6b21f56f367100438c47a716 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Nov 2020 15:16:35 +0300 Subject: [PATCH 19/37] Add a test for #9246 --- tests/queries/0_stateless/01551_context_uaf.reference | 0 tests/queries/0_stateless/01551_context_uaf.sql | 10 ++++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/01551_context_uaf.reference create mode 100644 tests/queries/0_stateless/01551_context_uaf.sql diff --git a/tests/queries/0_stateless/01551_context_uaf.reference b/tests/queries/0_stateless/01551_context_uaf.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01551_context_uaf.sql b/tests/queries/0_stateless/01551_context_uaf.sql new file mode 100644 index 00000000000..03a6c1c49ca --- /dev/null +++ b/tests/queries/0_stateless/01551_context_uaf.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS f; +DROP TABLE IF EXISTS v; + +create table f(s String) engine File(TSV, '/dev/null'); +create view v as (select * from f); +select * from v; -- was failing long time ago +select * from merge('', 'f'); -- was failing long time ago + +DROP TABLE f; +DROP TABLE v; From 4cfae808faa8b7158a5f7cae7ee124eefd16102a Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 2 Nov 2020 18:03:52 +0800 Subject: [PATCH 20/37] Query parameter type : Identifier --- .../ReplaceQueryParameterVisitor.cpp | 27 ++++++++ .../ReplaceQueryParameterVisitor.h | 2 + src/Parsers/ASTIdentifier.cpp | 62 +++++++++++++----- src/Parsers/ASTIdentifier.h | 9 ++- src/Parsers/ExpressionElementParsers.cpp | 65 ++++++++++++++++--- src/Parsers/ExpressionElementParsers.h | 22 ++++++- src/Parsers/ParserTablesInSelectQuery.cpp | 2 +- ...1550_query_identifier_parameters.reference | 5 ++ .../01550_query_identifier_parameters.sh | 10 +++ 9 files changed, 176 insertions(+), 28 deletions(-) create mode 100644 tests/queries/0_stateless/01550_query_identifier_parameters.reference create mode 100755 tests/queries/0_stateless/01550_query_identifier_parameters.sh diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 3dabfb06770..9b4223b8947 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -5,8 +5,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -25,6 +27,8 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) { if (ast->as()) visitQueryParameter(ast); + else if (ast->as()) + visitIdentifier(ast); else visitChildren(ast); } @@ -71,4 +75,27 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) ast->setAlias(alias); } +void ReplaceQueryParameterVisitor::visitIdentifier(ASTPtr & ast) +{ + auto & ast_identifier = ast->as(); + if (ast_identifier.children.empty()) + return; + + auto & name_parts = ast_identifier.name_parts; + for (size_t i = 0, j = 0, size = name_parts.size(); i < size; ++i) + { + if (name_parts[i].empty()) + { + const auto & ast_param = ast_identifier.children[j++]->as(); + name_parts[i] = getParamValue(ast_param.name); + } + } + + if (!ast_identifier.semantic->special && name_parts.size() >= 2) + ast_identifier.semantic->table = ast_identifier.name_parts.end()[-2]; + + ast_identifier.resetFullName(); + ast_identifier.children.clear(); +} + } diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.h b/src/Interpreters/ReplaceQueryParameterVisitor.h index 3a84cd22acd..23e36df3fee 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -9,6 +9,7 @@ namespace DB class ASTQueryParameter; /// Visit substitutions in a query, replace ASTQueryParameter with ASTLiteral. +/// Rebuild ASTIdentifiers if some parts are ASTQueryParameter. class ReplaceQueryParameterVisitor { public: @@ -21,6 +22,7 @@ public: private: const NameToNameMap & query_parameters; const String & getParamValue(const String & name); + void visitIdentifier(ASTPtr & ast); void visitQueryParameter(ASTPtr & ast); void visitChildren(ASTPtr & ast); }; diff --git a/src/Parsers/ASTIdentifier.cpp b/src/Parsers/ASTIdentifier.cpp index d980300a22a..5a66bc7891d 100644 --- a/src/Parsers/ASTIdentifier.cpp +++ b/src/Parsers/ASTIdentifier.cpp @@ -16,26 +16,48 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -ASTIdentifier::ASTIdentifier(const String & short_name) +ASTIdentifier::ASTIdentifier(const String & short_name, ASTPtr && name_param) : full_name(short_name), name_parts{short_name}, semantic(std::make_shared()) { - assert(!full_name.empty()); + if (name_param == nullptr) + assert(!full_name.empty()); + else + children.push_back(std::move(name_param)); } -ASTIdentifier::ASTIdentifier(std::vector && name_parts_, bool special) +ASTIdentifier::ASTIdentifier(std::vector && name_parts_, bool special, std::vector && name_params) : name_parts(name_parts_), semantic(std::make_shared()) { assert(!name_parts.empty()); - for (const auto & part [[maybe_unused]] : name_parts) - assert(!part.empty()); - semantic->special = special; semantic->legacy_compound = true; + if (!name_params.empty()) + { + size_t params = 0; + for (const auto & part [[maybe_unused]] : name_parts) + { + if (part.empty()) + ++params; + } + assert(params == name_params.size()); + children = std::move(name_params); + } + else + { + for (const auto & part [[maybe_unused]] : name_parts) + assert(!part.empty()); - if (!special && name_parts.size() >= 2) - semantic->table = name_parts.end()[-2]; + if (!special && name_parts.size() >= 2) + semantic->table = name_parts.end()[-2]; - resetFullName(); + resetFullName(); + } +} + +ASTPtr ASTIdentifier::getParam() const +{ + assert(full_name.empty() && children.size() == 1); + return children.front()->clone(); } ASTPtr ASTIdentifier::clone() const @@ -64,13 +86,16 @@ void ASTIdentifier::setShortName(const String & new_name) const String & ASTIdentifier::name() const { - assert(!name_parts.empty()); - assert(!full_name.empty()); + if (children.empty()) + { + assert(!name_parts.empty()); + assert(!full_name.empty()); + } return full_name; } -void ASTIdentifier::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const +void ASTIdentifier::formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { auto format_element = [&](const String & elem_name) { @@ -82,17 +107,24 @@ void ASTIdentifier::formatImplWithoutAlias(const FormatSettings & settings, Form /// It could be compound but short if (!isShort()) { - for (size_t i = 0, size = name_parts.size(); i < size; ++i) + for (size_t i = 0, j = 0, size = name_parts.size(); i < size; ++i) { if (i != 0) settings.ostr << '.'; - format_element(name_parts[i]); + if (name_parts[i].empty()) + children[j++]->formatImpl(settings, state, frame); + else + format_element(name_parts[i]); } } else { - format_element(shortName()); + const auto & name = shortName(); + if (name.empty()) + children.front()->formatImpl(settings, state, frame); + else + format_element(name); } } diff --git a/src/Parsers/ASTIdentifier.h b/src/Parsers/ASTIdentifier.h index 59f698eab1c..205b3bb9ad1 100644 --- a/src/Parsers/ASTIdentifier.h +++ b/src/Parsers/ASTIdentifier.h @@ -2,6 +2,7 @@ #include +#include #include #include @@ -17,15 +18,19 @@ struct StorageID; /// Identifier (column, table or alias) class ASTIdentifier : public ASTWithAlias { + friend class ReplaceQueryParameterVisitor; public: UUID uuid = UUIDHelpers::Nil; - explicit ASTIdentifier(const String & short_name); - explicit ASTIdentifier(std::vector && name_parts, bool special = false); + explicit ASTIdentifier(const String & short_name, ASTPtr && name_param = {}); + explicit ASTIdentifier(std::vector && name_parts, bool special = false, std::vector && name_params = {}); /** Get the text that identifies this element. */ String getID(char delim) const override { return "Identifier" + (delim + name()); } + /** Get the query param out of a non-compound identifier. */ + ASTPtr getParam() const; + ASTPtr clone() const override; void collectIdentifierNames(IdentifierNameSet & set) const override { set.insert(name()); } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 3c45bd005a9..1761b0b4358 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -146,7 +146,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } -bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected &) +bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { /// Identifier in backquotes or in double quotes if (pos->type == TokenType::QuotedIdentifier) @@ -172,7 +172,51 @@ bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected &) ++pos; return true; } + else if (allow_query_parameter && pos->type == TokenType::OpeningCurlyBrace) + { + ++pos; + if (pos->type != TokenType::BareWord) + { + expected.add(pos, "substitution name (identifier)"); + return false; + } + String name(pos->begin, pos->end); + ++pos; + + if (pos->type != TokenType::Colon) + { + expected.add(pos, "colon between name and type"); + return false; + } + + ++pos; + + if (pos->type != TokenType::BareWord) + { + expected.add(pos, "substitution type (identifier)"); + return false; + } + + String type(pos->begin, pos->end); + ++pos; + + if (type != "Identifier") + { + expected.add(pos, "substitution type (identifier)"); + return false; + } + + if (pos->type != TokenType::ClosingCurlyBrace) + { + expected.add(pos, "closing curly brace"); + return false; + } + ++pos; + + node = std::make_shared("", std::make_shared(name, type)); + return true; + } return false; } @@ -180,14 +224,19 @@ bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected &) bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr id_list; - if (!ParserList(std::make_unique(), std::make_unique(TokenType::Dot), false) - .parse(pos, id_list, expected)) + if (!ParserList(std::make_unique(allow_query_parameter), std::make_unique(TokenType::Dot), false) + .parse(pos, id_list, expected)) return false; std::vector parts; + std::vector params; const auto & list = id_list->as(); for (const auto & child : list.children) + { parts.emplace_back(getIdentifierName(child)); + if (parts.back() == "") + params.push_back(child->as()->getParam()); + } ParserKeyword s_uuid("UUID"); UUID uuid = UUIDHelpers::Nil; @@ -201,7 +250,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex uuid = parseFromString(ast_uuid->as()->value.get()); } - node = std::make_shared(std::move(parts)); + node = std::make_shared(std::move(parts), false, std::move(params)); node->as()->uuid = uuid; return true; @@ -1174,7 +1223,7 @@ bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword columns("COLUMNS"); - ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); + ParserList columns_p(std::make_unique(true), std::make_unique(TokenType::Comma), false); ParserStringLiteral regex; if (!columns.ignore(pos, expected)) @@ -1252,7 +1301,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e auto parse_id = [&identifiers, &pos, &expected] { ASTPtr identifier; - if (!ParserIdentifier().parse(pos, identifier, expected)) + if (!ParserIdentifier(true).parse(pos, identifier, expected)) return false; identifiers.emplace_back(std::move(identifier)); @@ -1338,7 +1387,7 @@ bool ParserAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (!ParserCompoundIdentifier().parse(pos, node, expected)) + if (!ParserCompoundIdentifier(false, true).parse(pos, node, expected)) return false; if (pos->type != TokenType::Dot) @@ -1475,7 +1524,7 @@ bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & exp || ParserFunction().parse(pos, node, expected) || ParserQualifiedAsterisk().parse(pos, node, expected) || ParserAsterisk().parse(pos, node, expected) - || ParserCompoundIdentifier().parse(pos, node, expected) + || ParserCompoundIdentifier(false, true).parse(pos, node, expected) || ParserSubstitution().parse(pos, node, expected) || ParserMySQLGlobalVariable().parse(pos, node, expected); } diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 702d757761a..86cc3db538c 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -42,9 +42,12 @@ protected: */ class ParserIdentifier : public IParserBase { +public: + ParserIdentifier(bool allow_query_parameter_ = false) : allow_query_parameter(allow_query_parameter_) {} protected: const char * getName() const override { return "identifier"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool allow_query_parameter; }; @@ -54,12 +57,16 @@ protected: class ParserCompoundIdentifier : public IParserBase { public: - ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false) - : table_name_with_optional_uuid(table_name_with_optional_uuid_) {} + ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false) + : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_) + { + } + protected: const char * getName() const override { return "compound identifier"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool table_name_with_optional_uuid; + bool allow_query_parameter; }; /// Just * @@ -299,6 +306,17 @@ private: }; +/** Prepared statements. + * Parse query with parameter expression {name:type}. + */ +class ParserIdentifierOrSubstitution : public IParserBase +{ +protected: + const char * getName() const override { return "identifier substitution"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + /** Prepared statements. * Parse query with parameter expression {name:type}. */ diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index a13baf69420..1264acefe64 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -23,7 +23,7 @@ bool ParserTableExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!ParserWithOptionalAlias(std::make_unique(), true).parse(pos, res->subquery, expected) && !ParserWithOptionalAlias(std::make_unique(), true).parse(pos, res->table_function, expected) - && !ParserWithOptionalAlias(std::make_unique(), true).parse(pos, res->database_and_table_name, expected)) + && !ParserWithOptionalAlias(std::make_unique(false, true), true).parse(pos, res->database_and_table_name, expected)) return false; /// FINAL diff --git a/tests/queries/0_stateless/01550_query_identifier_parameters.reference b/tests/queries/0_stateless/01550_query_identifier_parameters.reference new file mode 100644 index 00000000000..751ee1ae00e --- /dev/null +++ b/tests/queries/0_stateless/01550_query_identifier_parameters.reference @@ -0,0 +1,5 @@ +0 +0 +0 +0 +45 diff --git a/tests/queries/0_stateless/01550_query_identifier_parameters.sh b/tests/queries/0_stateless/01550_query_identifier_parameters.sh new file mode 100755 index 00000000000..85ca67e4e3c --- /dev/null +++ b/tests/queries/0_stateless/01550_query_identifier_parameters.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --param_tbl 'numbers' --query 'select * from system.{tbl:Identifier} limit 1' +$CLICKHOUSE_CLIENT --param_db 'system' --param_tbl 'numbers' --query 'select * from {db:Identifier}.{tbl:Identifier} limit 1' +$CLICKHOUSE_CLIENT --param_col 'number' --query 'select {col:Identifier} from system.numbers limit 1' +$CLICKHOUSE_CLIENT --param_col 'number' --query 'select a.{col:Identifier} from system.numbers a limit 1' +$CLICKHOUSE_CLIENT --param_tbl 'numbers' --param_col 'number' --query 'select sum({tbl:Identifier}.{col:Identifier}) FROM (select * from system.{tbl:Identifier} limit 10) numbers' From 5633dfb402eb1a0da62656efc9946d273a4dec30 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 2 Nov 2020 16:15:40 +0300 Subject: [PATCH 21/37] Update ExpressionElementParsers.h --- src/Parsers/ExpressionElementParsers.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 86cc3db538c..09c43a1f769 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -39,6 +39,7 @@ protected: /** An identifier, for example, x_yz123 or `something special` + * If allow_query_parameter_ = true, also parses substitutions in form {name:Identifier} */ class ParserIdentifier : public IParserBase { @@ -312,7 +313,7 @@ private: class ParserIdentifierOrSubstitution : public IParserBase { protected: - const char * getName() const override { return "identifier substitution"; } + const char * getName() const override { return "identifier or substitution"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; From 5e512568977143f2a5172363a1b85d54d615eb4c Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 2 Nov 2020 23:04:18 +0800 Subject: [PATCH 22/37] Fix warning --- src/Parsers/ExpressionElementParsers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 1761b0b4358..d69e421e6cf 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -234,7 +234,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex for (const auto & child : list.children) { parts.emplace_back(getIdentifierName(child)); - if (parts.back() == "") + if (parts.back().empty()) params.push_back(child->as()->getParam()); } From ac32809b6ab99d685a1025e92e3ce2a903ced70e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 1 Nov 2020 20:38:43 +0300 Subject: [PATCH 23/37] fix #16482 --- src/Databases/DatabaseAtomic.cpp | 26 +++++++++++-------- src/Databases/DatabaseAtomic.h | 2 +- src/Databases/DatabaseOnDisk.cpp | 2 +- src/Storages/IStorage.h | 4 +++ src/Storages/MergeTree/MergeTreeData.h | 1 + src/Storages/StorageDistributed.h | 1 + src/Storages/StorageFile.cpp | 5 ++++ src/Storages/StorageFile.h | 1 + src/Storages/StorageLog.h | 1 + src/Storages/StorageProxy.h | 1 + src/Storages/StorageSet.h | 1 + src/Storages/StorageStripeLog.h | 1 + src/Storages/StorageTinyLog.h | 1 + .../0_stateless/01114_database_atomic.sh | 1 + 14 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 4fcd9f12276..af8b751e787 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -114,7 +114,8 @@ void DatabaseAtomic::dropTable(const Context &, const String & table_name, bool DatabaseWithDictionaries::detachTableUnlocked(table_name, lock); /// Should never throw table_name_to_path.erase(table_name); } - tryRemoveSymlink(table_name); + if (table->storesDataOnDisk()) + tryRemoveSymlink(table_name); /// Remove the inner table (if any) to avoid deadlock /// (due to attempt to execute DROP from the worker thread) if (auto * mv = dynamic_cast(table.get())) @@ -145,7 +146,7 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n String old_metadata_path = getObjectMetadataPath(table_name); String new_metadata_path = to_database.getObjectMetadataPath(to_table_name); - auto detach = [](DatabaseAtomic & db, const String & table_name_) + auto detach = [](DatabaseAtomic & db, const String & table_name_, bool has_symlink) { auto it = db.table_name_to_path.find(table_name_); String table_data_path_saved; @@ -155,7 +156,7 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n assert(!table_data_path_saved.empty() || db.dictionaries.find(table_name_) != db.dictionaries.end()); db.tables.erase(table_name_); db.table_name_to_path.erase(table_name_); - if (!table_data_path_saved.empty()) + if (has_symlink) db.tryRemoveSymlink(table_name_); return table_data_path_saved; }; @@ -166,7 +167,8 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n if (table_data_path_.empty()) return; db.table_name_to_path.emplace(table_name_, table_data_path_); - db.tryCreateSymlink(table_name_, table_data_path_); + if (table_->storesDataOnDisk()) + db.tryCreateSymlink(table_name_, table_data_path_); }; auto assert_can_move_mat_view = [inside_database](const StoragePtr & table_) @@ -228,9 +230,9 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n renameNoReplace(old_metadata_path, new_metadata_path); /// After metadata was successfully moved, the following methods should not throw (if them do, it's a logical error) - table_data_path = detach(*this, table_name); + table_data_path = detach(*this, table_name, table->storesDataOnDisk()); if (exchange) - other_table_data_path = detach(other_db, to_table_name); + other_table_data_path = detach(other_db, to_table_name, other_table->storesDataOnDisk()); auto old_table_id = table->getStorageID(); @@ -286,7 +288,8 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora DatabaseCatalog::instance().removeUUIDMappingFinally(query.uuid); throw; } - tryCreateSymlink(query.table, table_data_path); + if (table->storesDataOnDisk()) + tryCreateSymlink(query.table, table_data_path); } void DatabaseAtomic::commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path) @@ -383,17 +386,18 @@ void DatabaseAtomic::loadStoredObjects(Context & context, bool has_force_restore Poco::File(path_to_table_symlinks).createDirectories(); for (const auto & table : table_names) - tryCreateSymlink(table.first, table.second); + tryCreateSymlink(table.first, table.second, true); } } -void DatabaseAtomic::tryCreateSymlink(const String & table_name, const String & actual_data_path) +void DatabaseAtomic::tryCreateSymlink(const String & table_name, const String & actual_data_path, bool if_data_path_exist) { try { String link = path_to_table_symlinks + escapeForFileName(table_name); - String data = Poco::Path(global_context.getPath()).makeAbsolute().toString() + actual_data_path; - Poco::File{data}.linkTo(link, Poco::File::LINK_SYMBOLIC); + Poco::File data = Poco::Path(global_context.getPath()).makeAbsolute().toString() + actual_data_path; + if (!if_data_path_exist || data.exists()) + data.linkTo(link, Poco::File::LINK_SYMBOLIC); } catch (...) { diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 2d90ed96f1d..82408ff3ab3 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -55,7 +55,7 @@ public: UUID tryGetTableUUID(const String & table_name) const override; - void tryCreateSymlink(const String & table_name, const String & actual_data_path); + void tryCreateSymlink(const String & table_name, const String & actual_data_path, bool if_data_path_exist = false); void tryRemoveSymlink(const String & table_name); void waitDetachedTableNotInUse(const UUID & uuid); diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 8d9f222bf69..4d7fcd4e248 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -321,7 +321,7 @@ void DatabaseOnDisk::renameTable( /// Special case: usually no actions with symlinks are required when detaching/attaching table, /// but not when moving from Atomic database to Ordinary - if (from_atomic_to_ordinary) + if (from_atomic_to_ordinary && table->storesDataOnDisk()) { auto & atomic_db = assert_cast(*this); atomic_db.tryRemoveSymlink(table_name); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 836e2d7dcf1..fd73ae2ccd2 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -449,6 +449,10 @@ public: /// We do not use mutex because it is not very important that the size could change during the operation. virtual void checkPartitionCanBeDropped(const ASTPtr & /*partition*/) {} + /// Returns true if Storage may store some data on disk. + /// NOTE: may not be equivalent to !getDataPaths().empty() + virtual bool storesDataOnDisk() const { return false; } + /// Returns data paths if storage supports it, empty vector otherwise. virtual Strings getDataPaths() const { return {}; } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 5c18661dad1..a8371a2dca2 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -618,6 +618,7 @@ public: /// `additional_path` can be set if part is not located directly in table data path (e.g. 'detached/') std::optional getFullRelativePathForPart(const String & part_name, const String & additional_path = "") const; + bool storesDataOnDisk() const override { return true; } Strings getDataPaths() const override; using PathsWithDisks = std::vector; diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index dd043e5feeb..5adc4ca5627 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -82,6 +82,7 @@ public: void shutdown() override; void drop() override; + bool storesDataOnDisk() const override { return true; } Strings getDataPaths() const override; const ExpressionActionsPtr & getShardingKeyExpr() const { return sharding_key_expr; } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 8c7cd7b63d7..0cc3aa807c4 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -524,6 +524,11 @@ BlockOutputStreamPtr StorageFile::write( chooseCompressionMethod(path, compression_method), context); } +bool StorageFile::storesDataOnDisk() const +{ + return is_db_table; +} + Strings StorageFile::getDataPaths() const { if (paths.empty()) diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index f331538b4c7..eb6f0cb4d87 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -46,6 +46,7 @@ public: void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; + bool storesDataOnDisk() const override; Strings getDataPaths() const override; struct CommonArguments diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 49fc9a576c5..306383a1235 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -41,6 +41,7 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override; + bool storesDataOnDisk() const override { return true; } Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } protected: diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 7b010476b22..e2a6438ecfe 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -144,6 +144,7 @@ public: CheckResults checkData(const ASTPtr & query , const Context & context) override { return getNested()->checkData(query, context); } void checkTableCanBeDropped() const override { getNested()->checkTableCanBeDropped(); } void checkPartitionCanBeDropped(const ASTPtr & partition) override { getNested()->checkPartitionCanBeDropped(partition); } + bool storesDataOnDisk() const override { return getNested()->storesDataOnDisk(); } Strings getDataPaths() const override { return getNested()->getDataPaths(); } StoragePolicyPtr getStoragePolicy() const override { return getNested()->getStoragePolicy(); } std::optional totalRows() const override { return getNested()->totalRows(); } diff --git a/src/Storages/StorageSet.h b/src/Storages/StorageSet.h index 98677dcfb15..9a734b50002 100644 --- a/src/Storages/StorageSet.h +++ b/src/Storages/StorageSet.h @@ -24,6 +24,7 @@ public: BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; + bool storesDataOnDisk() const override { return true; } Strings getDataPaths() const override { return {path}; } protected: diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index f88120a932e..ea2b1693169 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -40,6 +40,7 @@ public: CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) override; + bool storesDataOnDisk() const override { return true; } Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder&) override; diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index dc6ff101503..39c2994aaed 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -39,6 +39,7 @@ public: CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) override; + bool storesDataOnDisk() const override { return true; } Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override; diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index d220491d152..2a3a171b724 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -3,6 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh + $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS test_01114_1" $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS test_01114_2" $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS test_01114_3" From 347782966b14c5c97fef505383a8b12371cb9181 Mon Sep 17 00:00:00 2001 From: Fabiano Francesconi Date: Mon, 2 Nov 2020 18:36:54 +0100 Subject: [PATCH 24/37] Added Teralytics to adopters.md Added Teralytics to list of adopters. --- docs/en/introduction/adopters.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 24705a52a9a..d08e7833b33 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -90,6 +90,7 @@ toc_title: Adopters | Splunk | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | | Spotify | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | | Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) | +| Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | | Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | | Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | | Traffic Stars | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | From 3be8a56f5cdad9f1b514127a93f97f82509edda7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 29 Oct 2020 22:28:46 +0300 Subject: [PATCH 25/37] Add log_queries_min_query_duration_ms Only queries slower then the value of this setting will go to system.query_log, i.e. something like slow_query_log in mysql. v2: log_queries_min_time renamed to log_queries_min_query_duration_ms v3: add current_database into system.query_thread_log v4: rewrite test using current_database v5: fix query_duration_ms in system.query_thread_log --- docs/en/operations/settings/settings.md | 15 ++++++ src/Common/ThreadStatus.h | 2 +- src/Core/Settings.h | 1 + src/Interpreters/ThreadStatusExt.cpp | 18 ++++--- src/Interpreters/executeQuery.cpp | 23 +++++--- ...og_queries_min_query_duration_ms.reference | 4 ++ ...1546_log_queries_min_query_duration_ms.sql | 54 +++++++++++++++++++ 7 files changed, 102 insertions(+), 15 deletions(-) create mode 100644 tests/queries/0_stateless/01546_log_queries_min_query_duration_ms.reference create mode 100644 tests/queries/0_stateless/01546_log_queries_min_query_duration_ms.sql diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index a4bd7d77bfc..359a3579e46 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -680,6 +680,21 @@ Example: log_queries=1 ``` +## log_queries_min_query_duration_ms {#settings-log-queries-min-query-duration-ms} + +Minimal time for the query to run to get to the following tables: + +- `system.query_log` +- `system.query_thread_log` + +Only the queries with the following type will get to the log: + +- `QUERY_FINISH` +- `EXCEPTION_WHILE_PROCESSING` + +- Type: milliseconds +- Default value: 0 (any query) + ## log_queries_min_type {#settings-log-queries-min-type} `query_log` minimal type to log. diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 820ea449d66..1b4d20e9721 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -172,7 +172,7 @@ protected: void finalizeQueryProfiler(); - void logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database); + void logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database, std::chrono::time_point now); void assertState(const std::initializer_list & permitted_states, const char * description = nullptr) const; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 896fcaaca82..dabd3ed8b45 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -150,6 +150,7 @@ class IColumn; \ M(Bool, log_queries, 1, "Log requests and write the log to the system table.", 0) \ M(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "Minimal type in query_log to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \ + M(Milliseconds, log_queries_min_query_duration_ms, 0, "Minimal time for the query to run, to get to the query_log/query_thread_log.", 0) \ M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \ \ M(DistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, "How are distributed subqueries performed inside IN or JOIN sections?", IMPORTANT) \ diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index ffb9e140ce6..adb9a38b10d 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -242,8 +242,15 @@ void ThreadStatus::finalizePerformanceCounters() { const auto & settings = query_context->getSettingsRef(); if (settings.log_queries && settings.log_query_threads) - if (auto thread_log = global_context->getQueryThreadLog()) - logToQueryThreadLog(*thread_log, query_context->getCurrentDatabase()); + { + const auto now = std::chrono::system_clock::now(); + Int64 query_duration_ms = (time_in_microseconds(now) - query_start_time_microseconds) / 1000; + if (query_duration_ms >= settings.log_queries_min_query_duration_ms.totalMilliseconds()) + { + if (auto thread_log = global_context->getQueryThreadLog()) + logToQueryThreadLog(*thread_log, query_context->getCurrentDatabase(), now); + } + } } } catch (...) @@ -322,15 +329,14 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) #endif } -void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database) +void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database, std::chrono::time_point now) { QueryThreadLogElement elem; // construct current_time and current_time_microseconds using the same time point // so that the two times will always be equal up to a precision of a second. - const auto now = std::chrono::system_clock::now(); - auto current_time = time_in_seconds(now); - auto current_time_microseconds = time_in_microseconds(now); + auto current_time = time_in_seconds(now); + auto current_time_microseconds = time_in_microseconds(now); elem.event_time = current_time; elem.event_time_microseconds = current_time_microseconds; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 5feff841ca9..52940a337cb 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -241,7 +241,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c /// Update performance counters before logging to query_log CurrentThread::finalizePerformanceCounters(); - if (settings.log_queries && elem.type >= settings.log_queries_min_type) + if (settings.log_queries && elem.type >= settings.log_queries_min_type && !settings.log_queries_min_query_duration_ms.totalMilliseconds()) if (auto query_log = context.getQueryLog()) query_log->add(elem); @@ -552,7 +552,7 @@ static std::tuple executeQueryImpl( if (settings.log_query_settings) elem.query_settings = std::make_shared(context.getSettingsRef()); - if (elem.type >= settings.log_queries_min_type) + if (elem.type >= settings.log_queries_min_type && !settings.log_queries_min_query_duration_ms.totalMilliseconds()) { if (auto query_log = context.getQueryLog()) query_log->add(elem); @@ -588,8 +588,12 @@ static std::tuple executeQueryImpl( }; /// Also make possible for caller to log successful query finish and exception during execution. - auto finish_callback = [elem, &context, ast, log_queries, log_queries_min_type = settings.log_queries_min_type, - status_info_to_query_log] + auto finish_callback = [elem, &context, ast, + log_queries, + log_queries_min_type = settings.log_queries_min_type, + log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), + status_info_to_query_log + ] (IBlockInputStream * stream_in, IBlockOutputStream * stream_out, QueryPipeline * query_pipeline) mutable { QueryStatus * process_list_elem = context.getProcessListElement(); @@ -655,7 +659,7 @@ static std::tuple executeQueryImpl( elem.thread_ids = std::move(info.thread_ids); elem.profile_counters = std::move(info.profile_counters); - if (log_queries && elem.type >= log_queries_min_type) + if (log_queries && elem.type >= log_queries_min_type && Int64(elem.query_duration_ms) >= log_queries_min_query_duration_ms) { if (auto query_log = context.getQueryLog()) query_log->add(elem); @@ -694,8 +698,11 @@ static std::tuple executeQueryImpl( } }; - auto exception_callback = [elem, &context, ast, log_queries, log_queries_min_type = settings.log_queries_min_type, quota(quota), - status_info_to_query_log] () mutable + auto exception_callback = [elem, &context, ast, + log_queries, + log_queries_min_type = settings.log_queries_min_type, + log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), + quota(quota), status_info_to_query_log] () mutable { if (quota) quota->used(Quota::ERRORS, 1, /* check_exceeded = */ false); @@ -729,7 +736,7 @@ static std::tuple executeQueryImpl( logException(context, elem); /// In case of exception we log internal queries also - if (log_queries && elem.type >= log_queries_min_type) + if (log_queries && elem.type >= log_queries_min_type && Int64(elem.query_duration_ms) >= log_queries_min_query_duration_ms) { if (auto query_log = context.getQueryLog()) query_log->add(elem); diff --git a/tests/queries/0_stateless/01546_log_queries_min_query_duration_ms.reference b/tests/queries/0_stateless/01546_log_queries_min_query_duration_ms.reference new file mode 100644 index 00000000000..0463db26710 --- /dev/null +++ b/tests/queries/0_stateless/01546_log_queries_min_query_duration_ms.reference @@ -0,0 +1,4 @@ +0 +0 +1 +1 diff --git a/tests/queries/0_stateless/01546_log_queries_min_query_duration_ms.sql b/tests/queries/0_stateless/01546_log_queries_min_query_duration_ms.sql new file mode 100644 index 00000000000..f0f681288cf --- /dev/null +++ b/tests/queries/0_stateless/01546_log_queries_min_query_duration_ms.sql @@ -0,0 +1,54 @@ +set log_queries_min_query_duration_ms=300000; +set log_query_threads=1; +set log_queries=1; + +-- +-- fast -- no logging +-- +select '01546_log_queries_min_query_duration_ms-fast' format Null; +system flush logs; + +-- No logging, since the query is fast enough. +select count() +from system.query_log +where + query like '%01546_log_queries_min_query_duration_ms-fast%' + and query not like '%system.query_log%' + and current_database = currentDatabase() + and event_date = today() + and event_time >= now() - interval 1 minute; +select count() +from system.query_thread_log +where + query like '%01546_log_queries_min_query_duration_ms-fast%' + and query not like '%system.query_thread_log%' + and current_database = currentDatabase() + and event_date = today() + and event_time >= now() - interval 1 minute; + +-- +-- slow -- query logged +-- +set log_queries_min_query_duration_ms=300; +select '01546_log_queries_min_query_duration_ms-slow', sleep(0.4) format Null; +system flush logs; + +-- With the limit on minimum execution time, "query start" and "exception before start" events are not logged, only query finish. +select count() +from system.query_log +where + query like '%01546_log_queries_min_query_duration_ms-slow%' + and query not like '%system.query_log%' + and current_database = currentDatabase() + and event_date = today() + and event_time >= now() - interval 1 minute; +-- There at least two threads involved in a simple query +-- (one thread just waits another, sigh) +select count() == 2 +from system.query_thread_log +where + query like '%01546_log_queries_min_query_duration_ms-slow%' + and query not like '%system.query_thread_log%' + and current_database = currentDatabase() + and event_date = today() + and event_time >= now() - interval 1 minute; From e6d8ab2270532e3ed5171d9ee1e8b52ca2843f82 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Nov 2020 21:37:23 +0300 Subject: [PATCH 26/37] Fix possible name collision in ALTER --- src/Interpreters/InterpreterCreateQuery.cpp | 3 ++- src/Storages/AlterCommands.cpp | 9 ++++----- .../0_stateless/01552_alter_name_collision.reference | 0 tests/queries/0_stateless/01552_alter_name_collision.sql | 3 +++ 4 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/01552_alter_name_collision.reference create mode 100644 tests/queries/0_stateless/01552_alter_name_collision.sql diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ddb1d738031..6a8bdbea1ec 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -362,7 +363,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.type) { const auto & final_column_name = col_decl.name; - const auto tmp_column_name = final_column_name + "_tmp"; + const auto tmp_column_name = final_column_name + "_tmp_alter" + toString(randomSeed()); const auto * data_type_ptr = column_names_and_types.back().type.get(); default_expr_list->children.emplace_back( diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 8cae7866748..559121d6911 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -22,12 +22,11 @@ #include #include #include +#include #include #include #include - - -#include +#include namespace DB @@ -1117,7 +1116,7 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con data_type_ptr = command.data_type; const auto & final_column_name = column_name; - const auto tmp_column_name = final_column_name + "_tmp"; + const auto tmp_column_name = final_column_name + "_tmp_alter" + toString(randomSeed()); default_expr_list->children.emplace_back(setAlias( addTypeConversionToAST(std::make_shared(tmp_column_name), data_type_ptr->getName()), @@ -1133,7 +1132,7 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con continue; const auto & final_column_name = column_name; - const auto tmp_column_name = final_column_name + "_tmp"; + const auto tmp_column_name = final_column_name + "_tmp_alter" + toString(randomSeed()); const auto data_type_ptr = command.data_type; default_expr_list->children.emplace_back(setAlias( diff --git a/tests/queries/0_stateless/01552_alter_name_collision.reference b/tests/queries/0_stateless/01552_alter_name_collision.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01552_alter_name_collision.sql b/tests/queries/0_stateless/01552_alter_name_collision.sql new file mode 100644 index 00000000000..dc717f1071a --- /dev/null +++ b/tests/queries/0_stateless/01552_alter_name_collision.sql @@ -0,0 +1,3 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test(test String DEFAULT 'test', test_tmp Int DEFAULT 1)ENGINE = Memory; +DROP TABLE test; From 3bc8ff6af6b2e8ccbbb1b74f2819bf6d9e266643 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Nov 2020 21:52:04 +0300 Subject: [PATCH 27/37] Add a test for #1148 --- .../01552_dict_fixedstring.reference | 2 ++ .../0_stateless/01552_dict_fixedstring.sql | 20 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/queries/0_stateless/01552_dict_fixedstring.reference create mode 100644 tests/queries/0_stateless/01552_dict_fixedstring.sql diff --git a/tests/queries/0_stateless/01552_dict_fixedstring.reference b/tests/queries/0_stateless/01552_dict_fixedstring.reference new file mode 100644 index 00000000000..2f877f0c754 --- /dev/null +++ b/tests/queries/0_stateless/01552_dict_fixedstring.reference @@ -0,0 +1,2 @@ + +Hello\0World diff --git a/tests/queries/0_stateless/01552_dict_fixedstring.sql b/tests/queries/0_stateless/01552_dict_fixedstring.sql new file mode 100644 index 00000000000..7e0269f8e50 --- /dev/null +++ b/tests/queries/0_stateless/01552_dict_fixedstring.sql @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS src; + +CREATE TABLE src (k UInt64, s FixedString(11)) ENGINE = Memory; +INSERT INTO src VALUES (1, 'Hello\0World'); + +DROP DICTIONARY IF EXISTS dict; +CREATE DICTIONARY dict +( + k UInt64, + s String +) +PRIMARY KEY k +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER default TABLE 'src')) +LAYOUT(FLAT) +LIFETIME(MIN 10 MAX 10); + +SELECT dictGet(currentDatabase() || '.dict', 's', number) FROM numbers(2); + +DROP TABLE src; +DROP DICTIONARY dict; From 878fd466c5c141ad6478524f13b2646f0210eb3d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Nov 2020 22:23:26 +0300 Subject: [PATCH 28/37] Apply SETTINGS clause earlier --- src/Interpreters/executeQuery.cpp | 41 ++++++++++++------ .../01553_settings_early_apply.reference | 43 +++++++++++++++++++ .../01553_settings_early_apply.sql | 8 ++++ 3 files changed, 79 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/01553_settings_early_apply.reference create mode 100644 tests/queries/0_stateless/01553_settings_early_apply.sql diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 5feff841ca9..7697bbda36b 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -337,6 +338,27 @@ static std::tuple executeQueryImpl( /// TODO Parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + /// Interpret SETTINGS clauses as early as possible (before invoking the corresponding interpreter), + /// to allow settings to take effect. + if (const auto * select_query = ast->as()) + { + if (auto new_settings = select_query->settings()) + InterpreterSetQuery(new_settings, context).executeForCurrentContext(); + } + else if (const auto * select_with_union_query = ast->as()) + { + if (!select_with_union_query->list_of_selects->children.empty()) + { + if (auto new_settings = select_with_union_query->list_of_selects->children.back()->as()->settings()) + InterpreterSetQuery(new_settings, context).executeForCurrentContext(); + } + } + else if (const auto * query_with_output = dynamic_cast(ast.get())) + { + if (query_with_output->settings_ast) + InterpreterSetQuery(query_with_output->settings_ast, context).executeForCurrentContext(); + } + auto * insert_query = ast->as(); if (insert_query && insert_query->settings_ast) @@ -802,12 +824,12 @@ BlockIO executeQuery( } BlockIO executeQuery( - const String & query, - Context & context, - bool internal, - QueryProcessingStage::Enum stage, - bool may_have_embedded_data, - bool allow_processors) + const String & query, + Context & context, + bool internal, + QueryProcessingStage::Enum stage, + bool may_have_embedded_data, + bool allow_processors) { BlockIO res = executeQuery(query, context, internal, stage, may_have_embedded_data); @@ -876,7 +898,6 @@ void executeQuery( } else if (streams.in) { - /// FIXME: try to prettify this cast using `as<>()` const auto * ast_query_with_output = dynamic_cast(ast.get()); WriteBuffer * out_buf = &ostr; @@ -895,9 +916,6 @@ void executeQuery( ? getIdentifierName(ast_query_with_output->format) : context.getDefaultFormat(); - if (ast_query_with_output && ast_query_with_output->settings_ast) - InterpreterSetQuery(ast_query_with_output->settings_ast, context).executeForCurrentContext(); - BlockOutputStreamPtr out = context.getOutputFormat(format_name, *out_buf, streams.in->getHeader()); /// Save previous progress callback if any. TODO Do it more conveniently. @@ -936,9 +954,6 @@ void executeQuery( ? getIdentifierName(ast_query_with_output->format) : context.getDefaultFormat(); - if (ast_query_with_output && ast_query_with_output->settings_ast) - InterpreterSetQuery(ast_query_with_output->settings_ast, context).executeForCurrentContext(); - if (!pipeline.isCompleted()) { pipeline.addSimpleTransform([](const Block & header) diff --git a/tests/queries/0_stateless/01553_settings_early_apply.reference b/tests/queries/0_stateless/01553_settings_early_apply.reference new file mode 100644 index 00000000000..3dad208be5d --- /dev/null +++ b/tests/queries/0_stateless/01553_settings_early_apply.reference @@ -0,0 +1,43 @@ + number + + 0 + number + + 1 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + } + ], + + "data": + [ + ["0"], + ["1"], + ["2"], + ["3"], + ["4"], + ["5"], + ["6"], + ["7"], + ["8"], + ["9"] + ], + + "rows": 10, + + "rows_before_limit_at_least": 10 +} diff --git a/tests/queries/0_stateless/01553_settings_early_apply.sql b/tests/queries/0_stateless/01553_settings_early_apply.sql new file mode 100644 index 00000000000..48178c38f33 --- /dev/null +++ b/tests/queries/0_stateless/01553_settings_early_apply.sql @@ -0,0 +1,8 @@ +select * from numbers(100) settings max_result_rows = 1; -- { serverError 396 } +select * from numbers(100) FORMAT JSON settings max_result_rows = 1; -- { serverError 396 } + +SET max_result_rows = 1; +select * from numbers(10); -- { serverError 396 } +select * from numbers(10) SETTINGS result_overflow_mode = 'break', max_block_size = 1 FORMAT PrettySpaceNoEscapes; +select * from numbers(10) settings max_result_rows = 10; +select * from numbers(10) FORMAT JSONCompact settings max_result_rows = 10, output_format_write_statistics = 0; From bdd453c54dcc78638c4bfe2e36af5c1ee577301c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 3 Nov 2020 00:47:45 +0300 Subject: [PATCH 29/37] Add 01546_log_queries_min_query_duration_ms into arcadia_skip_list In attempt to fix "Yandex synchronization check (only for Yandex employees)" --- tests/queries/0_stateless/arcadia_skip_list.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index f5b81c08520..f6d899a5742 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -155,5 +155,6 @@ 01509_dictionary_preallocate 01526_max_untracked_memory 01530_drop_database_atomic_sync +01546_log_queries_min_query_duration_ms 01547_query_log_current_database 01548_query_log_query_execution_ms From 5d2c3d177ec28ba9cf22e4b5f10339f91ea96142 Mon Sep 17 00:00:00 2001 From: Yingchun Lai <405403881@qq.com> Date: Tue, 3 Nov 2020 15:53:44 +0800 Subject: [PATCH 30/37] Update query_log.md --- docs/zh/operations/system-tables/query_log.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh/operations/system-tables/query_log.md b/docs/zh/operations/system-tables/query_log.md index bf3aa063a83..6d8d7a39699 100644 --- a/docs/zh/operations/system-tables/query_log.md +++ b/docs/zh/operations/system-tables/query_log.md @@ -3,7 +3,7 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 --- -# 系统。query_log {#system_tables-query_log} +# system.query_log {#system_tables-query_log} 包含有关已执行查询的信息,例如,开始时间、处理持续时间、错误消息。 @@ -140,4 +140,4 @@ Settings.Values: ['0','random','1','10000000000'] **另请参阅** -- [系统。query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. +- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. From 4d8fb3842d48047be16e758d58b1e2b44c203384 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 3 Nov 2020 13:24:51 +0300 Subject: [PATCH 31/37] Fix strange test --- tests/queries/0_stateless/01093_cyclic_defaults_filimonov.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01093_cyclic_defaults_filimonov.sql b/tests/queries/0_stateless/01093_cyclic_defaults_filimonov.sql index 548cd794ba3..f5f88db9d66 100644 --- a/tests/queries/0_stateless/01093_cyclic_defaults_filimonov.sql +++ b/tests/queries/0_stateless/01093_cyclic_defaults_filimonov.sql @@ -1,7 +1,7 @@ CREATE TABLE test ( - `a1` UInt64 DEFAULT a + 1, - `a1` UInt64 DEFAULT a + 1, + `a0` UInt64 DEFAULT a1 + 1, + `a1` UInt64 DEFAULT a0 + 1, `a2` UInt64 DEFAULT a3 + a4, `a3` UInt64 DEFAULT a2 + 1, `a4` UInt64 ALIAS a3 + 1 From 888440fe8f2ea474b3ff6ba145fe56e42a80bd6d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 3 Nov 2020 13:41:56 +0300 Subject: [PATCH 32/37] Skip test in Arcadia --- tests/queries/0_stateless/arcadia_skip_list.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index f5b81c08520..eb9c65885d9 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -157,3 +157,4 @@ 01530_drop_database_atomic_sync 01547_query_log_current_database 01548_query_log_query_execution_ms +01552_dict_fixedstring From a8f325ba84836a01f1d4c3610ec9150a439f15d2 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 3 Nov 2020 14:41:46 +0300 Subject: [PATCH 33/37] Remove redundant check --- src/Interpreters/executeQuery.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index a672b58633d..5a249c15799 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -461,9 +461,6 @@ static std::tuple executeQueryImpl( QueryPipeline & pipeline = res.pipeline; bool use_processors = pipeline.initialized(); - if (res.pipeline.initialized()) - use_processors = true; - if (const auto * insert_interpreter = typeid_cast(&*interpreter)) { /// Save insertion table (not table function). TODO: support remote() table function. From 993af08779180820cde39fc4a80d346d5fad7110 Mon Sep 17 00:00:00 2001 From: sevirov <72220289+sevirov@users.noreply.github.com> Date: Tue, 3 Nov 2020 18:36:25 +0300 Subject: [PATCH 34/37] DOCSUP-1829: Update the system.columns table description (#16576) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update order-by.md Задокументировал параметры OFFSET и FETCH. * Update prewhere.md Поправляю битые ссылки. * Update prewhere.md Вернул изменения назад. * Update order-by.md Пытаюсь исправить битые ссылки. * Update columns.md Добавил position и compression_codec. * Translation columns.md Сделал перевод на русский язык. * Delete changes order-by.md Удалил изменения в файле order-by. Co-authored-by: Dmitriy --- docs/en/operations/system-tables/columns.md | 30 +++++++++++---------- docs/ru/operations/system-tables/columns.md | 30 +++++++++++---------- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index 4d8077ddeac..92cbdd19ca8 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -6,19 +6,21 @@ You can use this table to get information similar to the [DESCRIBE TABLE](../../ The `system.columns` table contains the following columns (the column type is shown in brackets): -- `database` (String) — Database name. -- `table` (String) — Table name. -- `name` (String) — Column name. -- `type` (String) — Column type. -- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. -- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined. -- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes. -- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes. -- `marks_bytes` (UInt64) — The size of marks, in bytes. -- `comment` (String) — Comment on the column, or an empty string if it is not defined. -- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression. -- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. -- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. -- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. +- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Column name. +- `type` ([String](../../sql-reference/data-types/string.md)) — Column type. +- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1. +- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. +- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes. +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes. +- `comment` ([String](../../sql-reference/data-types/string.md)) — Comment on the column, or an empty string if it is not defined. +- `is_in_partition_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the partition expression. +- `is_in_sorting_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sorting key expression. +- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the primary key expression. +- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression. +- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name. [Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) diff --git a/docs/ru/operations/system-tables/columns.md b/docs/ru/operations/system-tables/columns.md index b476d6907f8..8cb9408e7d8 100644 --- a/docs/ru/operations/system-tables/columns.md +++ b/docs/ru/operations/system-tables/columns.md @@ -6,19 +6,21 @@ Таблица `system.columns` содержит столбцы (тип столбца указан в скобках): -- `database` (String) — имя базы данных. -- `table` (String) — имя таблицы. -- `name` (String) — имя столбца. -- `type` (String) — тип столбца. -- `default_kind` (String) — тип выражения (`DEFAULT`, `MATERIALIZED`, `ALIAS`) значения по умолчанию, или пустая строка. -- `default_expression` (String) — выражение для значения по умолчанию или пустая строка. -- `data_compressed_bytes` (UInt64) — размер сжатых данных в байтах. -- `data_uncompressed_bytes` (UInt64) — размер распакованных данных в байтах. -- `marks_bytes` (UInt64) — размер засечек в байтах. -- `comment` (String) — комментарий к столбцу или пустая строка. -- `is_in_partition_key` (UInt8) — флаг, показывающий включение столбца в ключ партиционирования. -- `is_in_sorting_key` (UInt8) — флаг, показывающий включение столбца в ключ сортировки. -- `is_in_primary_key` (UInt8) — флаг, показывающий включение столбца в первичный ключ. -- `is_in_sampling_key` (UInt8) — флаг, показывающий включение столбца в ключ выборки. +- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных. +- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы. +- `name` ([String](../../sql-reference/data-types/string.md)) — имя столбца. +- `type` ([String](../../sql-reference/data-types/string.md)) — тип столбца. +- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — порядковый номер столбца в таблице (нумерация начинается с 1). +- `default_kind` ([String](../../sql-reference/data-types/string.md)) — тип выражения (`DEFAULT`, `MATERIALIZED`, `ALIAS`) для значения по умолчанию или пустая строка. +- `default_expression` ([String](../../sql-reference/data-types/string.md)) — выражение для значения по умолчанию или пустая строка. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер сжатых данных в байтах. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер распакованных данных в байтах. +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер засечек в байтах. +- `comment` ([String](../../sql-reference/data-types/string.md)) — комментарий к столбцу или пустая строка. +- `is_in_partition_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий включение столбца в ключ партиционирования. +- `is_in_sorting_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий включение столбца в ключ сортировки. +- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий включение столбца в первичный ключ. +- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий включение столбца в ключ выборки. +- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — имя кодека сжатия. [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/columns) From 2fae1c3c315e59a1de91096dd528e4bf577fd0f0 Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Tue, 3 Nov 2020 18:59:21 +0300 Subject: [PATCH 35/37] DOCSUP-3171: Document the formatRowNoNewline function (#16603) * add description in English * minor changes * changes in EN and added RU version --- .../functions/type-conversion-functions.md | 38 ++++++++++++++++++ .../functions/type-conversion-functions.md | 39 +++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ba8d379ccec..e4a47e2c620 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -780,4 +780,42 @@ Result: └──────────────────────────────────┘ ``` +## formatRowNoNewline {#formatrownonewline} + +Converts arbitrary expressions into a string via given format. The function trims the last `\n` if any. + +**Syntax** + +``` sql +formatRowNoNewline(format, x, y, ...) +``` + +**Parameters** + +- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). +- `x`,`y`, ... — Expressions. + +**Returned value** + +- A formatted string. + +**Example** + +Query: + +``` sql +SELECT formatRowNoNewline('CSV', number, 'good') +FROM numbers(3) +``` + +Result: + +``` text +┌─formatRowNoNewline('CSV', number, 'good')─┐ +│ 0,"good" │ +│ 1,"good" │ +│ 2,"good" │ +└───────────────────────────────────────────┘ +``` + [Original article](https://clickhouse.tech/docs/en/query_language/functions/type_conversion_functions/) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 773850b65ce..3733e570f10 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -772,4 +772,43 @@ FROM numbers(3) │ └──────────────────────────────────┘ ``` + +## formatRowNoNewline {#formatrownonewline} + +Преобразует произвольные выражения в строку заданного формата. При этом удаляет лишние переводы строк `\n`, если они появились. + +**Синтаксис** + +``` sql +formatRowNoNewline(format, x, y, ...) +``` + +**Параметры** + +- `format` — Текстовый формат. Например, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). +- `x`,`y`, ... — Выражения. + +**Возвращаемое значение** + +- Отформатированная строка (в текстовых форматах без завершающего перевода строки). + +**Пример** + +Запрос: + +``` sql +SELECT formatRowNoNewline('CSV', number, 'good') +FROM numbers(3) +``` + +Ответ: + +``` text +┌─formatRowNoNewline('CSV', number, 'good')─┐ +│ 0,"good" │ +│ 1,"good" │ +│ 2,"good" │ +└───────────────────────────────────────────┘ +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/type_conversion_functions/) From f5ed9adad303d94d03529587aae21913d6c69ea3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 3 Nov 2020 23:52:57 +0300 Subject: [PATCH 36/37] Remove old code --- src/AggregateFunctions/ReservoirSampler.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/AggregateFunctions/ReservoirSampler.h b/src/AggregateFunctions/ReservoirSampler.h index 2ff5ab863af..abf41ad6581 100644 --- a/src/AggregateFunctions/ReservoirSampler.h +++ b/src/AggregateFunctions/ReservoirSampler.h @@ -200,9 +200,6 @@ public: } private: - friend void qdigest_test(int normal_size, UInt64 value_limit, const std::vector & values, int queries_count, bool verbose); - friend void rs_perf_test(); - /// We allocate a little memory on the stack - to avoid allocations when there are many objects with a small number of elements. using Array = DB::PODArrayWithStackMemory; From 64bd63ca4999297e041fe853c6e4b6a2fa5bd5f0 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Wed, 4 Nov 2020 03:08:55 +0300 Subject: [PATCH 37/37] Try to parse DataType arguments as another nested type (#16262) * Try to parse DataType arguments as another nested one * Allow mixed lists of arguments of data types * Restore croaring back * Fix tests --- src/Parsers/ParserDataType.cpp | 31 +++++++++++++++++-- ...oom_filter_index_string_multi_granulas.sql | 3 ++ .../01532_tuple_with_name_type.reference | 5 +++ .../01532_tuple_with_name_type.sql | 21 +++++++++++++ 4 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01532_tuple_with_name_type.reference create mode 100644 tests/queries/0_stateless/01532_tuple_with_name_type.sql diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index a0a4eb97efe..ee746329bff 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -1,13 +1,38 @@ #include -#include -#include + #include #include +#include +#include #include + namespace DB { +namespace +{ + +/// Wrapper to allow mixed lists of nested and normal types. +class ParserNestedTableOrExpression : public IParserBase +{ + private: + const char * getName() const override { return "data type or expression"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + ParserNestedTable parser1; + + if (parser1.parse(pos, node, expected)) + return true; + + ParserExpression parser2; + + return parser2.parse(pos, node, expected); + } +}; + +} + bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserNestedTable nested; @@ -78,7 +103,7 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ++pos; /// Parse optional parameters - ParserList args_parser(std::make_unique(), std::make_unique(TokenType::Comma)); + ParserList args_parser(std::make_unique(), std::make_unique(TokenType::Comma)); ASTPtr expr_list_args; if (!args_parser.parse(pos, expr_list_args, expected)) diff --git a/tests/queries/0_stateless/01307_bloom_filter_index_string_multi_granulas.sql b/tests/queries/0_stateless/01307_bloom_filter_index_string_multi_granulas.sql index 832f7140af2..e96c70bef7f 100644 --- a/tests/queries/0_stateless/01307_bloom_filter_index_string_multi_granulas.sql +++ b/tests/queries/0_stateless/01307_bloom_filter_index_string_multi_granulas.sql @@ -1,4 +1,5 @@ DROP TABLE IF EXISTS test_01307; + CREATE TABLE test_01307 (id UInt64, val String, INDEX ind val TYPE bloom_filter() GRANULARITY 1) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 2; INSERT INTO test_01307 (id, val) select number as id, toString(number) as val from numbers(4); SELECT count() FROM test_01307 WHERE identity(val) = '2'; @@ -6,3 +7,5 @@ SELECT count() FROM test_01307 WHERE val = '2'; OPTIMIZE TABLE test_01307 FINAL; SELECT count() FROM test_01307 WHERE identity(val) = '2'; SELECT count() FROM test_01307 WHERE val = '2'; + +DROP TABLE test_01307; diff --git a/tests/queries/0_stateless/01532_tuple_with_name_type.reference b/tests/queries/0_stateless/01532_tuple_with_name_type.reference new file mode 100644 index 00000000000..f9f6b5995ce --- /dev/null +++ b/tests/queries/0_stateless/01532_tuple_with_name_type.reference @@ -0,0 +1,5 @@ +a Tuple(key String, value String) +a Tuple(Tuple(key String, value String)) +a.key Array(String) +a.value Array(String) +a Tuple(UInt8, Tuple(key String, value String)) diff --git a/tests/queries/0_stateless/01532_tuple_with_name_type.sql b/tests/queries/0_stateless/01532_tuple_with_name_type.sql new file mode 100644 index 00000000000..fbc052d3cc0 --- /dev/null +++ b/tests/queries/0_stateless/01532_tuple_with_name_type.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS test_01532_1; +DROP TABLE IF EXISTS test_01532_2; +DROP TABLE IF EXISTS test_01532_3; +DROP TABLE IF EXISTS test_01532_4; + +CREATE TABLE test_01532_1 (a Tuple(key String, value String)) ENGINE Memory(); +DESCRIBE TABLE test_01532_1; + +CREATE TABLE test_01532_2 (a Tuple(Tuple(key String, value String))) ENGINE Memory(); +DESCRIBE TABLE test_01532_2; + +CREATE TABLE test_01532_3 (a Array(Tuple(key String, value String))) ENGINE Memory(); +DESCRIBE TABLE test_01532_3; + +CREATE TABLE test_01532_4 (a Tuple(UInt8, Tuple(key String, value String))) ENGINE Memory(); +DESCRIBE TABLE test_01532_4; + +DROP TABLE test_01532_1; +DROP TABLE test_01532_2; +DROP TABLE test_01532_3; +DROP TABLE test_01532_4;