Merge branch 'master' into master

This commit is contained in:
mergify[bot] 2022-02-12 12:40:52 +00:00 committed by GitHub
commit b51c185bb6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 127 additions and 309 deletions

20
src/Common/ArenaUtils.h Normal file
View File

@ -0,0 +1,20 @@
#pragma once
#include <string.h>
#include <string>
#include <base/StringRef.h>
/** Copy the bytes referenced by `value` into memory obtained from `arena`.
  *
  * Arena must provide the method:
  *     char * alloc(size_t size);
  *
  * Returns a StringRef that points at the copy and has the same size as `value`.
  * Exactly `value.size` bytes are requested and copied; no terminating zero byte is added.
  * For an empty `value` the arena is still asked for a zero-sized allocation,
  * but nothing is copied.
  */
template <typename Arena>
inline StringRef copyStringInArena(Arena & arena, StringRef value)
{
    const size_t key_size = value.size;
    char * place_for_key = arena.alloc(key_size);

    /// memcpy requires valid pointers even for a zero size, and an empty value
    /// might carry a null data pointer, so the call is skipped in that case.
    if (key_size != 0)
        memcpy(place_for_key, value.data, key_size);

    return StringRef{place_for_key, key_size};
}

View File

@ -2,6 +2,7 @@
#include <base/StringRef.h>
#include <Common/HashTable/HashMap.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/ArenaUtils.h>
#include <unordered_map>
#include <list>
#include <atomic>
@ -115,17 +116,6 @@ private:
}
}
/// Copies the characters of value_to_copy into a buffer obtained from `arena`
/// and returns a StringRef covering that buffer (same length, no terminating zero added).
StringRef copyStringInArena(const std::string & value_to_copy)
{
    const size_t length = value_to_copy.size();
    char * const destination = arena.alloc(length);
    memcpy(destination, value_to_copy.data(), length);
    return StringRef{destination, length};
}
public:
using iterator = typename List::iterator;
@ -139,7 +129,7 @@ public:
if (!it)
{
ListElem elem{copyStringInArena(key), value, true};
ListElem elem{copyStringInArena(arena, key), value, true};
auto itr = list.insert(list.end(), elem);
bool inserted;
map.emplace(itr->key, it, inserted, hash_value);
@ -161,7 +151,7 @@ public:
if (it == map.end())
{
ListElem elem{copyStringInArena(key), value, true};
ListElem elem{copyStringInArena(arena, key), value, true};
auto itr = list.insert(list.end(), elem);
bool inserted;
map.emplace(itr->key, it, inserted, hash_value);

View File

@ -8,10 +8,10 @@
#include <Common/randomSeed.h>
#include <Common/Arena.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/ArenaUtils.h>
#include <Common/HashTable/LRUHashMap.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/ICacheDictionaryStorage.h>
#include <Dictionaries/DictionaryHelpers.h>
namespace DB

View File

@ -623,17 +623,6 @@ void mergeBlockWithPipe(
}
}
/// Duplicates the bytes of `value` in storage taken from `arena`
/// (through `arena.alloc(size)`) and returns a StringRef over the duplicate.
template <typename Arena>
static StringRef copyStringInArena(Arena & arena, StringRef value)
{
    const size_t length = value.size;
    char * const destination = arena.alloc(length);
    memcpy(destination, value.data, length);
    return StringRef{destination, length};
}
/**
* Returns ColumnVector data as PaddedPodArray.

View File

@ -3,6 +3,7 @@
#include <Core/Defines.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
#include <Common/ArenaUtils.h>
#include <DataTypes/DataTypesDecimal.h>
#include <IO/WriteHelpers.h>
@ -13,7 +14,7 @@
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Dictionaries//DictionarySource.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/HierarchyDictionariesUtils.h>

View File

@ -1,5 +1,6 @@
#include "HashedArrayDictionary.h"
#include <Common/ArenaUtils.h>
#include <Core/Defines.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnsNumber.h>

View File

@ -1,5 +1,6 @@
#include "HashedDictionary.h"
#include <Common/ArenaUtils.h>
#include <Core/Defines.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnsNumber.h>

View File

@ -1,5 +1,7 @@
#include <Dictionaries/RangeHashedDictionary.h>
#include <Common/ArenaUtils.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeEnum.h>

View File

@ -16,6 +16,7 @@
#include <Common/randomSeed.h>
#include <Common/Arena.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/ArenaUtils.h>
#include <Common/MemorySanitizer.h>
#include <Common/CurrentMetrics.h>
#include <Common/HashTable/HashMap.h>

View File

@ -1292,8 +1292,8 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context,
key_expr_type_not_null = key_expr_type;
bool cast_not_needed = is_set_const /// Set args are already casted inside Set::createFromAST
|| ((isNativeNumber(key_expr_type_not_null) || isDateTime(key_expr_type_not_null))
&& (isNativeNumber(const_type) || isDateTime(const_type))); /// Numbers and DateTime are accurately compared without cast.
|| ((isNativeInteger(key_expr_type_not_null) || isDateTime(key_expr_type_not_null))
&& (isNativeInteger(const_type) || isDateTime(const_type))); /// Native integers and DateTime are accurately compared without cast.
if (!cast_not_needed && !key_expr_type_not_null->equals(*const_type))
{

View File

@ -1,5 +1,7 @@
#include "PartitionedSink.h"
#include <Common/ArenaUtils.h>
#include <Functions/FunctionsConversion.h>
#include <Interpreters/Context.h>
@ -40,19 +42,18 @@ PartitionedSink::PartitionedSink(
}
/// Returns the sink registered for the given partition.
/// When the lookup finds nothing, a sink is created with createSinkForPartition()
/// and stored in the map before it is returned, so later calls reuse it.
/// NOTE(review): in the StringRef variant the key is stored in the map as passed in,
/// so the bytes it refers to presumably have to outlive the map entry - confirm with callers.
SinkPtr PartitionedSink::getSinkForPartition(const String & partition_id)
SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key)
{
auto it = sinks.find(partition_id);
if (it == sinks.end())
auto it = partition_id_to_sink.find(partition_key);
if (it == partition_id_to_sink.end())
{
auto sink = createSinkForPartition(partition_id);
/// Only the iterator part of the emplace() result is kept; the second element is discarded.
std::tie(it, std::ignore) = sinks.emplace(partition_id, sink);
/// createSinkForPartition() is given an owning string built from the key.
auto sink = createSinkForPartition(partition_key.toString());
/// Only the iterator part of the emplace() result is kept; the second element is discarded.
std::tie(it, std::ignore) = partition_id_to_sink.emplace(partition_key, sink);
}
return it->second;
}
void PartitionedSink::consume(Chunk chunk)
{
const auto & columns = chunk.getColumns();
@ -61,45 +62,59 @@ void PartitionedSink::consume(Chunk chunk)
block_with_partition_by_expr.setColumns(columns);
partition_by_expr->execute(block_with_partition_by_expr);
const auto * column = block_with_partition_by_expr.getByName(partition_by_column_name).column.get();
const auto * partition_by_result_column = block_with_partition_by_expr.getByName(partition_by_column_name).column.get();
std::unordered_map<String, size_t> sub_chunks_indices;
IColumn::Selector selector;
for (size_t row = 0; row < chunk.getNumRows(); ++row)
size_t chunk_rows = chunk.getNumRows();
chunk_row_index_to_partition_index.resize(chunk_rows);
partition_id_to_chunk_index.clear();
for (size_t row = 0; row < chunk_rows; ++row)
{
auto value = column->getDataAt(row);
auto [it, inserted] = sub_chunks_indices.emplace(value, sub_chunks_indices.size());
selector.push_back(it->second);
auto partition_key = partition_by_result_column->getDataAt(row);
auto [it, inserted] = partition_id_to_chunk_index.insert(makePairNoInit(partition_key, partition_id_to_chunk_index.size()));
if (inserted)
it->value.first = copyStringInArena(partition_keys_arena, partition_key);
chunk_row_index_to_partition_index[row] = it->getMapped();
}
Chunks sub_chunks;
sub_chunks.reserve(sub_chunks_indices.size());
for (size_t column_index = 0; column_index < columns.size(); ++column_index)
size_t columns_size = columns.size();
size_t partitions_size = partition_id_to_chunk_index.size();
Chunks partition_index_to_chunk;
partition_index_to_chunk.reserve(partitions_size);
for (size_t column_index = 0; column_index < columns_size; ++column_index)
{
MutableColumns column_sub_chunks = columns[column_index]->scatter(sub_chunks_indices.size(), selector);
if (column_index == 0) /// Set sizes for sub-chunks.
MutableColumns partition_index_to_column_split = columns[column_index]->scatter(partitions_size, chunk_row_index_to_partition_index);
/// Add chunks into partition_index_to_chunk with sizes of result columns
if (column_index == 0)
{
for (const auto & column_sub_chunk : column_sub_chunks)
for (const auto & partition_column : partition_index_to_column_split)
{
sub_chunks.emplace_back(Columns(), column_sub_chunk->size());
partition_index_to_chunk.emplace_back(Columns(), partition_column->size());
}
}
for (size_t sub_chunk_index = 0; sub_chunk_index < column_sub_chunks.size(); ++sub_chunk_index)
for (size_t partition_index = 0; partition_index < partitions_size; ++partition_index)
{
sub_chunks[sub_chunk_index].addColumn(std::move(column_sub_chunks[sub_chunk_index]));
partition_index_to_chunk[partition_index].addColumn(std::move(partition_index_to_column_split[partition_index]));
}
}
for (const auto & [partition_id, sub_chunk_index] : sub_chunks_indices)
for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index)
{
getSinkForPartition(partition_id)->consume(std::move(sub_chunks[sub_chunk_index]));
auto sink = getSinkForPartitionKey(partition_key);
sink->consume(std::move(partition_index_to_chunk[partition_index]));
}
}
void PartitionedSink::onFinish()
{
for (auto & [partition_id, sink] : sinks)
for (auto & [_, sink] : partition_id_to_sink)
{
sink->onFinish();
}

View File

@ -1,5 +1,8 @@
#pragma once
#include <Common/HashTable/HashMap.h>
#include <Common/Arena.h>
#include <absl/container/flat_hash_map.h>
#include <Processors/Sinks/SinkToStorage.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/Context_fwd.h>
@ -34,9 +37,13 @@ private:
/// Expression executed by consume() on a block built from the columns of each incoming chunk.
ExpressionActionsPtr partition_by_expr;
/// Name of the column, looked up after partition_by_expr has run, whose per-row values are the partition keys.
String partition_by_column_name;
/// Partition id -> sink for that partition; filled on first use by getSinkForPartition().
std::unordered_map<String, SinkPtr> sinks;
/// Partition key -> sink for that partition; filled on first use by getSinkForPartitionKey().
absl::flat_hash_map<StringRef, SinkPtr> partition_id_to_sink;
/// Partition key -> index of that partition's sub-chunk within the chunk currently being consumed.
/// Cleared by consume() before it walks the rows of a chunk.
HashMapWithSavedHash<StringRef, size_t> partition_id_to_chunk_index;
/// For every row of the current chunk, the index of the partition it belongs to;
/// passed to IColumn::scatter() as the selector.
IColumn::Selector chunk_row_index_to_partition_index;
/// Receives a copy of each partition key newly inserted into partition_id_to_chunk_index.
/// NOTE(review): nothing visible here releases this arena, while keys are re-copied for
/// every consumed chunk - confirm that its growth is bounded.
Arena partition_keys_arena;
/// Returns the sink for partition_key, creating it if it does not exist yet.
SinkPtr getSinkForPartitionKey(StringRef partition_key);
/// Returns the sink for partition_id, creating it if it does not exist yet.
SinkPtr getSinkForPartition(const String & partition_id);
};
}

View File

@ -14,7 +14,7 @@ class StorageSystemAsynchronousInserts final :
public IStorageSystemOneBlock<StorageSystemAsynchronousInserts>
{
public:
std::string getName() const override { return "AsynchronousInserts"; }
std::string getName() const override { return "SystemAsynchronousInserts"; }
static NamesAndTypesList getNamesAndTypes();
protected:

View File

@ -1,5 +1,5 @@
CREATE TABLE system.aggregate_function_combinators\n(\n `name` String,\n `is_internal` UInt8\n)\nENGINE = SystemAggregateFunctionCombinators()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
CREATE TABLE system.asynchronous_inserts\n(\n `query` String,\n `database` String,\n `table` String,\n `format` String,\n `first_update` DateTime64(6),\n `last_update` DateTime64(6),\n `total_bytes` UInt64,\n `entries.query_id` Array(String),\n `entries.bytes` Array(UInt64),\n `entries.finished` Array(UInt8),\n `entries.exception` Array(String)\n)\nENGINE = AsynchronousInserts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
CREATE TABLE system.asynchronous_inserts\n(\n `query` String,\n `database` String,\n `table` String,\n `format` String,\n `first_update` DateTime64(6),\n `last_update` DateTime64(6),\n `total_bytes` UInt64,\n `entries.query_id` Array(String),\n `entries.bytes` Array(UInt64),\n `entries.finished` Array(UInt8),\n `entries.exception` Array(String)\n)\nENGINE = SystemAsynchronousInserts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
CREATE TABLE system.asynchronous_metrics\n(\n `metric` String,\n `value` Float64\n)\nENGINE = SystemAsynchronousMetrics()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
CREATE TABLE system.build_options\n(\n `name` String,\n `value` String\n)\nENGINE = SystemBuildOptions()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
CREATE TABLE system.clusters\n(\n `cluster` String,\n `shard_num` UInt32,\n `shard_weight` UInt32,\n `replica_num` UInt32,\n `host_name` String,\n `host_address` String,\n `port` UInt16,\n `is_local` UInt8,\n `user` String,\n `default_database` String,\n `errors_count` UInt32,\n `slowdowns_count` UInt32,\n `estimated_recovery_time` UInt32\n)\nENGINE = SystemClusters()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'

View File

@ -0,0 +1,9 @@
2
2
2
2
2
2
2
2
1

View File

@ -0,0 +1,34 @@
-- Counts rows of MergeTree tables ordered by `a` when the constant in the WHERE clause
-- is written with a numeric type that may differ from the type of the key column.
DROP TABLE IF EXISTS t_key_condition_float;
-- Case 1: Float32 key. Both inserted values (0.1 and 0.2) are greater than zero,
-- so each of the four queries is expected to count 2 rows.
CREATE TABLE t_key_condition_float (a Float32)
ENGINE = MergeTree ORDER BY a;
INSERT INTO t_key_condition_float VALUES (0.1), (0.2);
-- Constant written as an integer literal, a float literal, and explicit Float32 / Float64 casts.
SELECT count() FROM t_key_condition_float WHERE a > 0;
SELECT count() FROM t_key_condition_float WHERE a > 0.0;
SELECT count() FROM t_key_condition_float WHERE a > 0::Float32;
SELECT count() FROM t_key_condition_float WHERE a > 0::Float64;
DROP TABLE t_key_condition_float;
-- Case 2: the same four queries against a Float64 key; again 2 rows are expected each time.
CREATE TABLE t_key_condition_float (a Float64)
ENGINE = MergeTree ORDER BY a;
INSERT INTO t_key_condition_float VALUES (0.1), (0.2);
SELECT count() FROM t_key_condition_float WHERE a > 0;
SELECT count() FROM t_key_condition_float WHERE a > 0.0;
SELECT count() FROM t_key_condition_float WHERE a > 0::Float32;
SELECT count() FROM t_key_condition_float WHERE a > 0::Float64;
DROP TABLE t_key_condition_float;
-- Case 3: UInt64 key compared with a fractional constant. Of the values 1 and 2
-- only 2 is greater than 1.5, so 1 row is expected.
CREATE TABLE t_key_condition_float (a UInt64)
ENGINE = MergeTree ORDER BY a;
INSERT INTO t_key_condition_float VALUES (1), (2);
SELECT count() FROM t_key_condition_float WHERE a > 1.5;
DROP TABLE t_key_condition_float;

View File

@ -1,252 +0,0 @@
# Configuration file for Uncrustify code formatter.
# https://github.com/uncrustify/uncrustify
#
# Created with https://cdanu.github.io/uncrustify_config_preview/index.html
#
# You may apply it to your code with:
# uncrustify -l CPP -c uncrustify.cfg -f filename.cpp
#
# This config is in beta: it doesn't implement our style guide perfectly.
# It's not recommended to apply it to an existing code base.
newlines = lf
input_tab_size = 4
output_tab_size = 4
string_replace_tab_chars = true
utf8_bom = remove
utf8_byte = true
utf8_force = true
sp_arith = force
sp_assign = force
sp_cpp_lambda_assign = remove
sp_cpp_lambda_paren = remove
sp_assign_default = force
sp_enum_assign = force
sp_enum_colon = force
sp_pp_concat = force
sp_pp_stringify = remove
sp_bool = force
sp_compare = force
sp_inside_paren = remove
sp_paren_paren = remove
sp_paren_brace = force
sp_before_ptr_star = force
sp_between_ptr_star = remove
sp_after_ptr_star = force
sp_after_ptr_star_qualifier = force
sp_after_ptr_star_func = force
sp_ptr_star_paren = force
sp_before_ptr_star_func = force
sp_before_byref = force
sp_before_unnamed_byref = force
sp_after_byref = force
sp_after_byref_func = force
sp_before_byref_func = force
sp_template_angle = force
sp_before_angle = remove
sp_inside_angle = remove
sp_angle_colon = force
sp_after_angle = force
sp_angle_paren = remove
sp_angle_paren_empty = remove
sp_angle_word = force
sp_angle_shift = remove
sp_permit_cpp11_shift = true
sp_before_sparen = force
sp_inside_sparen = remove
sp_after_sparen = force
sp_sparen_brace = force
sp_special_semi = force
sp_before_semi_for = remove
sp_before_semi_for_empty = remove
sp_after_semi = force
sp_after_semi_for_empty = remove
sp_before_square = remove
sp_before_squares = remove
sp_inside_square = remove
sp_after_comma = force
sp_before_ellipsis = remove
sp_after_class_colon = force
sp_before_class_colon = force
sp_after_constr_colon = force
sp_before_constr_colon = force
sp_after_operator = remove
sp_after_operator_sym = remove
sp_after_cast = remove
sp_inside_paren_cast = remove
sp_cpp_cast_paren = remove
sp_sizeof_paren = remove
sp_inside_braces_enum = force
sp_inside_braces_struct = force
sp_inside_braces = force
sp_inside_braces_empty = remove
sp_type_func = force
sp_func_proto_paren = remove
sp_func_proto_paren_empty = remove
sp_func_def_paren = remove
sp_func_def_paren_empty = remove
sp_inside_fparens = remove
sp_inside_fparen = remove
sp_inside_tparen = remove
sp_after_tparen_close = remove
sp_square_fparen = remove
sp_fparen_brace = force
sp_func_call_paren = remove
sp_func_class_paren = remove
sp_func_class_paren_empty = remove
sp_return_paren = force
sp_attribute_paren = remove
sp_defined_paren = remove
sp_throw_paren = force
sp_after_throw = force
sp_catch_paren = force
sp_macro = add
sp_macro_func = add
sp_else_brace = force
sp_brace_else = force
sp_brace_typedef = force
sp_catch_brace = force
sp_brace_catch = force
sp_try_brace = force
sp_word_brace = remove
sp_word_brace_ns = force
sp_before_dc = remove
sp_after_dc = remove
sp_cond_colon = force
sp_cond_colon_before = force
sp_cond_colon_after = force
sp_cond_question = force
sp_cond_question_before = force
sp_cond_question_after = force
sp_cond_ternary_short = remove
sp_cmt_cpp_start = force
sp_cmt_cpp_doxygen = true
sp_cmt_cpp_qttr = true
sp_endif_cmt = force
sp_after_new = force
sp_between_new_paren = remove
sp_after_newop_paren = force
sp_inside_newop_paren = remove
sp_before_tr_emb_cmt = force
indent_columns = 4
indent_with_tabs = 0
indent_namespace = false
indent_namespace_limit = 100
indent_class = true
indent_ctor_init_leading = 1
indent_shift = true
indent_func_call_param = true
indent_func_def_param = true
indent_func_proto_param = true
indent_func_class_param = true
indent_func_ctor_var_param = true
indent_template_param = true
indent_member = 4
indent_switch_case = 4
indent_switch_pp = false
indent_label = 0
indent_access_spec = -4
indent_paren_close = 2
indent_paren_after_func_def = true
indent_paren_after_func_decl = true
indent_paren_after_func_call = true
indent_align_assign = false
indent_token_after_brace = false
indent_cpp_lambda_body = true
indent_ternary_operator = 1
nl_assign_leave_one_liners = true
nl_class_leave_one_liners = true
nl_enum_leave_one_liners = true
nl_getset_leave_one_liners = true
nl_func_leave_one_liners = true
nl_cpp_lambda_leave_one_liners = true
nl_cpp_ldef_brace = add
nl_if_leave_one_liners = true
nl_start_of_file = remove
nl_end_of_file = force
nl_enum_brace = add
nl_struct_brace = add
nl_union_brace = add
nl_if_brace = add
nl_brace_else = add
nl_else_brace = add
nl_else_if = remove
nl_before_if_closing_paren = remove
nl_try_brace = add
nl_for_brace = add
nl_catch_brace = add
nl_brace_catch = add
nl_while_brace = add
nl_do_brace = add
nl_brace_while = remove
nl_switch_brace = add
nl_multi_line_define = true
nl_before_case = true
nl_after_case = true
nl_case_colon_brace = add
nl_namespace_brace = add
nl_template_class = add
nl_class_brace = add
nl_enum_own_lines = add
nl_func_scope_name = remove
nl_func_paren = remove
nl_func_def_paren = remove
nl_func_call_paren = remove
nl_func_call_paren_empty = remove
nl_func_decl_start_multi_line = true
nl_func_def_start_multi_line = true
nl_func_decl_args_multi_line = true
nl_func_def_args_multi_line = true
nl_func_decl_end = remove
nl_func_def_end = remove
nl_func_decl_empty = remove
nl_func_def_empty = remove
nl_func_call_empty = remove
nl_func_call_start_multi_line = true
nl_func_call_args_multi_line = true
nl_fdef_brace = add
nl_after_semicolon = true
nl_constr_colon = force
nl_split_if_one_liner = true
nl_split_for_one_liner = true
nl_split_while_one_liner = true
nl_max = 3
nl_max_blank_in_func = 2
nl_after_func_proto = 1
nl_after_func_proto_group = 2
nl_after_func_class_proto = 1
nl_after_func_class_proto_group = 2
nl_before_func_body_def = 1
nl_before_func_body_proto = 1
nl_after_func_body = 3
nl_after_func_body_class = 3
nl_after_func_body_one_liner = 1
nl_after_multiline_comment = true
nl_after_struct = 3
nl_before_class = 3
nl_after_class = 3
nl_before_access_spec = 2
nl_after_access_spec = 1
nl_after_try_catch_finally = 1
eat_blanks_after_open_brace = true
eat_blanks_before_close_brace = true
nl_remove_extra_newlines = 1
nl_after_return = true
pos_constr_comma = lead_break
pos_constr_colon = lead_force
code_width = 160
ls_func_split_full = true
ls_code_width = true
align_left_shift = false
cmt_convert_tab_to_spaces = true
mod_full_brace_for = remove
mod_full_brace_if = remove
mod_full_brace_if_chain = true
mod_full_brace_while = remove
mod_paren_on_return = remove
mod_remove_extra_semicolon = true
mod_remove_empty_return = true
align_func_params = true
align_func_params_thresh = 140
sp_inside_type_brace_init_lst = remove
nl_constr_init_args = add