From c65fec341ce246a4c95c0b2c1256762ba6308785 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 23 Oct 2018 10:16:28 +0300 Subject: [PATCH 01/79] Test for MySQL as range_hashed dictionary source --- .../generate_and_test.py | 74 ++++++++++++++++--- 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/dbms/tests/external_dictionaries/generate_and_test.py b/dbms/tests/external_dictionaries/generate_and_test.py index 98b67345326..db2f4fe5e9c 100755 --- a/dbms/tests/external_dictionaries/generate_and_test.py +++ b/dbms/tests/external_dictionaries/generate_and_test.py @@ -145,6 +145,15 @@ def generate_structure(args): # [ 'executable_flat' + base_name, 3, True ] ]) + if not args.no_mysql: + for range_hashed_range_type in range_hashed_range_types: + base_name = 'range_hashed_' + range_hashed_range_type + dictionaries.extend([ + ['mysql_' + base_name, 3, False], + # [ 'clickhouse_' + base_name, 3, True ], + # [ 'executable_flat' + base_name, 3, True ] + ]) + files = [ 'key_simple.tsv', 'key_complex_integers.tsv', 'key_complex_mixed.tsv', 'key_range_hashed_{range_hashed_range_type}.tsv' ] @@ -206,6 +215,20 @@ range_hashed_dictGet_values = { ("toDateTime('2015-11-19 23:59:59')", "toDateTime('2015-10-26 00:00:01')", "toDateTime('2018-09-14 00:00:00')")], } +range_hashed_mysql_column_types = { + 'UInt8': 'tinyint unsigned', + 'UInt16': 'smallint unsigned', + 'UInt32': 'int unsigned', + 'UInt64': 'bigint unsigned', + 'Int8': 'tinyint', + 'Int16': 'smallint', + 'Int32': 'int', + 'Int64': 'bigint', + # default type (Date) for compatibility with older versions: + '': 'date', + 'Date': 'date', + 'DateTime': 'datetime', +} def dump_report(destination, suite, test_case, report): if destination is not None: @@ -268,6 +291,24 @@ def generate_data(args): query = file_source_query % comma_separated(chain(keys, columns(), ['Parent'] if 1 == len(keys) else [])) call([args.client, '--port', args.port, '--query', query], 'generated/' + file) + if not args.no_mysql: + print 'Creating MySQL table for "{0}"...'.format(range_hashed_range_type) + table_name = "test.dictionary_source_" + range_hashed_range_type + col_type = range_hashed_mysql_column_types[range_hashed_range_type] + subprocess.check_call('echo "' + 'create database if not exists test;' + 'drop table if exists {table_name};' + 'create table {table_name} (' + 'id tinyint unsigned, StartDate {col_type}, EndDate {col_type}, ' + 'UInt8_ tinyint unsigned, UInt16_ smallint unsigned, UInt32_ int unsigned, UInt64_ bigint unsigned, ' + 'Int8_ tinyint, Int16_ smallint, Int32_ int, Int64_ bigint, ' + 'Float32_ float, Float64_ double, ' + 'String_ text, Date_ date, DateTime_ datetime, Parent bigint unsigned, UUID_ varchar(36)' + ');' + 'load data local infile \'{0}/generated/{file}\' into table {table_name};" | mysql $MYSQL_OPTIONS --local-infile=1' + .format(prefix, table_name=table_name, col_type=col_type, file=file), shell=True) + + # create MySQL table from complete_query if not args.no_mysql: print 'Creating MySQL table' @@ -384,7 +425,7 @@ def generate_dictionaries(args): root test - dictionary_source
+        <table>dictionary_source{key_type}</table>
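        <!-- A sketch of how {key_type} is used: generate_dictionaries()
             substitutes "" for the default Date-keyed sources and
             "_<range type>" (e.g. "_UInt16") for the typed range_hashed
             sources, so the table resolves to e.g. dictionary_source_UInt16. -->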
''' @@ -568,14 +609,15 @@ def generate_dictionaries(args): ]) if not args.no_mysql: + source_mysql_default = source_mysql.format(key_type="") sources_and_layouts.extend([ - [ source_mysql, layout_flat ], - [ source_mysql, layout_hashed ], - [ source_mysql, layout_cache ], - [ source_mysql, layout_complex_key_hashed ], - [ source_mysql, layout_complex_key_cache ], - [ source_mysql, layout_complex_key_hashed ], - [ source_mysql, layout_complex_key_cache ], + [ source_mysql_default, layout_flat ], + [ source_mysql_default, layout_hashed ], + [ source_mysql_default, layout_cache ], + [ source_mysql_default, layout_complex_key_hashed ], + [ source_mysql_default, layout_complex_key_cache ], + [ source_mysql_default, layout_complex_key_hashed ], + [ source_mysql_default, layout_complex_key_cache ], ]) if not args.no_mongo: @@ -619,10 +661,22 @@ def generate_dictionaries(args): # [ source_executable, layout_range_hashed ] ]) + if not args.no_mysql: + for range_hashed_range_type in range_hashed_range_types: + key_type = "_" + range_hashed_range_type + source_mysql_typed = source_mysql.format(key_type=key_type) + sources_and_layouts.extend([ + [source_mysql_typed, + (layout_range_hashed, range_hashed_range_type)], + ]) + + dict_name_filter = args.filter.split('/')[0] for (name, key_idx, has_parent), (source, layout) in zip(dictionaries, sources_and_layouts): + if args.filter and not fnmatch.fnmatch(name, dict_name_filter): + continue + filename = os.path.join(args.generated, 'dictionary_%s.xml' % name) key = keys[key_idx] - if key_idx == 3: layout, range_hashed_range_type = layout # Wrap non-empty type (default) with tag. @@ -670,7 +724,7 @@ def run_tests(args): global SERVER_DIED print "{0:100}".format('Dictionary: ' + dict + ' Name: ' + name + ": "), - if args.filter and not fnmatch.fnmatch(dict, args.filter) and not fnmatch.fnmatch(name, args.filter): + if args.filter and not fnmatch.fnmatch(dict + "/" + name, args.filter): print " ... skipped due to filter." 
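            # Filter patterns now name both parts, "<dictionary>/<test name>",
            # e.g. --filter='mysql_range_hashed_*/*'; generate_dictionaries()
            # reuses the part before '/' (dict_name_filter) to skip whole
            # dictionaries early.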
return From e3dea84265feb422cf56e77a5d435416e323a0e6 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Tue, 23 Oct 2018 20:43:09 +0300 Subject: [PATCH 02/79] support more hash functions for tuples --- dbms/src/Functions/FunctionsHashing.h | 299 +++++++----------- .../0_stateless/00678_murmurhash.reference | 22 +- .../queries/0_stateless/00678_murmurhash.sql | 1 - .../00746_hashing_tuples.reference | 12 + .../0_stateless/00746_hashing_tuples.sql | 15 + 5 files changed, 161 insertions(+), 188 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00746_hashing_tuples.reference create mode 100644 dbms/tests/queries/0_stateless/00746_hashing_tuples.sql diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index 9ea16bc09fe..84b61062398 100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -54,8 +54,10 @@ namespace ErrorCodes * Fast non-cryptographic hash function for strings: * cityHash64: String -> UInt64 * - * A non-cryptographic hash from a tuple of values of any types (uses cityHash64 for strings and intHash64 for numbers): + * A non-cryptographic hashes from a tuple of values of any types (uses respective function for strings and intHash64 for numbers): * cityHash64: any* -> UInt64 + * sipHash64: any* -> UInt64 + * halfMD5: any* -> UInt64 * * Fast non-cryptographic hash function from any integer: * intHash32: number -> UInt32 @@ -63,8 +65,31 @@ namespace ErrorCodes * */ +struct IntHash32Impl +{ + using ReturnType = UInt32; + + static UInt32 apply(UInt64 x) + { + /// seed is taken from /dev/urandom. It allows you to avoid undesirable dependencies with hashes in different data structures. + return intHash32<0x75D9543DE018BF45ULL>(x); + } +}; + +struct IntHash64Impl +{ + using ReturnType = UInt64; + + static UInt64 apply(UInt64 x) + { + return intHash64(x ^ 0x4CF2D2BAAE6DA887ULL); + } +}; + + struct HalfMD5Impl { + static constexpr auto name = "halfMD5"; using ReturnType = UInt64; static UInt64 apply(const char * begin, size_t size) @@ -82,6 +107,11 @@ struct HalfMD5Impl return Poco::ByteOrder::flipBytes(buf.uint64_data); /// Compatibility with existing code. } + + static UInt64 mergeHashes(UInt64 h1, UInt64 h2) + { + return IntHash64Impl::apply(h1) ^ h2; + } }; struct MD5Impl @@ -142,12 +172,18 @@ struct SHA256Impl struct SipHash64Impl { + static constexpr auto name = "sipHash64"; + using ReturnType = UInt64; static UInt64 apply(const char * begin, size_t size) { return sipHash64(begin, size); } + + static UInt64 mergeHashes(UInt64 h1, UInt64 h2){ + return IntHash64Impl::apply(h1) ^ h2; + } }; @@ -162,27 +198,6 @@ struct SipHash128Impl } }; -struct IntHash32Impl -{ - using ReturnType = UInt32; - - static UInt32 apply(UInt64 x) - { - /// seed is taken from /dev/urandom. It allows you to avoid undesirable dependencies with hashes in different data structures. - return intHash32<0x75D9543DE018BF45ULL>(x); - } -}; - -struct IntHash64Impl -{ - using ReturnType = UInt64; - - static UInt64 apply(UInt64 x) - { - return intHash64(x ^ 0x4CF2D2BAAE6DA887ULL); - } -}; - template class FunctionStringHashFixedString : public IFunction @@ -322,19 +337,18 @@ public: }; -/** We use hash functions called CityHash, FarmHash, MetroHash. - * In this regard, this template is named with the words `NeighborhoodHash`. 
- */ template -class FunctionNeighbourhoodHash64 : public IFunction +class FunctionAnyHash : public IFunction { public: static constexpr auto name = Impl::name; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } private: + using ToType = typename Impl::ReturnType; + template - void executeIntType(const IColumn * column, ColumnUInt64::Container & vec_to) + void executeIntType(const IColumn * column, typename ColumnVector::Container & vec_to) { if (const ColumnVector * col_from = checkAndGetColumn>(column)) { @@ -342,16 +356,26 @@ private: size_t size = vec_from.size(); for (size_t i = 0; i < size; ++i) { - UInt64 h = IntHash64Impl::apply(ext::bit_cast(vec_from[i])); + ToType h; + if constexpr (std::is_same_v) + h = IntHash64Impl::apply(ext::bit_cast(vec_from[i])); + else + h = IntHash32Impl::apply(ext::bit_cast(vec_from[i])); if (first) vec_to[i] = h; else - vec_to[i] = Impl::Hash128to64(typename Impl::uint128_t(vec_to[i], h)); + vec_to[i] = Impl::mergeHashes(vec_to[i], h); } } else if (auto col_from = checkAndGetColumnConst>(column)) { - const UInt64 hash = IntHash64Impl::apply(ext::bit_cast(col_from->template getValue())); + auto value = col_from->template getValue(); + ToType hash; + if constexpr (std::is_same_v) + hash = IntHash64Impl::apply(ext::bit_cast(value)); + else + hash = IntHash32Impl::apply(ext::bit_cast(value)); + size_t size = vec_to.size(); if (first) { @@ -360,7 +384,7 @@ private: else { for (size_t i = 0; i < size; ++i) - vec_to[i] = Impl::Hash128to64(typename Impl::uint128_t(vec_to[i], hash)); + vec_to[i] = Impl::mergeHashes(vec_to[i], hash); } } else @@ -370,7 +394,7 @@ private: } template - void executeString(const IColumn * column, ColumnUInt64::Container & vec_to) + void executeString(const IColumn * column, typename ColumnVector::Container & vec_to) { if (const ColumnString * col_from = checkAndGetColumn(column)) { @@ -381,14 +405,14 @@ private: ColumnString::Offset current_offset = 0; for (size_t i = 0; i < size; ++i) { - const UInt64 h = Impl::Hash64( + const ToType h = Impl::apply( reinterpret_cast(&data[current_offset]), offsets[i] - current_offset - 1); if (first) vec_to[i] = h; else - vec_to[i] = Impl::Hash128to64(typename Impl::uint128_t(vec_to[i], h)); + vec_to[i] = Impl::mergeHashes(vec_to[i], h); current_offset = offsets[i]; } @@ -401,17 +425,17 @@ private: for (size_t i = 0; i < size; ++i) { - const UInt64 h = Impl::Hash64(reinterpret_cast(&data[i * n]), n); + const ToType h = Impl::apply(reinterpret_cast(&data[i * n]), n); if (first) vec_to[i] = h; else - vec_to[i] = Impl::Hash128to64(typename Impl::uint128_t(vec_to[i], h)); + vec_to[i] = Impl::mergeHashes(vec_to[i], h); } } else if (const ColumnConst * col_from = checkAndGetColumnConstStringOrFixedString(column)) { String value = col_from->getValue().data(); - const UInt64 hash = Impl::Hash64(value.data(), value.size()); + const ToType hash = Impl::apply(value.data(), value.size()); const size_t size = vec_to.size(); if (first) @@ -422,7 +446,7 @@ private: { for (size_t i = 0; i < size; ++i) { - vec_to[i] = Impl::Hash128to64(typename Impl::uint128_t(vec_to[i], hash)); + vec_to[i] = Impl::mergeHashes(vec_to[i], hash); } } } @@ -433,7 +457,7 @@ private: } template - void executeArray(const IDataType * type, const IColumn * column, ColumnUInt64::Container & vec_to) + void executeArray(const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to) { const IDataType * nested_type = 
typeid_cast(type)->getNestedType().get(); @@ -443,7 +467,7 @@ private: const ColumnArray::Offsets & offsets = col_from->getOffsets(); const size_t nested_size = nested_column->size(); - ColumnUInt64::Container vec_temp(nested_size); + typename ColumnVector::Container vec_temp(nested_size); executeAny(nested_type, nested_column, vec_temp); const size_t size = offsets.size(); @@ -453,14 +477,19 @@ private: { ColumnArray::Offset next_offset = offsets[i]; - UInt64 h = IntHash64Impl::apply(next_offset - current_offset); + ToType h; + if constexpr (std::is_same_v) + h = IntHash64Impl::apply(next_offset - current_offset); + else + h = IntHash32Impl::apply(next_offset - current_offset); + if (first) vec_to[i] = h; else - vec_to[i] = Impl::Hash128to64(typename Impl::uint128_t(vec_to[i], h)); + vec_to[i] = Impl::mergeHashes(vec_to[i], h); for (size_t j = current_offset; j < next_offset; ++j) - vec_to[i] = Impl::Hash128to64(typename Impl::uint128_t(vec_to[i], vec_temp[j])); + vec_to[i] = Impl::mergeHashes(vec_to[i], vec_temp[j]); current_offset = offsets[i]; } @@ -478,7 +507,7 @@ private: } template - void executeAny(const IDataType * from_type, const IColumn * icolumn, ColumnUInt64::Container & vec_to) + void executeAny(const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to) { WhichDataType which(from_type); @@ -504,7 +533,7 @@ private: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - void executeForArgument(const IDataType * type, const IColumn * column, ColumnUInt64::Container & vec_to, bool & is_first) + void executeForArgument(const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) { /// Flattening of tuples. if (const ColumnTuple * tuple = typeid_cast(column)) @@ -549,20 +578,20 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { - return std::make_shared(); + return std::make_shared>(); } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { size_t rows = input_rows_count; - auto col_to = ColumnUInt64::create(rows); + auto col_to = ColumnVector::create(rows); - ColumnUInt64::Container & vec_to = col_to->getData(); + typename ColumnVector::Container & vec_to = col_to->getData(); if (arguments.empty()) { /// Constant random number from /dev/urandom is used as a hash value of empty list of arguments. - vec_to.assign(rows, static_cast(0xe28dbde7fe22e41c)); + vec_to.assign(rows, static_cast(0xe28dbde7fe22e41c)); } /// The function supports arbitrary number of arguments of arbitrary types. 
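        /// A sketch of the combining scheme implemented above, for two
        /// arguments whose per-argument hashes are h1 and h2:
        ///     vec_to[i] = h1;                                  // first argument
        ///     vec_to[i] = Impl::mergeHashes(vec_to[i], h2);    // each next one
        /// mergeHashes is Hash128to64(uint128(h1, h2)) for the cityHash64-style
        /// implementations and intHash64(h1) ^ h2 for sipHash64, halfMD5
        /// and the murmur variants.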
@@ -579,110 +608,6 @@ public: }; -template -class FunctionStringHash : public IFunction -{ -public: - static constexpr auto name = Name::name; - static FunctionPtr create(const Context &) { return std::make_shared(); } - - String getName() const override { return name; } - - bool isVariadic() const override { return false; } - - size_t getNumberOfArguments() const override { return 1; } - - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments */) const override - { return std::make_shared>(); } - - bool useDefaultImplementationForConstants() const override { return true; } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override - { - auto col_to = ColumnVector::create(input_rows_count); - typename ColumnVector::Container & vec_to = col_to->getData(); - - const ColumnWithTypeAndName & col = block.getByPosition(arguments[0]); - const IDataType * from_type = col.type.get(); - const IColumn * icolumn = col.column.get(); - WhichDataType which(from_type); - - if (which.isUInt8()) executeIntType(icolumn, vec_to); - else if (which.isUInt16()) executeIntType(icolumn, vec_to); - else if (which.isUInt32()) executeIntType(icolumn, vec_to); - else if (which.isUInt64()) executeIntType(icolumn, vec_to); - else if (which.isInt8()) executeIntType(icolumn, vec_to); - else if (which.isInt16()) executeIntType(icolumn, vec_to); - else if (which.isInt32()) executeIntType(icolumn, vec_to); - else if (which.isInt64()) executeIntType(icolumn, vec_to); - else if (which.isEnum8()) executeIntType(icolumn, vec_to); - else if (which.isEnum16()) executeIntType(icolumn, vec_to); - else if (which.isDate()) executeIntType(icolumn, vec_to); - else if (which.isDateTime()) executeIntType(icolumn, vec_to); - else if (which.isFloat32()) executeIntType(icolumn, vec_to); - else if (which.isFloat64()) executeIntType(icolumn, vec_to); - else if (which.isStringOrFixedString()) executeString(icolumn, vec_to); - else - throw Exception("Unexpected type " + from_type->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - block.getByPosition(result).column = std::move(col_to); - } -private: - using ToType = typename Impl::ReturnType; - - template - void executeIntType(const IColumn * column, typename ColumnVector::Container & vec_to) - { - if (const ColumnVector * col_from = checkAndGetColumn>(column)) - { - const typename ColumnVector::Container & vec_from = col_from->getData(); - size_t size = vec_from.size(); - for (size_t i = 0; i < size; ++i) - { - vec_to[i] = Impl::apply(reinterpret_cast(&vec_from[i]), sizeof(FromType)); - } - } - else - throw Exception("Illegal column " + column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - - void executeString(const IColumn * column, typename ColumnVector::Container & vec_to) - { - if (const ColumnString * col_from = checkAndGetColumn(column)) - { - const typename ColumnString::Chars_t & data = col_from->getChars(); - const typename ColumnString::Offsets & offsets = col_from->getOffsets(); - size_t size = offsets.size(); - - ColumnString::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) - { - vec_to[i] = Impl::apply( - reinterpret_cast(&data[current_offset]), - offsets[i] - current_offset - 1); - - current_offset = offsets[i]; - } - } - else if (const ColumnFixedString * col_from = checkAndGetColumn(column)) - { - const typename ColumnString::Chars_t & data = col_from->getChars(); - size_t n = col_from->getN(); - size_t size 
= data.size() / n; - for (size_t i = 0; i < size; ++i) - vec_to[i] = Impl::apply(reinterpret_cast(&data[i * n]), n); - } - else - throw Exception("Illegal column " + column->getName() - + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - - /** Why we need MurmurHash2? * MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash. * Usually there is no reason to use MurmurHash. @@ -692,26 +617,41 @@ private: */ struct MurmurHash2Impl32 { + static constexpr auto name = "murmurHash2_32"; + using ReturnType = UInt32; static UInt32 apply(const char * data, const size_t size) { return MurmurHash2(data, size, 0); } + + static UInt32 mergeHashes(UInt32 h1, UInt32 h2) + { + return IntHash32Impl::apply(h1) ^ h2; + } }; struct MurmurHash2Impl64 { + static constexpr auto name = "murmurHash2_64"; + using ReturnType = UInt64; static UInt64 apply(const char * data, const size_t size) { return MurmurHash64A(data, size, 0); } + + static UInt64 mergeHashes(UInt64 h1, UInt64 h2){ + return IntHash64Impl::apply(h1) ^ h2; + } }; struct MurmurHash3Impl32 { + static constexpr auto name = "murmurHash3_32"; + using ReturnType = UInt32; static UInt32 apply(const char * data, const size_t size) @@ -724,10 +664,16 @@ struct MurmurHash3Impl32 MurmurHash3_x86_32(data, size, 0, bytes); return h; } + + static UInt32 mergeHashes(UInt32 h1, UInt32 h2) + { + return IntHash32Impl::apply(h1) ^ h2; + } }; struct MurmurHash3Impl64 { + static constexpr auto name = "murmurHash3_64"; using ReturnType = UInt64; static UInt64 apply(const char * data, const size_t size) @@ -740,6 +686,11 @@ struct MurmurHash3Impl64 MurmurHash3_x64_128(data, size, 0, bytes); return h[0] ^ h[1]; } + + static UInt64 mergeHashes(UInt64 h1, UInt64 h2) + { + return IntHash64Impl::apply(h1) ^ h2; + } }; struct MurmurHash3Impl128 @@ -943,43 +894,39 @@ private: }; -struct NameHalfMD5 { static constexpr auto name = "halfMD5"; }; -struct NameSipHash64 { static constexpr auto name = "sipHash64"; }; struct NameIntHash32 { static constexpr auto name = "intHash32"; }; struct NameIntHash64 { static constexpr auto name = "intHash64"; }; -struct NameMurmurHash2_32 { static constexpr auto name = "murmurHash2_32"; }; -struct NameMurmurHash2_64 { static constexpr auto name = "murmurHash2_64"; }; -struct NameMurmurHash3_32 { static constexpr auto name = "murmurHash3_32"; }; -struct NameMurmurHash3_64 { static constexpr auto name = "murmurHash3_64"; }; -struct NameMurmurHash3_128 { static constexpr auto name = "murmurHash3_128"; }; struct ImplCityHash64 { static constexpr auto name = "cityHash64"; + using ReturnType = UInt64; using uint128_t = CityHash_v1_0_2::uint128; - static auto Hash128to64(const uint128_t & x) { return CityHash_v1_0_2::Hash128to64(x); } - static auto Hash64(const char * s, const size_t len) { return CityHash_v1_0_2::CityHash64(s, len); } + static auto mergeHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } + static auto apply(const char * s, const size_t len) { return CityHash_v1_0_2::CityHash64(s, len); } }; // see farmhash.h for definition of NAMESPACE_FOR_HASH_FUNCTIONS struct ImplFarmHash64 { static constexpr auto name = "farmHash64"; + using ReturnType = UInt64; using uint128_t = NAMESPACE_FOR_HASH_FUNCTIONS::uint128_t; - static auto Hash128to64(const uint128_t & x) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash128to64(x); } - static auto Hash64(const char * s, const size_t len) { return 
NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(s, len); } + static auto mergeHashes(UInt64 h1, UInt64 h2) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash128to64(uint128_t(h1, h2)); } + static auto apply(const char * s, const size_t len) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(s, len); } }; struct ImplMetroHash64 { static constexpr auto name = "metroHash64"; + using ReturnType = UInt64; using uint128_t = CityHash_v1_0_2::uint128; - static auto Hash128to64(const uint128_t & x) { return CityHash_v1_0_2::Hash128to64(x); } - static auto Hash64(const char * s, const size_t len) + static auto mergeHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } + static auto apply(const char * s, const size_t len) { union { @@ -993,8 +940,8 @@ struct ImplMetroHash64 } }; -using FunctionHalfMD5 = FunctionStringHash; -using FunctionSipHash64 = FunctionStringHash; +using FunctionHalfMD5 = FunctionAnyHash; +using FunctionSipHash64 = FunctionAnyHash; using FunctionIntHash32 = FunctionIntHash; using FunctionIntHash64 = FunctionIntHash; using FunctionMD5 = FunctionStringHashFixedString; @@ -1002,12 +949,12 @@ using FunctionSHA1 = FunctionStringHashFixedString; using FunctionSHA224 = FunctionStringHashFixedString; using FunctionSHA256 = FunctionStringHashFixedString; using FunctionSipHash128 = FunctionStringHashFixedString; -using FunctionCityHash64 = FunctionNeighbourhoodHash64; -using FunctionFarmHash64 = FunctionNeighbourhoodHash64; -using FunctionMetroHash64 = FunctionNeighbourhoodHash64; -using FunctionMurmurHash2_32 = FunctionStringHash; -using FunctionMurmurHash2_64 = FunctionStringHash; -using FunctionMurmurHash3_32 = FunctionStringHash; -using FunctionMurmurHash3_64 = FunctionStringHash; +using FunctionCityHash64 = FunctionAnyHash; +using FunctionFarmHash64 = FunctionAnyHash; +using FunctionMetroHash64 = FunctionAnyHash; +using FunctionMurmurHash2_32 = FunctionAnyHash; +using FunctionMurmurHash2_64 = FunctionAnyHash; +using FunctionMurmurHash3_32 = FunctionAnyHash; +using FunctionMurmurHash3_64 = FunctionAnyHash; using FunctionMurmurHash3_128 = FunctionStringHashFixedString; } diff --git a/dbms/tests/queries/0_stateless/00678_murmurhash.reference b/dbms/tests/queries/0_stateless/00678_murmurhash.reference index 548c5c1cae6..f9d3ed38508 100644 --- a/dbms/tests/queries/0_stateless/00678_murmurhash.reference +++ b/dbms/tests/queries/0_stateless/00678_murmurhash.reference @@ -1,26 +1,26 @@ -623211862 -3533626746 -2388617433 -2708309598 +3012058918 +1298551497 +864444010 +367840556 2414502773 670491991 +1343103100 0 0 0 0 -0 -0 -0 -1 +1343103100 +1996614413 +kek 1 14834356025302342401 12725806677685968135 -12725806677685968135 +10577349846663553072 4138058784 3831157163 -3831157163 +1343103100 11303473983767132390 956517343494314387 -956517343494314387 +10577349846663553072 6145F501578671E2877DBA2BE487AF7E 16FE7483905CCE7A85670E43E4678877 diff --git a/dbms/tests/queries/0_stateless/00678_murmurhash.sql b/dbms/tests/queries/0_stateless/00678_murmurhash.sql index 9d20b56aa93..1b800c7eb9c 100644 --- a/dbms/tests/queries/0_stateless/00678_murmurhash.sql +++ b/dbms/tests/queries/0_stateless/00678_murmurhash.sql @@ -13,7 +13,6 @@ SELECT murmurHash2_32('\x03\0\0'); SELECT murmurHash2_32(1); SELECT murmurHash2_32(toUInt16(2)); -SELECT murmurHash2_32(2) = bitXor(toUInt32(0x5bd1e995 * bitXor(toUInt32(3 * 0x5bd1e995) AS a, bitShiftRight(a, 13))) AS b, bitShiftRight(b, 15)); SELECT murmurHash2_32('\x02') = bitXor(toUInt32(0x5bd1e995 * bitXor(toUInt32(3 * 0x5bd1e995) AS a, bitShiftRight(a, 13))) 
AS b, bitShiftRight(b, 15)); SELECT murmurHash2_64('foo'); diff --git a/dbms/tests/queries/0_stateless/00746_hashing_tuples.reference b/dbms/tests/queries/0_stateless/00746_hashing_tuples.reference new file mode 100644 index 00000000000..b38b216a8d6 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00746_hashing_tuples.reference @@ -0,0 +1,12 @@ +6847376565456338547 +15499510486101262177 +3822366986039497337 +617416965 +3293554683 +15433379 +6847376565456338547 +15499510486101262177 +3137889964064254064 +6847376565456338547 +15499510486101262177 +6284898493105666575 diff --git a/dbms/tests/queries/0_stateless/00746_hashing_tuples.sql b/dbms/tests/queries/0_stateless/00746_hashing_tuples.sql new file mode 100644 index 00000000000..ce26225d730 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00746_hashing_tuples.sql @@ -0,0 +1,15 @@ +SELECT sipHash64(1, 2, 3); +SELECT sipHash64(1, 3, 2); +SELECT sipHash64('a', [1, 2, 3], 4); + +SELECT murmurHash2_32(1, 2, 3); +SELECT murmurHash2_32(1, 3, 2); +SELECT murmurHash2_32('a', [1, 2, 3], 4); + +SELECT murmurHash2_64(1, 2, 3); +SELECT murmurHash2_64(1, 3, 2); +SELECT murmurHash2_64('a', [1, 2, 3], 4); + +SELECT murmurHash3_64(1, 2, 3); +SELECT murmurHash3_64(1, 3, 2); +SELECT murmurHash3_64('a', [1, 2, 3], 4); From ef5c65c3dd30111385e399aa9ccb6e48e39b8a8d Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Tue, 23 Oct 2018 20:48:01 +0300 Subject: [PATCH 03/79] fix test --- dbms/tests/queries/0_stateless/00678_murmurhash.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00678_murmurhash.reference b/dbms/tests/queries/0_stateless/00678_murmurhash.reference index f9d3ed38508..2699ce220f5 100644 --- a/dbms/tests/queries/0_stateless/00678_murmurhash.reference +++ b/dbms/tests/queries/0_stateless/00678_murmurhash.reference @@ -11,7 +11,6 @@ 0 1343103100 1996614413 -kek 1 14834356025302342401 12725806677685968135 From 68f2bea4ba177b478c1c3277e4e33fc236b3f832 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Tue, 23 Oct 2018 21:07:20 +0300 Subject: [PATCH 04/79] fix style --- dbms/src/Functions/FunctionsHashing.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index 84b61062398..2c645b2f37c 100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -173,7 +173,6 @@ struct SHA256Impl struct SipHash64Impl { static constexpr auto name = "sipHash64"; - using ReturnType = UInt64; static UInt64 apply(const char * begin, size_t size) @@ -181,7 +180,8 @@ struct SipHash64Impl return sipHash64(begin, size); } - static UInt64 mergeHashes(UInt64 h1, UInt64 h2){ + static UInt64 mergeHashes(UInt64 h1, UInt64 h2) + { return IntHash64Impl::apply(h1) ^ h2; } }; @@ -635,7 +635,6 @@ struct MurmurHash2Impl32 struct MurmurHash2Impl64 { static constexpr auto name = "murmurHash2_64"; - using ReturnType = UInt64; static UInt64 apply(const char * data, const size_t size) @@ -643,7 +642,8 @@ struct MurmurHash2Impl64 return MurmurHash64A(data, size, 0); } - static UInt64 mergeHashes(UInt64 h1, UInt64 h2){ + static UInt64 mergeHashes(UInt64 h1, UInt64 h2) + { return IntHash64Impl::apply(h1) ^ h2; } }; @@ -651,7 +651,6 @@ struct MurmurHash2Impl64 struct MurmurHash3Impl32 { static constexpr auto name = "murmurHash3_32"; - using ReturnType = UInt32; static UInt32 apply(const char * data, const size_t size) From 3cf2177268d8d44274c29ded086360bf86b7ed5e Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 22 Oct 2018 
16:54:54 +0800 Subject: [PATCH 05/79] Flexible fields assignment Field f; before: f = Int64(-42); after: f = -42; --- .../AggregateFunctionHistogram.h | 6 +- dbms/src/Columns/ColumnLowCardinality.cpp | 2 +- dbms/src/Columns/ColumnNullable.cpp | 4 +- dbms/src/Columns/ColumnVector.cpp | 4 +- dbms/src/Columns/ColumnVector.h | 4 +- dbms/src/Core/Field.h | 64 +++++++++++++------ .../DataStreams/FilterBlockInputStream.cpp | 6 +- .../GraphiteRollupSortedBlockInputStream.cpp | 2 +- dbms/src/DataTypes/DataTypeEnum.cpp | 4 +- .../Dictionaries/DictionaryBlockInputStream.h | 2 +- .../Dictionaries/MySQLBlockInputStream.cpp | 24 +++---- .../src/Dictionaries/ODBCBlockInputStream.cpp | 24 +++---- .../RangeDictionaryBlockInputStream.h | 2 +- dbms/src/Dictionaries/TrieDictionary.cpp | 2 +- dbms/src/Formats/CapnProtoRowInputStream.cpp | 10 +-- dbms/src/Functions/FunctionsComparison.h | 4 +- .../Functions/FunctionsExternalDictionaries.h | 2 +- dbms/src/Functions/FunctionsGeo.h | 2 +- dbms/src/Functions/GatherUtils/Algorithms.h | 4 +- dbms/src/Functions/arrayAll.cpp | 2 +- dbms/src/Functions/arrayCount.cpp | 2 +- dbms/src/Functions/arrayElement.cpp | 2 +- dbms/src/Functions/arrayExists.cpp | 2 +- dbms/src/Functions/arrayFirstIndex.cpp | 2 +- dbms/src/Functions/arrayIndex.h | 2 +- dbms/src/Functions/arrayIntersect.cpp | 2 +- dbms/src/Functions/getSizeOfEnumType.cpp | 4 +- dbms/src/Functions/hasColumnInTable.cpp | 2 +- dbms/src/Functions/ignore.cpp | 2 +- dbms/src/Functions/indexHint.cpp | 2 +- dbms/src/Functions/isNotNull.cpp | 2 +- dbms/src/Functions/isNull.cpp | 2 +- dbms/src/Functions/sleep.h | 2 +- dbms/src/Functions/timeSlots.cpp | 2 +- dbms/src/Functions/today.cpp | 2 +- dbms/src/Functions/version.cpp | 2 +- dbms/src/Functions/yesterday.cpp | 2 +- dbms/src/Interpreters/Aggregator.h | 2 +- dbms/src/Interpreters/DDLWorker.cpp | 8 +-- .../Interpreters/InterpreterCheckQuery.cpp | 2 +- .../Interpreters/InterpreterCreateQuery.cpp | 2 +- .../InterpreterKillQueryQuery.cpp | 2 +- dbms/src/Interpreters/PartLog.cpp | 22 ++++--- dbms/src/Interpreters/QueryLog.cpp | 39 ++++++----- dbms/src/Interpreters/QueryThreadLog.cpp | 30 ++++----- dbms/src/Interpreters/convertFieldToType.cpp | 12 ++-- .../evaluateConstantExpression.cpp | 2 +- dbms/src/Storages/AlterCommands.cpp | 6 +- dbms/src/Storages/Kafka/StorageKafka.cpp | 2 +- .../MergeTreeBaseBlockInputStream.cpp | 2 +- .../Storages/MergeTree/MergeTreeDataPart.h | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- .../Storages/MergeTree/MergeTreePartition.h | 2 +- .../MergeTree/MergeTreeRangeReader.cpp | 2 +- ...rageSystemAggregateFunctionCombinators.cpp | 2 +- .../System/StorageSystemBuildOptions.cpp | 4 +- .../Storages/System/StorageSystemClusters.cpp | 10 +-- .../Storages/System/StorageSystemColumns.cpp | 6 +- .../System/StorageSystemDataTypeFamilies.cpp | 4 +- .../System/StorageSystemDictionaries.cpp | 6 +- .../Storages/System/StorageSystemEvents.cpp | 4 +- .../System/StorageSystemFunctions.cpp | 4 +- .../Storages/System/StorageSystemGraphite.cpp | 14 ++-- .../System/StorageSystemMergeTreeSettings.cpp | 4 +- .../Storages/System/StorageSystemMetrics.cpp | 4 +- .../System/StorageSystemMutations.cpp | 2 +- dbms/src/Storages/System/StorageSystemOne.cpp | 2 +- .../Storages/System/StorageSystemParts.cpp | 24 +++---- .../System/StorageSystemPartsColumns.cpp | 40 ++++++------ .../System/StorageSystemProcesses.cpp | 22 +++---- .../Storages/System/StorageSystemReplicas.cpp | 36 +++++------ .../System/StorageSystemReplicationQueue.cpp | 14 ++-- 
.../Storages/System/StorageSystemSettings.cpp | 6 +- .../Storages/System/StorageSystemTables.cpp | 4 +- .../System/StorageSystemZooKeeper.cpp | 18 +++--- libs/libdaemon/src/OwnSplitChannel.cpp | 8 +-- 76 files changed, 308 insertions(+), 279 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h b/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h index b532a373270..123ecbf76cc 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -237,13 +237,13 @@ public: for (size_t i = 0; i < size; ++i) { - to_lower.insert((i == 0) ? lower_bound : (points[i].mean + points[i - 1].mean) / 2); - to_upper.insert((i + 1 == size) ? upper_bound : (points[i].mean + points[i + 1].mean) / 2); + to_lower.insertValue((i == 0) ? lower_bound : (points[i].mean + points[i - 1].mean) / 2); + to_upper.insertValue((i + 1 == size) ? upper_bound : (points[i].mean + points[i + 1].mean) / 2); // linear density approximation Weight lower_weight = (i == 0) ? points[i].weight : ((points[i - 1].weight) + points[i].weight * 3) / 4; Weight upper_weight = (i + 1 == size) ? points[i].weight : (points[i + 1].weight + points[i].weight * 3) / 4; - to_weights.insert((lower_weight + upper_weight) / 2); + to_weights.insertValue((lower_weight + upper_weight) / 2); } } diff --git a/dbms/src/Columns/ColumnLowCardinality.cpp b/dbms/src/Columns/ColumnLowCardinality.cpp index 45fd9a0da50..873ecfa0613 100644 --- a/dbms/src/Columns/ColumnLowCardinality.cpp +++ b/dbms/src/Columns/ColumnLowCardinality.cpp @@ -527,7 +527,7 @@ void ColumnLowCardinality::Index::insertPosition(UInt64 position) while (position > getMaxPositionForCurrentType()) expandType(); - positions->assumeMutableRef().insert(UInt64(position)); + positions->assumeMutableRef().insert(position); checkSizeOfType(); } diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index 1045f6ed2cb..10a4519bf73 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -353,8 +353,8 @@ void getExtremesFromNullableContent(const ColumnVector & col, const NullMap & if (has_not_null) { - min = typename NearestFieldType::Type(cur_min); - max = typename NearestFieldType::Type(cur_max); + min = cur_min; + max = cur_max; } } diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index a58bcd4eaa5..7d30759c844 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -279,8 +279,8 @@ void ColumnVector::getExtremes(Field & min, Field & max) const if (size == 0) { - min = typename NearestFieldType::Type(0); - max = typename NearestFieldType::Type(0); + min = T(0); + max = T(0); return; } diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index b1e89e413c9..c0c2020ba20 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -193,7 +193,7 @@ public: return data.allocated_bytes(); } - void insert(const T value) + void insertValue(const T value) { data.push_back(value); } @@ -217,7 +217,7 @@ public: Field operator[](size_t n) const override { - return typename NearestFieldType::Type(data[n]); + return data[n]; } void get(size_t n, Field & res) const override diff --git a/dbms/src/Core/Field.h b/dbms/src/Core/Field.h index ca8bd2fb234..0fd92733409 100644 --- a/dbms/src/Core/Field.h +++ b/dbms/src/Core/Field.h @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include @@ -181,10 +183,7 @@ public: } 
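    /// The templated constructor declared below is what makes `Field f = -42;`
    /// well-formed: it is only declared here and defined after NearestFieldType,
    /// so the argument can first be normalized through nearestFieldType().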
template - Field(T && rhs, std::integral_constant>::value> * = nullptr) - { - createConcrete(std::forward(rhs)); - } + Field(T && rhs, std::enable_if_t, Field>, void *> = nullptr); /// Create a string inplace. Field(const char * data, size_t size) @@ -242,18 +241,7 @@ public: template std::enable_if_t, Field>, Field &> - operator= (T && rhs) - { - if (which != TypeToEnum>::value) - { - destroy(); - createConcrete(std::forward(rhs)); - } - else - assignConcrete(std::forward(rhs)); - - return *this; - } + operator= (T && rhs); ~Field() { @@ -596,7 +584,9 @@ template <> struct NearestFieldType { using Type = UInt64; }; template <> struct NearestFieldType { using Type = UInt64; }; template <> struct NearestFieldType { using Type = UInt64; }; template <> struct NearestFieldType { using Type = UInt64; }; +template <> struct NearestFieldType { using Type = UInt64; }; template <> struct NearestFieldType { using Type = UInt128; }; +template <> struct NearestFieldType { using Type = UInt128; }; template <> struct NearestFieldType { using Type = Int64; }; template <> struct NearestFieldType { using Type = Int64; }; template <> struct NearestFieldType { using Type = Int64; }; @@ -605,19 +595,57 @@ template <> struct NearestFieldType { using Type = Int128; }; template <> struct NearestFieldType { using Type = DecimalField; }; template <> struct NearestFieldType { using Type = DecimalField; }; template <> struct NearestFieldType { using Type = DecimalField; }; +template <> struct NearestFieldType> { using Type = DecimalField; }; +template <> struct NearestFieldType> { using Type = DecimalField; }; +template <> struct NearestFieldType> { using Type = DecimalField; }; template <> struct NearestFieldType { using Type = Float64; }; template <> struct NearestFieldType { using Type = Float64; }; +template <> struct NearestFieldType { using Type = String; }; template <> struct NearestFieldType { using Type = String; }; template <> struct NearestFieldType { using Type = Array; }; template <> struct NearestFieldType { using Type = Tuple; }; template <> struct NearestFieldType { using Type = UInt64; }; template <> struct NearestFieldType { using Type = Null; }; +template +decltype(auto) nearestFieldType(T && x) +{ + using U = typename NearestFieldType>::Type; + if constexpr (std::is_same_v, U>) + return std::forward(x); + else + return U(x); +} + +/// This (rather tricky) code is to avoid ambiguity in expressions like +/// Field f = 1; +/// instead of +/// Field f = Int64(1); +/// Things to note: +/// 1. float <--> int needs explicit cast +/// 2. 
customized types needs explicit cast +template +Field::Field(T && rhs, std::enable_if_t, Field>, void *>) +{ + auto && val = nearestFieldType(std::forward(rhs)); + createConcrete(std::forward(val)); +} template -typename NearestFieldType::Type nearestFieldType(const T & x) +std::enable_if_t, Field>, Field &> +Field::operator= (T && rhs) { - return typename NearestFieldType::Type(x); + auto && val = nearestFieldType(std::forward(rhs)); + using U = decltype(val); + if (which != TypeToEnum>::value) + { + destroy(); + createConcrete(std::forward(val)); + } + else + assignConcrete(std::forward(val)); + + return *this; } diff --git a/dbms/src/DataStreams/FilterBlockInputStream.cpp b/dbms/src/DataStreams/FilterBlockInputStream.cpp index 39d0d0c9615..24a429aaf3a 100644 --- a/dbms/src/DataStreams/FilterBlockInputStream.cpp +++ b/dbms/src/DataStreams/FilterBlockInputStream.cpp @@ -39,7 +39,7 @@ FilterBlockInputStream::FilterBlockInputStream(const BlockInputStreamPtr & input { /// Replace the filter column to a constant with value 1. FilterDescription filter_description_check(*column_elem.column); - column_elem.column = column_elem.type->createColumnConst(header.rows(), UInt64(1)); + column_elem.column = column_elem.type->createColumnConst(header.rows(), 1u); } if (remove_filter) @@ -144,7 +144,7 @@ Block FilterBlockInputStream::readImpl() if (filtered_rows == filter_and_holder.data->size()) { /// Replace the column with the filter by a constant. - res.safeGetByPosition(filter_column).column = res.safeGetByPosition(filter_column).type->createColumnConst(filtered_rows, UInt64(1)); + res.safeGetByPosition(filter_column).column = res.safeGetByPosition(filter_column).type->createColumnConst(filtered_rows, 1u); /// No need to touch the rest of the columns. return removeFilterIfNeed(std::move(res)); } @@ -161,7 +161,7 @@ Block FilterBlockInputStream::readImpl() /// Example: /// SELECT materialize(100) AS x WHERE x /// will work incorrectly. - current_column.column = current_column.type->createColumnConst(filtered_rows, UInt64(1)); + current_column.column = current_column.type->createColumnConst(filtered_rows, 1u); continue; } diff --git a/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp b/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp index a2740dabdaa..dc30a3e7a07 100644 --- a/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp @@ -251,7 +251,7 @@ void GraphiteRollupSortedBlockInputStream::startNextGroup(MutableColumns & merge void GraphiteRollupSortedBlockInputStream::finishCurrentGroup(MutableColumns & merged_columns) { /// Insert calculated values of the columns `time`, `value`, `version`. 
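    /// The explicit UInt64() cast below becomes unnecessary with the new Field
    /// machinery: insert() takes a Field, whose templated constructor widens
    /// integral arguments through nearestFieldType() (see Field.h above).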
- merged_columns[time_column_num]->insert(UInt64(current_time_rounded)); + merged_columns[time_column_num]->insert(current_time_rounded); merged_columns[version_column_num]->insertFrom( *(*current_subgroup_newest_row.columns)[version_column_num], current_subgroup_newest_row.row_num); diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index edd3b797602..aacb403fb9e 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -225,7 +225,7 @@ void DataTypeEnum::deserializeBinaryBulk( template Field DataTypeEnum::getDefault() const { - return typename NearestFieldType::Type(values.front().second); + return values.front().second; } template @@ -293,7 +293,7 @@ Field DataTypeEnum::castToValue(const Field & value_or_name) const { if (value_or_name.getType() == Field::Types::String) { - return static_cast(getValue(value_or_name.get())); + return getValue(value_or_name.get()); } else if (value_or_name.getType() == Field::Types::Int64 || value_or_name.getType() == Field::Types::UInt64) diff --git a/dbms/src/Dictionaries/DictionaryBlockInputStream.h b/dbms/src/Dictionaries/DictionaryBlockInputStream.h index 83491e5a699..8f9a3a10d35 100644 --- a/dbms/src/Dictionaries/DictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/DictionaryBlockInputStream.h @@ -464,7 +464,7 @@ ColumnPtr DictionaryBlockInputStream::getColumnFromIds(cons auto column_vector = ColumnVector::create(); column_vector->getData().reserve(ids_to_fill.size()); for (UInt64 id : ids_to_fill) - column_vector->insert(id); + column_vector->insertValue(id); return column_vector; } diff --git a/dbms/src/Dictionaries/MySQLBlockInputStream.cpp b/dbms/src/Dictionaries/MySQLBlockInputStream.cpp index ecd6da11f4d..41318e78b83 100644 --- a/dbms/src/Dictionaries/MySQLBlockInputStream.cpp +++ b/dbms/src/Dictionaries/MySQLBlockInputStream.cpp @@ -42,19 +42,19 @@ namespace { switch (type) { - case ValueType::UInt8: static_cast(column).insert(value.getUInt()); break; - case ValueType::UInt16: static_cast(column).insert(value.getUInt()); break; - case ValueType::UInt32: static_cast(column).insert(value.getUInt()); break; - case ValueType::UInt64: static_cast(column).insert(value.getUInt()); break; - case ValueType::Int8: static_cast(column).insert(value.getInt()); break; - case ValueType::Int16: static_cast(column).insert(value.getInt()); break; - case ValueType::Int32: static_cast(column).insert(value.getInt()); break; - case ValueType::Int64: static_cast(column).insert(value.getInt()); break; - case ValueType::Float32: static_cast(column).insert(value.getDouble()); break; - case ValueType::Float64: static_cast(column).insert(value.getDouble()); break; + case ValueType::UInt8: static_cast(column).insertValue(value.getUInt()); break; + case ValueType::UInt16: static_cast(column).insertValue(value.getUInt()); break; + case ValueType::UInt32: static_cast(column).insertValue(value.getUInt()); break; + case ValueType::UInt64: static_cast(column).insertValue(value.getUInt()); break; + case ValueType::Int8: static_cast(column).insertValue(value.getInt()); break; + case ValueType::Int16: static_cast(column).insertValue(value.getInt()); break; + case ValueType::Int32: static_cast(column).insertValue(value.getInt()); break; + case ValueType::Int64: static_cast(column).insertValue(value.getInt()); break; + case ValueType::Float32: static_cast(column).insertValue(value.getDouble()); break; + case ValueType::Float64: static_cast(column).insertValue(value.getDouble()); break; case 
ValueType::String: static_cast(column).insertData(value.data(), value.size()); break; - case ValueType::Date: static_cast(column).insert(UInt16{value.getDate().getDayNum()}); break; - case ValueType::DateTime: static_cast(column).insert(time_t{value.getDateTime()}); break; + case ValueType::Date: static_cast(column).insertValue(UInt16(value.getDate().getDayNum())); break; + case ValueType::DateTime: static_cast(column).insertValue(UInt32(value.getDateTime())); break; case ValueType::UUID: static_cast(column).insert(parse(value.data(), value.size())); break; } } diff --git a/dbms/src/Dictionaries/ODBCBlockInputStream.cpp b/dbms/src/Dictionaries/ODBCBlockInputStream.cpp index 2bf1d0756eb..85f727963d6 100644 --- a/dbms/src/Dictionaries/ODBCBlockInputStream.cpp +++ b/dbms/src/Dictionaries/ODBCBlockInputStream.cpp @@ -48,19 +48,19 @@ namespace { switch (type) { - case ValueType::UInt8: static_cast(column).insert(value.convert()); break; - case ValueType::UInt16: static_cast(column).insert(value.convert()); break; - case ValueType::UInt32: static_cast(column).insert(value.convert()); break; - case ValueType::UInt64: static_cast(column).insert(value.convert()); break; - case ValueType::Int8: static_cast(column).insert(value.convert()); break; - case ValueType::Int16: static_cast(column).insert(value.convert()); break; - case ValueType::Int32: static_cast(column).insert(value.convert()); break; - case ValueType::Int64: static_cast(column).insert(value.convert()); break; - case ValueType::Float32: static_cast(column).insert(value.convert()); break; - case ValueType::Float64: static_cast(column).insert(value.convert()); break; + case ValueType::UInt8: static_cast(column).insertValue(value.convert()); break; + case ValueType::UInt16: static_cast(column).insertValue(value.convert()); break; + case ValueType::UInt32: static_cast(column).insertValue(value.convert()); break; + case ValueType::UInt64: static_cast(column).insertValue(value.convert()); break; + case ValueType::Int8: static_cast(column).insertValue(value.convert()); break; + case ValueType::Int16: static_cast(column).insertValue(value.convert()); break; + case ValueType::Int32: static_cast(column).insertValue(value.convert()); break; + case ValueType::Int64: static_cast(column).insertValue(value.convert()); break; + case ValueType::Float32: static_cast(column).insertValue(value.convert()); break; + case ValueType::Float64: static_cast(column).insertValue(value.convert()); break; case ValueType::String: static_cast(column).insert(value.convert()); break; - case ValueType::Date: static_cast(column).insert(UInt16{LocalDate{value.convert()}.getDayNum()}); break; - case ValueType::DateTime: static_cast(column).insert(time_t{LocalDateTime{value.convert()}}); break; + case ValueType::Date: static_cast(column).insertValue(UInt16{LocalDate{value.convert()}.getDayNum()}); break; + case ValueType::DateTime: static_cast(column).insertValue(time_t{LocalDateTime{value.convert()}}); break; case ValueType::UUID: static_cast(column).insert(parse(value.convert())); break; } diff --git a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h index 8847a9927f5..a6c31cf7a9a 100644 --- a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h @@ -141,7 +141,7 @@ ColumnPtr RangeDictionaryBlockInputStream::getCo auto column_vector = ColumnVector::create(); column_vector->getData().reserve(array.size()); for (T value : array) - 
column_vector->insert(value); + column_vector->insertValue(value); return column_vector; } diff --git a/dbms/src/Dictionaries/TrieDictionary.cpp b/dbms/src/Dictionaries/TrieDictionary.cpp index 3792a5fb8c8..768b09dc010 100644 --- a/dbms/src/Dictionaries/TrieDictionary.cpp +++ b/dbms/src/Dictionaries/TrieDictionary.cpp @@ -624,7 +624,7 @@ Columns TrieDictionary::getKeyColumns() const ip_array[1] = Poco::ByteOrder::fromNetwork(ip_array[1]); std::swap(ip_array[0], ip_array[1]); ip_column->insertData(reinterpret_cast(ip_array), IPV6_BINARY_LENGTH); - mask_column->insert(static_cast(mask)); + mask_column->insertValue(static_cast(mask)); }; trieTraverse(trie, std::move(getter)); diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index 710ad2f08ac..8ed9d882a2e 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -46,13 +46,13 @@ Field convertNodeToField(capnp::DynamicValue::Reader value) case capnp::DynamicValue::VOID: return Field(); case capnp::DynamicValue::BOOL: - return UInt64(value.as() ? 1 : 0); + return value.as() ? 1u : 0u; case capnp::DynamicValue::INT: - return Int64((value.as())); + return value.as(); case capnp::DynamicValue::UINT: - return UInt64(value.as()); + return value.as(); case capnp::DynamicValue::FLOAT: - return Float64(value.as()); + return value.as(); case capnp::DynamicValue::TEXT: { auto arr = value.as(); @@ -73,7 +73,7 @@ Field convertNodeToField(capnp::DynamicValue::Reader value) return res; } case capnp::DynamicValue::ENUM: - return UInt64(value.as().getRaw()); + return value.as().getRaw(); case capnp::DynamicValue::STRUCT: { auto structValue = value.as(); diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index 3f7f6f39008..15d253e7788 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -835,7 +835,7 @@ private: if (!in.eof()) throw Exception("String is too long for Date: " + string_value.toString()); - ColumnPtr parsed_const_date_holder = DataTypeDate().createColumnConst(input_rows_count, UInt64(date)); + ColumnPtr parsed_const_date_holder = DataTypeDate().createColumnConst(input_rows_count, date); const ColumnConst * parsed_const_date = static_cast(parsed_const_date_holder.get()); executeNumLeftType(block, result, left_is_num ? col_left_untyped : parsed_const_date, @@ -863,7 +863,7 @@ private: if (!in.eof()) throw Exception("String is too long for UUID: " + string_value.toString()); - ColumnPtr parsed_const_uuid_holder = DataTypeUUID().createColumnConst(input_rows_count, UInt128(uuid)); + ColumnPtr parsed_const_uuid_holder = DataTypeUUID().createColumnConst(input_rows_count, uuid); const ColumnConst * parsed_const_uuid = static_cast(parsed_const_uuid_holder.get()); executeNumLeftType(block, result, left_is_num ? 
col_left_untyped : parsed_const_uuid, diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index fceb68be0ee..62f35ba8e9f 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -1445,7 +1445,7 @@ private: UInt8 res = 0; dictionary->isInConstantConstant(child_id, ancestor_id, res); - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(child_id_col->size(), UInt64(res)); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(child_id_col->size(), res); } else throw Exception{"Illegal column " + ancestor_id_col_untyped->getName() diff --git a/dbms/src/Functions/FunctionsGeo.h b/dbms/src/Functions/FunctionsGeo.h index 79e11959e18..456198b9e0d 100644 --- a/dbms/src/Functions/FunctionsGeo.h +++ b/dbms/src/Functions/FunctionsGeo.h @@ -293,7 +293,7 @@ private: const auto col_const_y = static_cast (col_y); size_t start_index = 0; UInt8 res = isPointInEllipses(col_const_x->getValue(), col_const_y->getValue(), ellipses, ellipses_count, start_index); - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(size, UInt64(res)); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(size, res); } else { diff --git a/dbms/src/Functions/GatherUtils/Algorithms.h b/dbms/src/Functions/GatherUtils/Algorithms.h index e4027d54cdf..67cfa132e74 100644 --- a/dbms/src/Functions/GatherUtils/Algorithms.h +++ b/dbms/src/Functions/GatherUtils/Algorithms.h @@ -79,7 +79,7 @@ inline ALWAYS_INLINE void writeSlice(const NumericArraySlice & slice, Generic { for (size_t i = 0; i < slice.size; ++i) { - Field field = static_cast::Type>(slice.data[i]); + Field field = T(slice.data[i]); sink.elements.insert(field); } sink.current_offset += slice.size; @@ -147,7 +147,7 @@ inline ALWAYS_INLINE void writeSlice(const GenericValueSlice & slice, NumericArr template inline ALWAYS_INLINE void writeSlice(const NumericValueSlice & slice, GenericArraySink & sink) { - Field field = static_cast::Type>(slice.value); + Field field = T(slice.value); sink.elements.insert(field); ++sink.current_offset; } diff --git a/dbms/src/Functions/arrayAll.cpp b/dbms/src/Functions/arrayAll.cpp index fc92a2c35b7..240039e8bfb 100644 --- a/dbms/src/Functions/arrayAll.cpp +++ b/dbms/src/Functions/arrayAll.cpp @@ -33,7 +33,7 @@ struct ArrayAllImpl throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); if (column_filter_const->getValue()) - return DataTypeUInt8().createColumnConst(array.size(), UInt64(1)); + return DataTypeUInt8().createColumnConst(array.size(), 1u); else { const IColumn::Offsets & offsets = array.getOffsets(); diff --git a/dbms/src/Functions/arrayCount.cpp b/dbms/src/Functions/arrayCount.cpp index 18c045a9469..9b20f4f7609 100644 --- a/dbms/src/Functions/arrayCount.cpp +++ b/dbms/src/Functions/arrayCount.cpp @@ -48,7 +48,7 @@ struct ArrayCountImpl return out_column; } else - return DataTypeUInt32().createColumnConst(array.size(), UInt64(0)); + return DataTypeUInt32().createColumnConst(array.size(), 0u); } const IColumn::Filter & filter = column_filter->getData(); diff --git a/dbms/src/Functions/arrayElement.cpp b/dbms/src/Functions/arrayElement.cpp index ed3511fdb3e..c6ba66cba11 100644 --- a/dbms/src/Functions/arrayElement.cpp +++ b/dbms/src/Functions/arrayElement.cpp @@ -856,7 +856,7 @@ void FunctionArrayElement::perform(Block & block, const ColumnNumbers & argument if (builder) builder.initSink(input_rows_count); - if 
(index == UInt64(0)) + if (index == 0u) throw Exception("Array indices is 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); if (!( executeNumberConst(block, arguments, result, index, builder) diff --git a/dbms/src/Functions/arrayExists.cpp b/dbms/src/Functions/arrayExists.cpp index 64ec1c86a96..4ee187674bc 100644 --- a/dbms/src/Functions/arrayExists.cpp +++ b/dbms/src/Functions/arrayExists.cpp @@ -48,7 +48,7 @@ struct ArrayExistsImpl return out_column; } else - return DataTypeUInt8().createColumnConst(array.size(), UInt64(0)); + return DataTypeUInt8().createColumnConst(array.size(), 0u); } const IColumn::Filter & filter = column_filter->getData(); diff --git a/dbms/src/Functions/arrayFirstIndex.cpp b/dbms/src/Functions/arrayFirstIndex.cpp index cb4dc1289f5..5a0e291f5a1 100644 --- a/dbms/src/Functions/arrayFirstIndex.cpp +++ b/dbms/src/Functions/arrayFirstIndex.cpp @@ -45,7 +45,7 @@ struct ArrayFirstIndexImpl return out_column; } else - return DataTypeUInt32().createColumnConst(array.size(), UInt64(0)); + return DataTypeUInt32().createColumnConst(array.size(), 0u); } const auto & filter = column_filter->getData(); diff --git a/dbms/src/Functions/arrayIndex.h b/dbms/src/Functions/arrayIndex.h index 82e5f32fb67..c4861184699 100644 --- a/dbms/src/Functions/arrayIndex.h +++ b/dbms/src/Functions/arrayIndex.h @@ -751,7 +751,7 @@ private: block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst( item_arg->size(), - static_cast::Type>(current)); + static_cast(current)); } else { diff --git a/dbms/src/Functions/arrayIntersect.cpp b/dbms/src/Functions/arrayIntersect.cpp index 85678c10f1d..b646d5d89b2 100644 --- a/dbms/src/Functions/arrayIntersect.cpp +++ b/dbms/src/Functions/arrayIntersect.cpp @@ -429,7 +429,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable { ++result_offset; if constexpr (is_numeric_column) - result_data.insert(pair.first); + result_data.insertValue(pair.first); else if constexpr (std::is_same::value || std::is_same::value) result_data.insertData(pair.first.data, pair.first.size); else diff --git a/dbms/src/Functions/getSizeOfEnumType.cpp b/dbms/src/Functions/getSizeOfEnumType.cpp index 4aef8492243..59f2144446f 100644 --- a/dbms/src/Functions/getSizeOfEnumType.cpp +++ b/dbms/src/Functions/getSizeOfEnumType.cpp @@ -51,9 +51,9 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { if (auto type = checkAndGetDataType(block.getByPosition(arguments[0]).type.get())) - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, UInt64(type->getValues().size())); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, type->getValues().size()); else if (auto type = checkAndGetDataType(block.getByPosition(arguments[0]).type.get())) - block.getByPosition(result).column = DataTypeUInt16().createColumnConst(input_rows_count, UInt64(type->getValues().size())); + block.getByPosition(result).column = DataTypeUInt16().createColumnConst(input_rows_count, type->getValues().size()); else throw Exception("The argument for function " + getName() + " must be Enum", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } diff --git a/dbms/src/Functions/hasColumnInTable.cpp b/dbms/src/Functions/hasColumnInTable.cpp index 5d539281bf6..1039cd1b70b 100644 --- a/dbms/src/Functions/hasColumnInTable.cpp +++ b/dbms/src/Functions/hasColumnInTable.cpp @@ -132,7 +132,7 @@ void FunctionHasColumnInTable::executeImpl(Block & block, 
const ColumnNumbers & has_column = remote_columns.hasPhysical(column_name); } - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, UInt64(has_column)); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, has_column); } diff --git a/dbms/src/Functions/ignore.cpp b/dbms/src/Functions/ignore.cpp index 09a7ac2653f..73aaea9f3ca 100644 --- a/dbms/src/Functions/ignore.cpp +++ b/dbms/src/Functions/ignore.cpp @@ -40,7 +40,7 @@ public: void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, UInt64(0)); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, 0u); } }; diff --git a/dbms/src/Functions/indexHint.cpp b/dbms/src/Functions/indexHint.cpp index 2b1e2608df6..04057854b16 100644 --- a/dbms/src/Functions/indexHint.cpp +++ b/dbms/src/Functions/indexHint.cpp @@ -50,7 +50,7 @@ public: void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, UInt64(1)); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, 1u); } }; diff --git a/dbms/src/Functions/isNotNull.cpp b/dbms/src/Functions/isNotNull.cpp index fe45d054cb9..019654053da 100644 --- a/dbms/src/Functions/isNotNull.cpp +++ b/dbms/src/Functions/isNotNull.cpp @@ -53,7 +53,7 @@ public: else { /// Since no element is nullable, return a constant one. - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(elem.column->size(), UInt64(1)); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(elem.column->size(), 1u); } } }; diff --git a/dbms/src/Functions/isNull.cpp b/dbms/src/Functions/isNull.cpp index 2d77f4d5c79..9feb8a8afae 100644 --- a/dbms/src/Functions/isNull.cpp +++ b/dbms/src/Functions/isNull.cpp @@ -47,7 +47,7 @@ public: { /// Since no element is nullable, return a zero-constant column representing /// a zero-filled null map. - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(elem.column->size(), UInt64(0)); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(elem.column->size(), 0u); } } }; diff --git a/dbms/src/Functions/sleep.h b/dbms/src/Functions/sleep.h index 1a6c9024486..1c492f61f69 100644 --- a/dbms/src/Functions/sleep.h +++ b/dbms/src/Functions/sleep.h @@ -92,7 +92,7 @@ public: } /// convertToFullColumn needed, because otherwise (constant expression case) function will not get called on each block. 
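/// What makes this whole family of hunks possible (a sketch, assuming the
/// Field/NearestFieldType machinery in Core/Field.h): createColumnConst and
/// IColumn::insert take a Field, and Field has an implicit converting
/// constructor keyed on NearestFieldType, roughly
///
///     template <typename T>
///     Field(T && value);   /// stores NearestFieldType<std::decay_t<T>>::Type(value)
///
/// so a plain literal such as 0u is widened to UInt64 on the way in, and the
/// explicit UInt64()/Int64()/String() wrappers removed here were only restating
/// that conversion. Typed columns keep insertValue() for call sites that want
/// to bypass Field entirely (see the insertValue hunks above).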
- block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, UInt64(0))->convertToFullColumnIfConst(); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, 0u)->convertToFullColumnIfConst(); } }; diff --git a/dbms/src/Functions/timeSlots.cpp b/dbms/src/Functions/timeSlots.cpp index 07afc2894e1..e2de8cac702 100644 --- a/dbms/src/Functions/timeSlots.cpp +++ b/dbms/src/Functions/timeSlots.cpp @@ -103,7 +103,7 @@ struct TimeSlotsImpl Array & result) { for (UInt32 value = start / TIME_SLOT_SIZE; value <= (start + duration) / TIME_SLOT_SIZE; ++value) - result.push_back(static_cast(value * TIME_SLOT_SIZE)); + result.push_back(value * TIME_SLOT_SIZE); } }; diff --git a/dbms/src/Functions/today.cpp b/dbms/src/Functions/today.cpp index 7a72f4d547f..c74bc000292 100644 --- a/dbms/src/Functions/today.cpp +++ b/dbms/src/Functions/today.cpp @@ -33,7 +33,7 @@ public: { block.getByPosition(result).column = DataTypeDate().createColumnConst( input_rows_count, - UInt64(DateLUT::instance().toDayNum(time(nullptr)))); + DateLUT::instance().toDayNum(time(nullptr))); } }; diff --git a/dbms/src/Functions/version.cpp b/dbms/src/Functions/version.cpp index 3467788a96c..cde0266a15e 100644 --- a/dbms/src/Functions/version.cpp +++ b/dbms/src/Functions/version.cpp @@ -35,7 +35,7 @@ public: void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - block.getByPosition(result).column = DataTypeString().createColumnConst(input_rows_count, String(VERSION_STRING)); + block.getByPosition(result).column = DataTypeString().createColumnConst(input_rows_count, VERSION_STRING); } }; diff --git a/dbms/src/Functions/yesterday.cpp b/dbms/src/Functions/yesterday.cpp index 506f84c863c..393a3a45361 100644 --- a/dbms/src/Functions/yesterday.cpp +++ b/dbms/src/Functions/yesterday.cpp @@ -33,7 +33,7 @@ public: { block.getByPosition(result).column = DataTypeDate().createColumnConst( input_rows_count, - UInt64(DateLUT::instance().toDayNum(time(nullptr)) - 1)); + DateLUT::instance().toDayNum(time(nullptr)) - 1); } }; diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 1d1a86e8a9e..5e0972dd3f1 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -817,7 +817,7 @@ struct AggregationMethodKeysFixed size_t bucket = i / 8; size_t offset = i % 8; UInt8 val = (reinterpret_cast(&value.first)[bucket] >> offset) & 1; - null_map->insert(val); + null_map->insertValue(val); is_null = val == 1; } else diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index 8fa02c91f6a..35445eb5566 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -1059,11 +1059,11 @@ public: ++num_hosts_finished; columns[0]->insert(host); - columns[1]->insert(static_cast(port)); - columns[2]->insert(static_cast(status.code)); + columns[1]->insert(port); + columns[2]->insert(status.code); columns[3]->insert(status.message); - columns[4]->insert(static_cast(waiting_hosts.size() - num_hosts_finished)); - columns[5]->insert(static_cast(current_active_hosts.size())); + columns[4]->insert(waiting_hosts.size() - num_hosts_finished); + columns[5]->insert(current_active_hosts.size()); } res = sample.cloneWithColumns(std::move(columns)); } diff --git a/dbms/src/Interpreters/InterpreterCheckQuery.cpp b/dbms/src/Interpreters/InterpreterCheckQuery.cpp index bb8a1d46143..84788fd0685 100644 --- 
a/dbms/src/Interpreters/InterpreterCheckQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCheckQuery.cpp @@ -26,7 +26,7 @@ BlockIO InterpreterCheckQuery::execute() StoragePtr table = context.getTable(database_name, table_name); auto column = ColumnUInt8::create(); - column->insert(UInt64(table->checkData())); + column->insertValue(UInt64(table->checkData())); result = Block{{ std::move(column), std::make_shared(), "result" }}; BlockIO res; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index c1c4c6f0706..fd9270430d5 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -211,7 +211,7 @@ static ColumnsAndDefaults parseColumns(const ASTExpressionList & column_list_ast default_expr_list->children.emplace_back(setAlias( makeASTFunction("CAST", std::make_shared(tmp_column_name), - std::make_shared(Field(data_type_ptr->getName()))), final_column_name)); + std::make_shared(data_type_ptr->getName())), final_column_name)); default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), tmp_column_name)); } else diff --git a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp index f0add31dc38..e304f33efbc 100644 --- a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -62,7 +62,7 @@ using QueryDescriptors = std::vector; static void insertResultRow(size_t n, CancellationCode code, const Block & source_processes, const Block & sample_block, MutableColumns & columns) { - columns[0]->insert(String(cancellationCodeToStatus(code))); + columns[0]->insert(cancellationCodeToStatus(code)); for (size_t col_num = 1, size = columns.size(); col_num < size; ++col_num) columns[col_num]->insertFrom(*source_processes.getByName(sample_block.getByPosition(col_num).name).column, n); diff --git a/dbms/src/Interpreters/PartLog.cpp b/dbms/src/Interpreters/PartLog.cpp index 116bbe0fef8..d32fe3983e1 100644 --- a/dbms/src/Interpreters/PartLog.cpp +++ b/dbms/src/Interpreters/PartLog.cpp @@ -15,6 +15,8 @@ namespace DB { +template <> struct NearestFieldType { using Type = UInt64; }; + Block PartLogElement::createBlock() { auto event_type_datatype = std::make_shared( @@ -60,18 +62,18 @@ void PartLogElement::appendToBlock(Block & block) const size_t i = 0; - columns[i++]->insert(Int64(event_type)); - columns[i++]->insert(UInt64(DateLUT::instance().toDayNum(event_time))); - columns[i++]->insert(UInt64(event_time)); - columns[i++]->insert(UInt64(duration_ms)); + columns[i++]->insert(event_type); + columns[i++]->insert(DateLUT::instance().toDayNum(event_time)); + columns[i++]->insert(event_time); + columns[i++]->insert(duration_ms); columns[i++]->insert(database_name); columns[i++]->insert(table_name); columns[i++]->insert(part_name); columns[i++]->insert(partition_id); - columns[i++]->insert(UInt64(rows)); - columns[i++]->insert(UInt64(bytes_compressed_on_disk)); + columns[i++]->insert(rows); + columns[i++]->insert(bytes_compressed_on_disk); Array source_part_names_array; source_part_names_array.reserve(source_part_names.size()); @@ -80,11 +82,11 @@ void PartLogElement::appendToBlock(Block & block) const columns[i++]->insert(source_part_names_array); - columns[i++]->insert(UInt64(bytes_uncompressed)); - columns[i++]->insert(UInt64(rows_read)); - columns[i++]->insert(UInt64(bytes_read_uncompressed)); + columns[i++]->insert(bytes_uncompressed); + 
columns[i++]->insert(rows_read); + columns[i++]->insert(bytes_read_uncompressed); - columns[i++]->insert(UInt64(error)); + columns[i++]->insert(error); columns[i++]->insert(exception); block.setColumns(std::move(columns)); diff --git a/dbms/src/Interpreters/QueryLog.cpp b/dbms/src/Interpreters/QueryLog.cpp index a005b63a6c3..08224f637f6 100644 --- a/dbms/src/Interpreters/QueryLog.cpp +++ b/dbms/src/Interpreters/QueryLog.cpp @@ -19,7 +19,6 @@ namespace DB { - Block QueryLogElement::createBlock() { return @@ -104,19 +103,19 @@ void QueryLogElement::appendToBlock(Block & block) const size_t i = 0; columns[i++]->insert(UInt64(type)); - columns[i++]->insert(UInt64(DateLUT::instance().toDayNum(event_time))); - columns[i++]->insert(UInt64(event_time)); - columns[i++]->insert(UInt64(query_start_time)); - columns[i++]->insert(UInt64(query_duration_ms)); + columns[i++]->insert(DateLUT::instance().toDayNum(event_time)); + columns[i++]->insert(event_time); + columns[i++]->insert(query_start_time); + columns[i++]->insert(query_duration_ms); - columns[i++]->insert(UInt64(read_rows)); - columns[i++]->insert(UInt64(read_bytes)); - columns[i++]->insert(UInt64(written_rows)); - columns[i++]->insert(UInt64(written_bytes)); - columns[i++]->insert(UInt64(result_rows)); - columns[i++]->insert(UInt64(result_bytes)); + columns[i++]->insert(read_rows); + columns[i++]->insert(read_bytes); + columns[i++]->insert(written_rows); + columns[i++]->insert(written_bytes); + columns[i++]->insert(result_rows); + columns[i++]->insert(result_bytes); - columns[i++]->insert(UInt64(memory_usage)); + columns[i++]->insert(memory_usage); columns[i++]->insertData(query.data(), query.size()); columns[i++]->insertData(exception.data(), exception.size()); @@ -124,7 +123,7 @@ void QueryLogElement::appendToBlock(Block & block) const appendClientInfo(client_info, columns, i); - columns[i++]->insert(UInt64(ClickHouseRevision::get())); + columns[i++]->insert(ClickHouseRevision::get()); { Array threads_array; @@ -163,27 +162,27 @@ void QueryLogElement::appendToBlock(Block & block) const void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i) { - columns[i++]->insert(UInt64(client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY)); + columns[i++]->insert(client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY); columns[i++]->insert(client_info.current_user); columns[i++]->insert(client_info.current_query_id); columns[i++]->insertData(IPv6ToBinary(client_info.current_address.host()).data(), 16); - columns[i++]->insert(UInt64(client_info.current_address.port())); + columns[i++]->insert(client_info.current_address.port()); columns[i++]->insert(client_info.initial_user); columns[i++]->insert(client_info.initial_query_id); columns[i++]->insertData(IPv6ToBinary(client_info.initial_address.host()).data(), 16); - columns[i++]->insert(UInt64(client_info.initial_address.port())); + columns[i++]->insert(client_info.initial_address.port()); columns[i++]->insert(UInt64(client_info.interface)); columns[i++]->insert(client_info.os_user); columns[i++]->insert(client_info.client_hostname); columns[i++]->insert(client_info.client_name); - columns[i++]->insert(UInt64(client_info.client_revision)); - columns[i++]->insert(UInt64(client_info.client_version_major)); - columns[i++]->insert(UInt64(client_info.client_version_minor)); - columns[i++]->insert(UInt64(client_info.client_version_patch)); + columns[i++]->insert(client_info.client_revision); + 
columns[i++]->insert(client_info.client_version_major); + columns[i++]->insert(client_info.client_version_minor); + columns[i++]->insert(client_info.client_version_patch); columns[i++]->insert(UInt64(client_info.http_method)); columns[i++]->insert(client_info.http_user_agent); diff --git a/dbms/src/Interpreters/QueryThreadLog.cpp b/dbms/src/Interpreters/QueryThreadLog.cpp index 8a84d814388..e421c7a9b78 100644 --- a/dbms/src/Interpreters/QueryThreadLog.cpp +++ b/dbms/src/Interpreters/QueryThreadLog.cpp @@ -75,30 +75,30 @@ void QueryThreadLogElement::appendToBlock(Block & block) const size_t i = 0; - columns[i++]->insert(UInt64(DateLUT::instance().toDayNum(event_time))); - columns[i++]->insert(UInt64(event_time)); - columns[i++]->insert(UInt64(query_start_time)); - columns[i++]->insert(UInt64(query_duration_ms)); + columns[i++]->insert(DateLUT::instance().toDayNum(event_time)); + columns[i++]->insert(event_time); + columns[i++]->insert(query_start_time); + columns[i++]->insert(query_duration_ms); - columns[i++]->insert(UInt64(read_rows)); - columns[i++]->insert(UInt64(read_bytes)); - columns[i++]->insert(UInt64(written_rows)); - columns[i++]->insert(UInt64(written_bytes)); + columns[i++]->insert(read_rows); + columns[i++]->insert(read_bytes); + columns[i++]->insert(written_rows); + columns[i++]->insert(written_bytes); - columns[i++]->insert(Int64(memory_usage)); - columns[i++]->insert(Int64(peak_memory_usage)); + columns[i++]->insert(memory_usage); + columns[i++]->insert(peak_memory_usage); columns[i++]->insertData(thread_name.data(), thread_name.size()); - columns[i++]->insert(UInt64(thread_number)); - columns[i++]->insert(Int64(os_thread_id)); - columns[i++]->insert(UInt64(master_thread_number)); - columns[i++]->insert(Int64(master_os_thread_id)); + columns[i++]->insert(thread_number); + columns[i++]->insert(os_thread_id); + columns[i++]->insert(master_thread_number); + columns[i++]->insert(master_os_thread_id); columns[i++]->insertData(query.data(), query.size()); QueryLogElement::appendClientInfo(client_info, columns, i); - columns[i++]->insert(UInt64(ClickHouseRevision::get())); + columns[i++]->insert(ClickHouseRevision::get()); if (profile_counters) { diff --git a/dbms/src/Interpreters/convertFieldToType.cpp b/dbms/src/Interpreters/convertFieldToType.cpp index 58b0c164c35..36e867afed0 100644 --- a/dbms/src/Interpreters/convertFieldToType.cpp +++ b/dbms/src/Interpreters/convertFieldToType.cpp @@ -58,7 +58,7 @@ static Field convertNumericTypeImpl(const Field & from) if (!accurate::equalsOp(value, To(value))) return {}; - return Field(typename NearestFieldType::Type(value)); + return To(value); } template @@ -86,7 +86,7 @@ static Field convertIntToDecimalType(const Field & from, const To & type) throw Exception("Number is too much to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); FieldType scaled_value = type.getScaleMultiplier() * value; - return Field(typename NearestFieldType::Type(scaled_value, type.getScale())); + return DecimalField(scaled_value, type.getScale()); } @@ -97,7 +97,7 @@ static Field convertStringToDecimalType(const Field & from, const DataTypeDecima const String & str_value = from.get(); T value = type.parseFromString(str_value); - return Field(typename NearestFieldType::Type(value, type.getScale())); + return DecimalField(value, type.getScale()); } @@ -150,11 +150,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID /// Conversion between Date and DateTime and vice versa. 
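/// Date is stored as a DayNum (days since 1970-01-01) and DateTime as a unix
/// timestamp, so the two branches below reduce to (a sketch, using the DateLUT
/// calls visible in this hunk):
///
///     DayNum day = time_zone.toDayNum(static_cast<time_t>(date_time));  /// DateTime -> Date
///     time_t ts  = time_zone.fromDayNum(day);                           /// Date -> DateTime
///
/// Both results now land in the returned Field through the implicit numeric
/// conversion instead of an explicit UInt64() wrap.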
if (which_type.isDate() && which_from_type.isDateTime()) { - return UInt64(static_cast(*from_type_hint).getTimeZone().toDayNum(src.get())); + return static_cast(*from_type_hint).getTimeZone().toDayNum(src.get()); } else if (which_type.isDateTime() && which_from_type.isDate()) { - return UInt64(static_cast(type).getTimeZone().fromDayNum(DayNum(src.get()))); + return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); } else if (type.isValueRepresentedByNumber()) { @@ -184,7 +184,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (which_type.isDate()) { /// Convert 'YYYY-MM-DD' Strings to Date - return UInt64(stringToDate(src.get())); + return stringToDate(src.get()); } else if (which_type.isDateTime()) { diff --git a/dbms/src/Interpreters/evaluateConstantExpression.cpp b/dbms/src/Interpreters/evaluateConstantExpression.cpp index 3fee9a8e2a7..481ad8ee0b2 100644 --- a/dbms/src/Interpreters/evaluateConstantExpression.cpp +++ b/dbms/src/Interpreters/evaluateConstantExpression.cpp @@ -69,7 +69,7 @@ ASTPtr evaluateConstantExpressionAsLiteral(const ASTPtr & node, const Context & ASTPtr evaluateConstantExpressionOrIdentifierAsLiteral(const ASTPtr & node, const Context & context) { if (auto id = typeid_cast(node.get())) - return std::make_shared(Field(id->name)); + return std::make_shared(id->name); return evaluateConstantExpressionAsLiteral(node, context); } diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 9e6d525f685..08e24d98c0e 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -298,7 +298,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context) default_expr_list->children.emplace_back(setAlias( makeASTFunction("CAST", std::make_shared(tmp_column_name), - std::make_shared(Field(column_type_raw_ptr->getName()))), + std::make_shared(column_type_raw_ptr->getName())), final_column_name)); default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), tmp_column_name)); @@ -368,7 +368,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context) default_expr_list->children.emplace_back(setAlias( makeASTFunction("CAST", std::make_shared(tmp_column_name), - std::make_shared(Field(column_type_ptr->getName()))), + std::make_shared(column_type_ptr->getName())), column_name)); default_expr_list->children.emplace_back(setAlias(col_def.second.expression->clone(), tmp_column_name)); @@ -413,7 +413,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context) } command_ptr->default_expression = makeASTFunction("CAST", command_ptr->default_expression->clone(), - std::make_shared(Field(explicit_type->getName()))); + std::make_shared(explicit_type->getName())); } } else diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index fb55db8bbb9..9edb04fd1fa 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -162,7 +162,7 @@ public: : storage(storage_), consumer(nullptr), context(context_), max_block_size(max_block_size_) { // Always skip unknown fields regardless of the context (JSON or TSKV) - context.setSetting("input_format_skip_unknown_fields", UInt64(1)); + context.setSetting("input_format_skip_unknown_fields", 1u); if (schema.size() > 0) context.setSetting("format_schema", schema); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp 
b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp index 303fce24c53..158cb64083f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp @@ -205,7 +205,7 @@ void MergeTreeBaseBlockInputStream::injectVirtualColumns(Block & block) const { ColumnPtr column; if (rows) - column = DataTypeUInt64().createColumnConst(rows, static_cast(task->part_index_in_query))->convertToFullColumnIfConst(); + column = DataTypeUInt64().createColumnConst(rows, task->part_index_in_query)->convertToFullColumnIfConst(); else column = DataTypeUInt64().createColumn(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index d714cf1b037..bec7422d7a1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -188,7 +188,7 @@ struct MergeTreeDataPart /// For month-based partitioning. MinMaxIndex(DayNum min_date, DayNum max_date) - : parallelogram(1, Range(static_cast(min_date), true, static_cast(max_date), true)) + : parallelogram(1, Range(min_date, true, max_date, true)) , initialized(true) { } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 6e2b2d3a20a..060540931bc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -843,7 +843,7 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition( auto function = std::make_shared(); auto arguments = std::make_shared(); auto sign = std::make_shared(data.merging_params.sign_column); - auto one = std::make_shared(Field(static_cast(1))); + auto one = std::make_shared(1); function->name = "equals"; function->arguments = arguments; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.h b/dbms/src/Storages/MergeTree/MergeTreePartition.h index cb5493cc8bd..a03f4addd81 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartition.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.h @@ -22,7 +22,7 @@ public: explicit MergeTreePartition(Row value_) : value(std::move(value_)) {} /// For month-based partitioning. - explicit MergeTreePartition(UInt32 yyyymm) : value(1, static_cast(yyyymm)) {} + explicit MergeTreePartition(UInt32 yyyymm) : value(1, yyyymm) {} String getID(const MergeTreeData & storage) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp index f6fc0807c07..532118e5e06 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -645,7 +645,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r if (remove_prewhere_column) result.block.erase(*prewhere_column_name); else - prewhere_column.column = prewhere_column.type->createColumnConst(getNumRows(), UInt64(1)); + prewhere_column.column = prewhere_column.type->createColumnConst(getNumRows(), 1u); /// If block is empty, create column in order to store rows number. 
if (last_reader_in_chain && result.block.columns() == 0) diff --git a/dbms/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp b/dbms/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp index 8fa335faceb..c0dd5cc85d3 100644 --- a/dbms/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp +++ b/dbms/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp @@ -18,7 +18,7 @@ void StorageSystemAggregateFunctionCombinators::fillData(MutableColumns & res_co for (const auto & pair : combinators) { res_columns[0]->insert(pair.first); - res_columns[1]->insert(UInt64(pair.second->isForInternalUsageOnly())); + res_columns[1]->insert(pair.second->isForInternalUsageOnly()); } } diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.cpp b/dbms/src/Storages/System/StorageSystemBuildOptions.cpp index 2a8ffc947be..11d59b0207d 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.cpp +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.cpp @@ -18,8 +18,8 @@ void StorageSystemBuildOptions::fillData(MutableColumns & res_columns, const Con { for (auto it = auto_config_build; *it; it += 2) { - res_columns[0]->insert(String(it[0])); - res_columns[1]->insert(String(it[1])); + res_columns[0]->insert(it[0]); + res_columns[1]->insert(it[1]); } } diff --git a/dbms/src/Storages/System/StorageSystemClusters.cpp b/dbms/src/Storages/System/StorageSystemClusters.cpp index 3527de302a1..b0ad56e8eb5 100644 --- a/dbms/src/Storages/System/StorageSystemClusters.cpp +++ b/dbms/src/Storages/System/StorageSystemClusters.cpp @@ -30,13 +30,13 @@ void StorageSystemClusters::fillData(MutableColumns & res_columns, const Context { size_t i = 0; res_columns[i++]->insert(cluster_name); - res_columns[i++]->insert(static_cast(shard_info.shard_num)); - res_columns[i++]->insert(static_cast(shard_info.weight)); - res_columns[i++]->insert(static_cast(address.replica_num)); + res_columns[i++]->insert(shard_info.shard_num); + res_columns[i++]->insert(shard_info.weight); + res_columns[i++]->insert(address.replica_num); res_columns[i++]->insert(address.host_name); res_columns[i++]->insert(DNSResolver::instance().resolveHost(address.host_name).toString()); - res_columns[i++]->insert(static_cast(address.port)); - res_columns[i++]->insert(static_cast(shard_info.isLocal())); + res_columns[i++]->insert(address.port); + res_columns[i++]->insert(shard_info.isLocal()); res_columns[i++]->insert(address.user); res_columns[i++]->insert(address.default_database); }; diff --git a/dbms/src/Storages/System/StorageSystemColumns.cpp b/dbms/src/Storages/System/StorageSystemColumns.cpp index 40802f16466..f0b1bbe5f96 100644 --- a/dbms/src/Storages/System/StorageSystemColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemColumns.cpp @@ -166,11 +166,11 @@ protected: else { if (columns_mask[src_index++]) - res_columns[res_index++]->insert(static_cast(it->second.data_compressed)); + res_columns[res_index++]->insert(it->second.data_compressed); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(static_cast(it->second.data_uncompressed)); + res_columns[res_index++]->insert(it->second.data_uncompressed); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(static_cast(it->second.marks)); + res_columns[res_index++]->insert(it->second.marks); } } diff --git a/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp b/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp index c8d692fddd8..824aa88550e 100644 --- 
a/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp +++ b/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp @@ -23,12 +23,12 @@ void StorageSystemDataTypeFamilies::fillData(MutableColumns & res_columns, const for (const auto & name : names) { res_columns[0]->insert(name); - res_columns[1]->insert(UInt64(factory.isCaseInsensitive(name))); + res_columns[1]->insert(factory.isCaseInsensitive(name)); if (factory.isAlias(name)) res_columns[2]->insert(factory.aliasTo(name)); else - res_columns[2]->insert(String("")); + res_columns[2]->insert(""); } } diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp index 665b992c829..defcb947632 100644 --- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp +++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp @@ -57,10 +57,10 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con res_columns[i++]->insert(dict_struct.getKeyDescription()); res_columns[i++]->insert(ext::map(dict_struct.attributes, [] (auto & attr) { return attr.name; })); res_columns[i++]->insert(ext::map(dict_struct.attributes, [] (auto & attr) { return attr.type->getName(); })); - res_columns[i++]->insert(static_cast(dict_ptr->getBytesAllocated())); - res_columns[i++]->insert(static_cast(dict_ptr->getQueryCount())); + res_columns[i++]->insert(dict_ptr->getBytesAllocated()); + res_columns[i++]->insert(dict_ptr->getQueryCount()); res_columns[i++]->insert(dict_ptr->getHitRate()); - res_columns[i++]->insert(static_cast(dict_ptr->getElementCount())); + res_columns[i++]->insert(dict_ptr->getElementCount()); res_columns[i++]->insert(dict_ptr->getLoadFactor()); res_columns[i++]->insert(static_cast(std::chrono::system_clock::to_time_t(dict_ptr->getCreationTime()))); res_columns[i++]->insert(dict_ptr->getSource()->toString()); diff --git a/dbms/src/Storages/System/StorageSystemEvents.cpp b/dbms/src/Storages/System/StorageSystemEvents.cpp index a91eb2d796d..6a0992af052 100644 --- a/dbms/src/Storages/System/StorageSystemEvents.cpp +++ b/dbms/src/Storages/System/StorageSystemEvents.cpp @@ -23,9 +23,9 @@ void StorageSystemEvents::fillData(MutableColumns & res_columns, const Context & if (0 != value) { - res_columns[0]->insert(String(ProfileEvents::getName(ProfileEvents::Event(i)))); + res_columns[0]->insert(ProfileEvents::getName(ProfileEvents::Event(i))); res_columns[1]->insert(value); - res_columns[2]->insert(String(ProfileEvents::getDocumentation(ProfileEvents::Event(i)))); + res_columns[2]->insert(ProfileEvents::getDocumentation(ProfileEvents::Event(i))); } } } diff --git a/dbms/src/Storages/System/StorageSystemFunctions.cpp b/dbms/src/Storages/System/StorageSystemFunctions.cpp index f63d0b9b932..1de904c3520 100644 --- a/dbms/src/Storages/System/StorageSystemFunctions.cpp +++ b/dbms/src/Storages/System/StorageSystemFunctions.cpp @@ -16,11 +16,11 @@ namespace { res_columns[0]->insert(name); res_columns[1]->insert(is_aggregate); - res_columns[2]->insert(UInt64(f.isCaseInsensitive(name))); + res_columns[2]->insert(f.isCaseInsensitive(name)); if (f.isAlias(name)) res_columns[3]->insert(f.aliasTo(name)); else - res_columns[3]->insert(String{}); + res_columns[3]->insert(""); } } diff --git a/dbms/src/Storages/System/StorageSystemGraphite.cpp b/dbms/src/Storages/System/StorageSystemGraphite.cpp index 7eab731bd12..8cd466c050e 100644 --- a/dbms/src/Storages/System/StorageSystemGraphite.cpp +++ b/dbms/src/Storages/System/StorageSystemGraphite.cpp @@ -150,13 +150,13 @@ void 
StorageSystemGraphite::fillData(MutableColumns & res_columns, const Context { for (const auto & ret : pattern.retentions) { - res_columns[0]->insert(Field(section)); - res_columns[1]->insert(Field(pattern.regexp)); - res_columns[2]->insert(Field(pattern.function)); - res_columns[3]->insert(nearestFieldType(ret.age)); - res_columns[4]->insert(nearestFieldType(ret.precision)); - res_columns[5]->insert(nearestFieldType(pattern.priority)); - res_columns[6]->insert(nearestFieldType(pattern.is_default)); + res_columns[0]->insert(section); + res_columns[1]->insert(pattern.regexp); + res_columns[2]->insert(pattern.function); + res_columns[3]->insert(ret.age); + res_columns[4]->insert(ret.precision); + res_columns[5]->insert(pattern.priority); + res_columns[6]->insert(pattern.is_default); } } } diff --git a/dbms/src/Storages/System/StorageSystemMergeTreeSettings.cpp b/dbms/src/Storages/System/StorageSystemMergeTreeSettings.cpp index e9d463770f3..485f490210b 100644 --- a/dbms/src/Storages/System/StorageSystemMergeTreeSettings.cpp +++ b/dbms/src/Storages/System/StorageSystemMergeTreeSettings.cpp @@ -21,9 +21,9 @@ void SystemMergeTreeSettings::fillData(MutableColumns & res_columns, const Conte const MergeTreeSettings & settings = context.getMergeTreeSettings(); #define ADD_SETTING(TYPE, NAME, DEFAULT) \ - res_columns[0]->insert(String(#NAME)); \ + res_columns[0]->insert(#NAME); \ res_columns[1]->insert(settings.NAME.toString()); \ - res_columns[2]->insert(UInt64(settings.NAME.changed)); + res_columns[2]->insert(settings.NAME.changed); APPLY_FOR_MERGE_TREE_SETTINGS(ADD_SETTING) #undef ADD_SETTING } diff --git a/dbms/src/Storages/System/StorageSystemMetrics.cpp b/dbms/src/Storages/System/StorageSystemMetrics.cpp index 4c889af1e42..b2332c52817 100644 --- a/dbms/src/Storages/System/StorageSystemMetrics.cpp +++ b/dbms/src/Storages/System/StorageSystemMetrics.cpp @@ -23,9 +23,9 @@ void StorageSystemMetrics::fillData(MutableColumns & res_columns, const Context { Int64 value = CurrentMetrics::values[i].load(std::memory_order_relaxed); - res_columns[0]->insert(String(CurrentMetrics::getName(CurrentMetrics::Metric(i)))); + res_columns[0]->insert(CurrentMetrics::getName(CurrentMetrics::Metric(i))); res_columns[1]->insert(value); - res_columns[2]->insert(String(CurrentMetrics::getDocumentation(CurrentMetrics::Metric(i)))); + res_columns[2]->insert(CurrentMetrics::getDocumentation(CurrentMetrics::Metric(i))); } } diff --git a/dbms/src/Storages/System/StorageSystemMutations.cpp b/dbms/src/Storages/System/StorageSystemMutations.cpp index 56f66511b72..51bdc94720f 100644 --- a/dbms/src/Storages/System/StorageSystemMutations.cpp +++ b/dbms/src/Storages/System/StorageSystemMutations.cpp @@ -119,7 +119,7 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, const Contex res_columns[col_num++]->insert(block_partition_ids); res_columns[col_num++]->insert(block_numbers); res_columns[col_num++]->insert(status.parts_to_do); - res_columns[col_num++]->insert(UInt64(status.is_done)); + res_columns[col_num++]->insert(status.is_done); } } } diff --git a/dbms/src/Storages/System/StorageSystemOne.cpp b/dbms/src/Storages/System/StorageSystemOne.cpp index 968fc4c2c6c..827ee5ca6a1 100644 --- a/dbms/src/Storages/System/StorageSystemOne.cpp +++ b/dbms/src/Storages/System/StorageSystemOne.cpp @@ -29,7 +29,7 @@ BlockInputStreams StorageSystemOne::read( return BlockInputStreams(1, std::make_shared( Block{ColumnWithTypeAndName( - DataTypeUInt8().createColumnConst(1, UInt64(0))->convertToFullColumnIfConst(), + 
DataTypeUInt8().createColumnConst(1, 0u)->convertToFullColumnIfConst(), std::make_shared(), "dummy")})); } diff --git a/dbms/src/Storages/System/StorageSystemParts.cpp b/dbms/src/Storages/System/StorageSystemParts.cpp index 71c15004e66..e46dec1c7b4 100644 --- a/dbms/src/Storages/System/StorageSystemParts.cpp +++ b/dbms/src/Storages/System/StorageSystemParts.cpp @@ -65,13 +65,13 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns, const Stor columns[i++]->insert(out.str()); } columns[i++]->insert(part->name); - columns[i++]->insert(static_cast(part_state == State::Committed)); - columns[i++]->insert(static_cast(part->marks_count)); - columns[i++]->insert(static_cast(part->rows_count)); - columns[i++]->insert(static_cast(part->bytes_on_disk)); - columns[i++]->insert(static_cast(columns_size.data_compressed)); - columns[i++]->insert(static_cast(columns_size.data_uncompressed)); - columns[i++]->insert(static_cast(columns_size.marks)); + columns[i++]->insert(part_state == State::Committed); + columns[i++]->insert(part->marks_count); + columns[i++]->insert(part->rows_count); + columns[i++]->insert(part->bytes_on_disk.load(std::memory_order_relaxed)); + columns[i++]->insert(columns_size.data_compressed); + columns[i++]->insert(columns_size.data_uncompressed); + columns[i++]->insert(columns_size.marks); columns[i++]->insert(static_cast(part->modification_time)); time_t remove_time = part->remove_time.load(std::memory_order_relaxed); @@ -80,15 +80,15 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns, const Stor /// For convenience, in returned refcount, don't add references that was due to local variables in this method: all_parts, active_parts. columns[i++]->insert(static_cast(part.use_count() - 1)); - columns[i++]->insert(static_cast(part->getMinDate())); - columns[i++]->insert(static_cast(part->getMaxDate())); + columns[i++]->insert(part->getMinDate()); + columns[i++]->insert(part->getMaxDate()); columns[i++]->insert(part->info.partition_id); columns[i++]->insert(part->info.min_block); columns[i++]->insert(part->info.max_block); - columns[i++]->insert(static_cast(part->info.level)); + columns[i++]->insert(part->info.level); columns[i++]->insert(static_cast(part->info.getDataVersion())); - columns[i++]->insert(static_cast(part->getIndexSizeInBytes())); - columns[i++]->insert(static_cast(part->getIndexSizeInAllocatedBytes())); + columns[i++]->insert(part->getIndexSizeInBytes()); + columns[i++]->insert(part->getIndexSizeInAllocatedBytes()); columns[i++]->insert(info.database); columns[i++]->insert(info.table); diff --git a/dbms/src/Storages/System/StorageSystemPartsColumns.cpp b/dbms/src/Storages/System/StorageSystemPartsColumns.cpp index 435b3dd287f..108bf1b6e96 100644 --- a/dbms/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemPartsColumns.cpp @@ -110,28 +110,28 @@ void StorageSystemPartsColumns::processNextStorage(MutableColumns & columns, con columns[j++]->insert(out.str()); } columns[j++]->insert(part->name); - columns[j++]->insert(static_cast(part_state == State::Committed)); - columns[j++]->insert(static_cast(part->marks_count)); + columns[j++]->insert(part_state == State::Committed); + columns[j++]->insert(part->marks_count); - columns[j++]->insert(static_cast(part->rows_count)); - columns[j++]->insert(static_cast(part->bytes_on_disk)); - columns[j++]->insert(static_cast(columns_size.data_compressed)); - columns[j++]->insert(static_cast(columns_size.data_uncompressed)); - 
columns[j++]->insert(static_cast(columns_size.marks)); - columns[j++]->insert(static_cast(part->modification_time)); - columns[j++]->insert(static_cast(part->remove_time.load(std::memory_order_relaxed))); + columns[j++]->insert(part->rows_count); + columns[j++]->insert(part->bytes_on_disk.load(std::memory_order_relaxed)); + columns[j++]->insert(columns_size.data_compressed); + columns[j++]->insert(columns_size.data_uncompressed); + columns[j++]->insert(columns_size.marks); + columns[j++]->insert(UInt64(part->modification_time)); + columns[j++]->insert(UInt64(part->remove_time.load(std::memory_order_relaxed))); - columns[j++]->insert(static_cast(use_count)); + columns[j++]->insert(UInt64(use_count)); - columns[j++]->insert(static_cast(min_date)); - columns[j++]->insert(static_cast(max_date)); + columns[j++]->insert(min_date); + columns[j++]->insert(max_date); columns[j++]->insert(part->info.partition_id); columns[j++]->insert(part->info.min_block); columns[j++]->insert(part->info.max_block); - columns[j++]->insert(static_cast(part->info.level)); - columns[j++]->insert(static_cast(part->info.getDataVersion())); - columns[j++]->insert(static_cast(index_size_in_bytes)); - columns[j++]->insert(static_cast(index_size_in_allocated_bytes)); + columns[j++]->insert(part->info.level); + columns[j++]->insert(UInt64(part->info.getDataVersion())); + columns[j++]->insert(index_size_in_bytes); + columns[j++]->insert(index_size_in_allocated_bytes); columns[j++]->insert(info.database); columns[j++]->insert(info.table); @@ -153,10 +153,10 @@ void StorageSystemPartsColumns::processNextStorage(MutableColumns & columns, con } MergeTreeDataPart::ColumnSize column_size = part->getColumnSize(column.name, *column.type); - columns[j++]->insert(static_cast(column_size.data_compressed + column_size.marks)); - columns[j++]->insert(static_cast(column_size.data_compressed)); - columns[j++]->insert(static_cast(column_size.data_uncompressed)); - columns[j++]->insert(static_cast(column_size.marks)); + columns[j++]->insert(column_size.data_compressed + column_size.marks); + columns[j++]->insert(column_size.data_compressed); + columns[j++]->insert(column_size.data_uncompressed); + columns[j++]->insert(column_size.marks); if (has_state_column) columns[j++]->insert(part->stateString()); diff --git a/dbms/src/Storages/System/StorageSystemProcesses.cpp b/dbms/src/Storages/System/StorageSystemProcesses.cpp index 6f5d27fef12..9c121d7911e 100644 --- a/dbms/src/Storages/System/StorageSystemProcesses.cpp +++ b/dbms/src/Storages/System/StorageSystemProcesses.cpp @@ -71,15 +71,15 @@ void StorageSystemProcesses::fillData(MutableColumns & res_columns, const Contex for (const auto & process : info) { size_t i = 0; - res_columns[i++]->insert(UInt64(process.client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY)); + res_columns[i++]->insert(process.client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY); res_columns[i++]->insert(process.client_info.current_user); res_columns[i++]->insert(process.client_info.current_query_id); res_columns[i++]->insert(process.client_info.current_address.host().toString()); - res_columns[i++]->insert(UInt64(process.client_info.current_address.port())); + res_columns[i++]->insert(process.client_info.current_address.port()); res_columns[i++]->insert(process.client_info.initial_user); res_columns[i++]->insert(process.client_info.initial_query_id); res_columns[i++]->insert(process.client_info.initial_address.host().toString()); - 
res_columns[i++]->insert(UInt64(process.client_info.initial_address.port())); + res_columns[i++]->insert(process.client_info.initial_address.port()); res_columns[i++]->insert(UInt64(process.client_info.interface)); res_columns[i++]->insert(process.client_info.os_user); res_columns[i++]->insert(process.client_info.client_hostname); @@ -87,17 +87,17 @@ void StorageSystemProcesses::fillData(MutableColumns & res_columns, const Contex res_columns[i++]->insert(process.client_info.client_version_major); res_columns[i++]->insert(process.client_info.client_version_minor); res_columns[i++]->insert(process.client_info.client_version_patch); - res_columns[i++]->insert(UInt64(process.client_info.client_revision)); + res_columns[i++]->insert(process.client_info.client_revision); res_columns[i++]->insert(UInt64(process.client_info.http_method)); res_columns[i++]->insert(process.client_info.http_user_agent); res_columns[i++]->insert(process.client_info.quota_key); res_columns[i++]->insert(process.elapsed_seconds); - res_columns[i++]->insert(UInt64(process.is_cancelled)); - res_columns[i++]->insert(UInt64(process.read_rows)); - res_columns[i++]->insert(UInt64(process.read_bytes)); - res_columns[i++]->insert(UInt64(process.total_rows)); - res_columns[i++]->insert(UInt64(process.written_rows)); - res_columns[i++]->insert(UInt64(process.written_bytes)); + res_columns[i++]->insert(process.is_cancelled); + res_columns[i++]->insert(process.read_rows); + res_columns[i++]->insert(process.read_bytes); + res_columns[i++]->insert(process.total_rows); + res_columns[i++]->insert(process.written_rows); + res_columns[i++]->insert(process.written_bytes); res_columns[i++]->insert(process.memory_usage); res_columns[i++]->insert(process.peak_memory_usage); res_columns[i++]->insert(process.query); @@ -106,7 +106,7 @@ void StorageSystemProcesses::fillData(MutableColumns & res_columns, const Contex Array threads_array; threads_array.reserve(process.thread_numbers.size()); for (const UInt32 thread_number : process.thread_numbers) - threads_array.emplace_back(UInt64(thread_number)); + threads_array.emplace_back(thread_number); res_columns[i++]->insert(threads_array); } diff --git a/dbms/src/Storages/System/StorageSystemReplicas.cpp b/dbms/src/Storages/System/StorageSystemReplicas.cpp index 2d29a8a5de6..2c1bb8f9d31 100644 --- a/dbms/src/Storages/System/StorageSystemReplicas.cpp +++ b/dbms/src/Storages/System/StorageSystemReplicas.cpp @@ -136,32 +136,32 @@ BlockInputStreams StorageSystemReplicas::read( [(*col_table)[i].safeGet()]).getStatus(status, with_zk_fields); size_t col_num = 3; - res_columns[col_num++]->insert(UInt64(status.is_leader)); - res_columns[col_num++]->insert(UInt64(status.is_readonly)); - res_columns[col_num++]->insert(UInt64(status.is_session_expired)); - res_columns[col_num++]->insert(UInt64(status.queue.future_parts)); - res_columns[col_num++]->insert(UInt64(status.parts_to_check)); + res_columns[col_num++]->insert(status.is_leader); + res_columns[col_num++]->insert(status.is_readonly); + res_columns[col_num++]->insert(status.is_session_expired); + res_columns[col_num++]->insert(status.queue.future_parts); + res_columns[col_num++]->insert(status.parts_to_check); res_columns[col_num++]->insert(status.zookeeper_path); res_columns[col_num++]->insert(status.replica_name); res_columns[col_num++]->insert(status.replica_path); - res_columns[col_num++]->insert(Int64(status.columns_version)); - res_columns[col_num++]->insert(UInt64(status.queue.queue_size)); - 
res_columns[col_num++]->insert(UInt64(status.queue.inserts_in_queue)); - res_columns[col_num++]->insert(UInt64(status.queue.merges_in_queue)); - res_columns[col_num++]->insert(UInt64(status.queue.part_mutations_in_queue)); - res_columns[col_num++]->insert(UInt64(status.queue.queue_oldest_time)); - res_columns[col_num++]->insert(UInt64(status.queue.inserts_oldest_time)); - res_columns[col_num++]->insert(UInt64(status.queue.merges_oldest_time)); - res_columns[col_num++]->insert(UInt64(status.queue.part_mutations_oldest_time)); + res_columns[col_num++]->insert(status.columns_version); + res_columns[col_num++]->insert(status.queue.queue_size); + res_columns[col_num++]->insert(status.queue.inserts_in_queue); + res_columns[col_num++]->insert(status.queue.merges_in_queue); + res_columns[col_num++]->insert(status.queue.part_mutations_in_queue); + res_columns[col_num++]->insert(status.queue.queue_oldest_time); + res_columns[col_num++]->insert(status.queue.inserts_oldest_time); + res_columns[col_num++]->insert(status.queue.merges_oldest_time); + res_columns[col_num++]->insert(status.queue.part_mutations_oldest_time); res_columns[col_num++]->insert(status.queue.oldest_part_to_get); res_columns[col_num++]->insert(status.queue.oldest_part_to_merge_to); res_columns[col_num++]->insert(status.queue.oldest_part_to_mutate_to); res_columns[col_num++]->insert(status.log_max_index); res_columns[col_num++]->insert(status.log_pointer); - res_columns[col_num++]->insert(UInt64(status.queue.last_queue_update)); - res_columns[col_num++]->insert(UInt64(status.absolute_delay)); - res_columns[col_num++]->insert(UInt64(status.total_replicas)); - res_columns[col_num++]->insert(UInt64(status.active_replicas)); + res_columns[col_num++]->insert(status.queue.last_queue_update); + res_columns[col_num++]->insert(status.absolute_delay); + res_columns[col_num++]->insert(status.total_replicas); + res_columns[col_num++]->insert(status.active_replicas); } Block res = getSampleBlock().cloneEmpty(); diff --git a/dbms/src/Storages/System/StorageSystemReplicationQueue.cpp b/dbms/src/Storages/System/StorageSystemReplicationQueue.cpp index 30a32cff9c9..4ac81521b0b 100644 --- a/dbms/src/Storages/System/StorageSystemReplicationQueue.cpp +++ b/dbms/src/Storages/System/StorageSystemReplicationQueue.cpp @@ -115,20 +115,20 @@ void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, const res_columns[col_num++]->insert(database); res_columns[col_num++]->insert(table); res_columns[col_num++]->insert(replica_name); - res_columns[col_num++]->insert(UInt64(j)); + res_columns[col_num++]->insert(j); res_columns[col_num++]->insert(entry.znode_name); res_columns[col_num++]->insert(entry.typeToString()); - res_columns[col_num++]->insert(UInt64(entry.create_time)); - res_columns[col_num++]->insert(UInt64(entry.quorum)); + res_columns[col_num++]->insert(entry.create_time); + res_columns[col_num++]->insert(entry.quorum); res_columns[col_num++]->insert(entry.source_replica); res_columns[col_num++]->insert(entry.new_part_name); res_columns[col_num++]->insert(parts_to_merge); - res_columns[col_num++]->insert(UInt64(entry.detach)); - res_columns[col_num++]->insert(UInt64(entry.currently_executing)); - res_columns[col_num++]->insert(UInt64(entry.num_tries)); + res_columns[col_num++]->insert(entry.detach); + res_columns[col_num++]->insert(entry.currently_executing); + res_columns[col_num++]->insert(entry.num_tries); res_columns[col_num++]->insert(entry.exception ? 
getExceptionMessage(entry.exception, false) : ""); res_columns[col_num++]->insert(UInt64(entry.last_attempt_time)); - res_columns[col_num++]->insert(UInt64(entry.num_postponed)); + res_columns[col_num++]->insert(entry.num_postponed); res_columns[col_num++]->insert(entry.postpone_reason); res_columns[col_num++]->insert(UInt64(entry.last_postpone_time)); } diff --git a/dbms/src/Storages/System/StorageSystemSettings.cpp b/dbms/src/Storages/System/StorageSystemSettings.cpp index 09790ac2fcb..5f958e3553f 100644 --- a/dbms/src/Storages/System/StorageSystemSettings.cpp +++ b/dbms/src/Storages/System/StorageSystemSettings.cpp @@ -26,10 +26,10 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, const Context const Settings & settings = context.getSettingsRef(); #define ADD_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ - res_columns[0]->insert(String(#NAME)); \ + res_columns[0]->insert(#NAME); \ res_columns[1]->insert(settings.NAME.toString()); \ - res_columns[2]->insert(UInt64(settings.NAME.changed)); \ - res_columns[3]->insert(String(DESCRIPTION)); + res_columns[2]->insert(settings.NAME.changed); \ + res_columns[3]->insert(DESCRIPTION); APPLY_FOR_SETTINGS(ADD_SETTING) #undef ADD_SETTING } diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index 21c1c756bd6..8ba44b1d4ad 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -122,7 +122,7 @@ protected: res_columns[res_index++]->insert(table.second->getName()); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(UInt64(1)); + res_columns[res_index++]->insert(1u); if (columns_mask[src_index++]) res_columns[res_index++]->insertDefault(); @@ -173,7 +173,7 @@ protected: res_columns[res_index++]->insert(tables_it->table()->getName()); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(UInt64(0)); + res_columns[res_index++]->insert(0u); if (columns_mask[src_index++]) res_columns[res_index++]->insert(tables_it->table()->getDataPath()); diff --git a/dbms/src/Storages/System/StorageSystemZooKeeper.cpp b/dbms/src/Storages/System/StorageSystemZooKeeper.cpp index da24cfbbed1..db498d620d3 100644 --- a/dbms/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/dbms/src/Storages/System/StorageSystemZooKeeper.cpp @@ -134,17 +134,17 @@ void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, const Contex size_t col_num = 0; res_columns[col_num++]->insert(nodes[i]); res_columns[col_num++]->insert(res.data); - res_columns[col_num++]->insert(Int64(stat.czxid)); - res_columns[col_num++]->insert(Int64(stat.mzxid)); + res_columns[col_num++]->insert(stat.czxid); + res_columns[col_num++]->insert(stat.mzxid); res_columns[col_num++]->insert(UInt64(stat.ctime / 1000)); res_columns[col_num++]->insert(UInt64(stat.mtime / 1000)); - res_columns[col_num++]->insert(Int64(stat.version)); - res_columns[col_num++]->insert(Int64(stat.cversion)); - res_columns[col_num++]->insert(Int64(stat.aversion)); - res_columns[col_num++]->insert(Int64(stat.ephemeralOwner)); - res_columns[col_num++]->insert(Int64(stat.dataLength)); - res_columns[col_num++]->insert(Int64(stat.numChildren)); - res_columns[col_num++]->insert(Int64(stat.pzxid)); + res_columns[col_num++]->insert(stat.version); + res_columns[col_num++]->insert(stat.cversion); + res_columns[col_num++]->insert(stat.aversion); + res_columns[col_num++]->insert(stat.ephemeralOwner); + res_columns[col_num++]->insert(stat.dataLength); + 
res_columns[col_num++]->insert(stat.numChildren); + res_columns[col_num++]->insert(stat.pzxid); res_columns[col_num++]->insert(path); /// This is the original path. In order to process the request, condition in WHERE should be triggered. } } diff --git a/libs/libdaemon/src/OwnSplitChannel.cpp b/libs/libdaemon/src/OwnSplitChannel.cpp index e6b2cb22013..93f73085ab4 100644 --- a/libs/libdaemon/src/OwnSplitChannel.cpp +++ b/libs/libdaemon/src/OwnSplitChannel.cpp @@ -40,12 +40,12 @@ void OwnSplitChannel::log(const Poco::Message & msg) MutableColumns columns = InternalTextLogsQueue::getSampleColumns(); size_t i = 0; - columns[i++]->insert(static_cast(msg_ext.time_seconds)); - columns[i++]->insert(static_cast(msg_ext.time_microseconds)); + columns[i++]->insert(msg_ext.time_seconds); + columns[i++]->insert(msg_ext.time_microseconds); columns[i++]->insert(DNSResolver::instance().getHostName()); columns[i++]->insert(msg_ext.query_id); - columns[i++]->insert(static_cast(msg_ext.thread_number)); - columns[i++]->insert(static_cast(msg.getPriority())); + columns[i++]->insert(msg_ext.thread_number); + columns[i++]->insert(Int64(msg.getPriority())); columns[i++]->insert(msg.getSource()); columns[i++]->insert(msg.getText()); From 06e955e92a10067e45b7120fe265fbe124d0be62 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Wed, 24 Oct 2018 16:12:59 +0300 Subject: [PATCH 06/79] fix combine hashes --- dbms/src/Functions/FunctionsHashing.h | 38 ++++++++++++++------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index 2c645b2f37c..04fe4069775 100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -108,9 +108,10 @@ struct HalfMD5Impl return Poco::ByteOrder::flipBytes(buf.uint64_data); /// Compatibility with existing code. 
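/// The renamed combiner below feeds the two sub-hashes back through apply()
/// instead of the generic IntHash64Impl::apply(h1) ^ h2 -- a sketch of the new
/// definition, spelling out the reinterpret_cast target type:
///
///     static UInt64 combineHashes(UInt64 h1, UInt64 h2)
///     {
///         UInt64 hashes[] = {h1, h2};
///         return apply(reinterpret_cast<const char *>(hashes), 16);  /// 16 == 2 * sizeof(UInt64)
///     }
///
/// Combining therefore depends on the underlying hash function itself (MD5
/// here, SipHash64 below), which is presumably the "fix combine hashes" in
/// this patch's subject and the reason the expected values in the follow-up
/// test patch change.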
} - static UInt64 mergeHashes(UInt64 h1, UInt64 h2) + static UInt64 combineHashes(UInt64 h1, UInt64 h2) { - return IntHash64Impl::apply(h1) ^ h2; + UInt64 hashes[] = {h1, h2}; + return apply(reinterpret_cast(hashes), 16); } }; @@ -180,9 +181,10 @@ struct SipHash64Impl return sipHash64(begin, size); } - static UInt64 mergeHashes(UInt64 h1, UInt64 h2) + static UInt64 combineHashes(UInt64 h1, UInt64 h2) { - return IntHash64Impl::apply(h1) ^ h2; + UInt64 hashes[] = {h1, h2}; + return apply(reinterpret_cast(hashes), 16); } }; @@ -364,7 +366,7 @@ private: if (first) vec_to[i] = h; else - vec_to[i] = Impl::mergeHashes(vec_to[i], h); + vec_to[i] = Impl::combineHashes(vec_to[i], h); } } else if (auto col_from = checkAndGetColumnConst>(column)) @@ -384,7 +386,7 @@ private: else { for (size_t i = 0; i < size; ++i) - vec_to[i] = Impl::mergeHashes(vec_to[i], hash); + vec_to[i] = Impl::combineHashes(vec_to[i], hash); } } else @@ -412,7 +414,7 @@ private: if (first) vec_to[i] = h; else - vec_to[i] = Impl::mergeHashes(vec_to[i], h); + vec_to[i] = Impl::combineHashes(vec_to[i], h); current_offset = offsets[i]; } @@ -429,7 +431,7 @@ private: if (first) vec_to[i] = h; else - vec_to[i] = Impl::mergeHashes(vec_to[i], h); + vec_to[i] = Impl::combineHashes(vec_to[i], h); } } else if (const ColumnConst * col_from = checkAndGetColumnConstStringOrFixedString(column)) @@ -446,7 +448,7 @@ private: { for (size_t i = 0; i < size; ++i) { - vec_to[i] = Impl::mergeHashes(vec_to[i], hash); + vec_to[i] = Impl::combineHashes(vec_to[i], hash); } } } @@ -486,10 +488,10 @@ private: if (first) vec_to[i] = h; else - vec_to[i] = Impl::mergeHashes(vec_to[i], h); + vec_to[i] = Impl::combineHashes(vec_to[i], h); for (size_t j = current_offset; j < next_offset; ++j) - vec_to[i] = Impl::mergeHashes(vec_to[i], vec_temp[j]); + vec_to[i] = Impl::combineHashes(vec_to[i], vec_temp[j]); current_offset = offsets[i]; } @@ -626,7 +628,7 @@ struct MurmurHash2Impl32 return MurmurHash2(data, size, 0); } - static UInt32 mergeHashes(UInt32 h1, UInt32 h2) + static UInt32 combineHashes(UInt32 h1, UInt32 h2) { return IntHash32Impl::apply(h1) ^ h2; } @@ -642,7 +644,7 @@ struct MurmurHash2Impl64 return MurmurHash64A(data, size, 0); } - static UInt64 mergeHashes(UInt64 h1, UInt64 h2) + static UInt64 combineHashes(UInt64 h1, UInt64 h2) { return IntHash64Impl::apply(h1) ^ h2; } @@ -664,7 +666,7 @@ struct MurmurHash3Impl32 return h; } - static UInt32 mergeHashes(UInt32 h1, UInt32 h2) + static UInt32 combineHashes(UInt32 h1, UInt32 h2) { return IntHash32Impl::apply(h1) ^ h2; } @@ -686,7 +688,7 @@ struct MurmurHash3Impl64 return h[0] ^ h[1]; } - static UInt64 mergeHashes(UInt64 h1, UInt64 h2) + static UInt64 combineHashes(UInt64 h1, UInt64 h2) { return IntHash64Impl::apply(h1) ^ h2; } @@ -903,7 +905,7 @@ struct ImplCityHash64 using ReturnType = UInt64; using uint128_t = CityHash_v1_0_2::uint128; - static auto mergeHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } + static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } static auto apply(const char * s, const size_t len) { return CityHash_v1_0_2::CityHash64(s, len); } }; @@ -914,7 +916,7 @@ struct ImplFarmHash64 using ReturnType = UInt64; using uint128_t = NAMESPACE_FOR_HASH_FUNCTIONS::uint128_t; - static auto mergeHashes(UInt64 h1, UInt64 h2) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash128to64(uint128_t(h1, h2)); } + static auto combineHashes(UInt64 h1, UInt64 h2) { return 
NAMESPACE_FOR_HASH_FUNCTIONS::Hash128to64(uint128_t(h1, h2)); } static auto apply(const char * s, const size_t len) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(s, len); } }; @@ -924,7 +926,7 @@ struct ImplMetroHash64 using ReturnType = UInt64; using uint128_t = CityHash_v1_0_2::uint128; - static auto mergeHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } + static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } static auto apply(const char * s, const size_t len) { union From 89808b89b6ee276f312a9eb0dd9df184fde68e9c Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Wed, 24 Oct 2018 16:27:49 +0300 Subject: [PATCH 07/79] update test --- .../0_stateless/00746_hashing_tuples.reference | 15 +++++++++------ .../queries/0_stateless/00746_hashing_tuples.sql | 12 ++++++++---- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00746_hashing_tuples.reference b/dbms/tests/queries/0_stateless/00746_hashing_tuples.reference index b38b216a8d6..391a59e012f 100644 --- a/dbms/tests/queries/0_stateless/00746_hashing_tuples.reference +++ b/dbms/tests/queries/0_stateless/00746_hashing_tuples.reference @@ -1,12 +1,15 @@ -6847376565456338547 -15499510486101262177 -3822366986039497337 +8732148587615156034 +3856459458360415155 +1993857991550209231 +5465424717626995012 +15495040516566687427 +13266110974878256384 617416965 3293554683 -15433379 +4210800467 6847376565456338547 15499510486101262177 -3137889964064254064 +13552202417419166072 6847376565456338547 15499510486101262177 -6284898493105666575 +14474638290107799038 diff --git a/dbms/tests/queries/0_stateless/00746_hashing_tuples.sql b/dbms/tests/queries/0_stateless/00746_hashing_tuples.sql index ce26225d730..1582d74e030 100644 --- a/dbms/tests/queries/0_stateless/00746_hashing_tuples.sql +++ b/dbms/tests/queries/0_stateless/00746_hashing_tuples.sql @@ -1,15 +1,19 @@ SELECT sipHash64(1, 2, 3); SELECT sipHash64(1, 3, 2); -SELECT sipHash64('a', [1, 2, 3], 4); +SELECT sipHash64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); + +SELECT halfMD5(1, 2, 3); +SELECT halfMD5(1, 3, 2); +SELECT halfMD5(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); SELECT murmurHash2_32(1, 2, 3); SELECT murmurHash2_32(1, 3, 2); -SELECT murmurHash2_32('a', [1, 2, 3], 4); +SELECT murmurHash2_32(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], (1, 2)))); SELECT murmurHash2_64(1, 2, 3); SELECT murmurHash2_64(1, 3, 2); -SELECT murmurHash2_64('a', [1, 2, 3], 4); +SELECT murmurHash2_64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); SELECT murmurHash3_64(1, 2, 3); SELECT murmurHash3_64(1, 3, 2); -SELECT murmurHash3_64('a', [1, 2, 3], 4); +SELECT murmurHash3_64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); From 8aa0c423aad50567d1043fe8fab392d433696bff Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 25 Oct 2018 07:16:41 +0300 Subject: [PATCH 08/79] Fix for the case when there is no filter. 
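Editor's note, not part of the original message: without this fix the script crashed when run with no --filter option, because args.filter is None and None.split('/') raises AttributeError. A minimal sketch of the guarded lookup, a hypothetical standalone reduction of the change to generate_and_test.py:

```python
import fnmatch

def matches_filter(name, filter_expr):
    """True when `name` matches the dictionary part of a "dict/test" filter.

    A missing filter (None or empty string) matches everything."""
    if not filter_expr:
        return True
    dict_name_filter = filter_expr.split('/')[0]
    return fnmatch.fnmatch(name, dict_name_filter)
```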
--- dbms/tests/external_dictionaries/generate_and_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/external_dictionaries/generate_and_test.py b/dbms/tests/external_dictionaries/generate_and_test.py index db2f4fe5e9c..a2cab7006a7 100755 --- a/dbms/tests/external_dictionaries/generate_and_test.py +++ b/dbms/tests/external_dictionaries/generate_and_test.py @@ -670,7 +670,7 @@ def generate_dictionaries(args): (layout_range_hashed, range_hashed_range_type)], ]) - dict_name_filter = args.filter.split('/')[0] + dict_name_filter = args.filter.split('/')[0] if args.filter else None for (name, key_idx, has_parent), (source, layout) in zip(dictionaries, sources_and_layouts): if args.filter and not fnmatch.fnmatch(name, dict_name_filter): continue From 7ebc44a0c877c3ad097111c4ec31d3cdb8444a9c Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 25 Oct 2018 07:42:02 +0300 Subject: [PATCH 09/79] Fixed test case for UUID_ dictionary attribute. --- dbms/tests/external_dictionaries/generate_and_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/external_dictionaries/generate_and_test.py b/dbms/tests/external_dictionaries/generate_and_test.py index a2cab7006a7..7722d9b34b2 100755 --- a/dbms/tests/external_dictionaries/generate_and_test.py +++ b/dbms/tests/external_dictionaries/generate_and_test.py @@ -303,7 +303,7 @@ def generate_data(args): 'UInt8_ tinyint unsigned, UInt16_ smallint unsigned, UInt32_ int unsigned, UInt64_ bigint unsigned, ' 'Int8_ tinyint, Int16_ smallint, Int32_ int, Int64_ bigint, ' 'Float32_ float, Float64_ double, ' - 'String_ text, Date_ date, DateTime_ datetime, Parent bigint unsigned, UUID_ varchar(36)' + 'String_ text, Date_ date, DateTime_ datetime, UUID_ varchar(36)' ');' 'load data local infile \'{0}/generated/{file}\' into table {table_name};" | mysql $MYSQL_OPTIONS --local-infile=1' .format(prefix, table_name=table_name, col_type=col_type, file=file), shell=True) From 95c618b163410e83c10ea63d5c50ae92c2e900cd Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 26 Oct 2018 18:13:02 +0300 Subject: [PATCH 10/79] ExpressionAnalyzer: remove duplicated code --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 51 ++----------------- .../PredicateExpressionsOptimizer.cpp | 28 ++-------- .../PredicateExpressionsOptimizer.h | 2 - dbms/src/Interpreters/evaluateQualified.cpp | 32 ++++++++++++ dbms/src/Interpreters/evaluateQualified.h | 4 ++ 5 files changed, 44 insertions(+), 73 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index f90868d7a36..b889336b620 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -275,55 +275,14 @@ bool ExpressionAnalyzer::isRemoteStorage() const } -static std::vector getTableExpressions(const ASTPtr & query) -{ - ASTSelectQuery * select_query = typeid_cast(query.get()); - - std::vector table_expressions; - - if (select_query && select_query->tables) - { - for (const auto & element : select_query->tables->children) - { - ASTTablesInSelectQueryElement & select_element = static_cast(*element); - - if (select_element.table_expression) - table_expressions.emplace_back(static_cast(*select_element.table_expression)); - } - } - - return table_expressions; -} - void ExpressionAnalyzer::translateQualifiedNames() { if (!select_query || !select_query->tables || select_query->tables->children.empty()) return; - std::vector tables; - std::vector tables_expression = 
getTableExpressions(query); + std::vector tables = getDatabaseAndTableWithAliases(select_query, context.getCurrentDatabase()); LogAST log; - - for (const auto & table_expression : tables_expression) - { - auto table = getTableNameWithAliasFromTableExpression(table_expression, context.getCurrentDatabase()); - - { /// debug print - size_t depth = 0; - DumpASTNode dump(table_expression, log.stream(), depth, "getTableNames"); - if (table_expression.database_and_table_name) - DumpASTNode(*table_expression.database_and_table_name, log.stream(), depth); - if (table_expression.table_function) - DumpASTNode(*table_expression.table_function, log.stream(), depth); - if (table_expression.subquery) - DumpASTNode(*table_expression.subquery, log.stream(), depth); - dump.print("getTableNameWithAlias", table.database + '.' + table.table + ' ' + table.alias); - } - - tables.emplace_back(table); - } - TranslateQualifiedNamesVisitor visitor(source_columns, tables, log.stream()); visitor.visit(query); } @@ -602,13 +561,13 @@ void ExpressionAnalyzer::normalizeTree() TableNamesAndColumnNames table_names_and_column_names; if (select_query && select_query->tables && !select_query->tables->children.empty()) { - std::vector tables_expression = getTableExpressions(query); + std::vector tables_expression = getSelectTablesExpression(select_query); bool first = true; - for (const auto & table_expression : tables_expression) + for (const auto * table_expression : tables_expression) { - const auto table_name = getTableNameWithAliasFromTableExpression(table_expression, context.getCurrentDatabase()); - NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(table_expression, context); + const auto table_name = getTableNameWithAliasFromTableExpression(*table_expression, context.getCurrentDatabase()); + NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context); if (!first) { diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 1e4bb996f90..222b9dd5131 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -44,11 +44,8 @@ bool PredicateExpressionsOptimizer::optimizeImpl( /// split predicate with `and` PredicateExpressions outer_predicate_expressions = splitConjunctionPredicate(outer_expression); - std::vector tables_expression = getSelectTablesExpression(ast_select); - std::vector database_and_table_with_aliases; - for (const auto & table_expression : tables_expression) - database_and_table_with_aliases.emplace_back( - getTableNameWithAliasFromTableExpression(*table_expression, context.getCurrentDatabase())); + std::vector database_and_table_with_aliases = + getDatabaseAndTableWithAliases(ast_select, context.getCurrentDatabase()); bool is_rewrite_subquery = false; for (const auto & outer_predicate : outer_predicate_expressions) @@ -336,7 +333,7 @@ ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_que if (!select_query->tables || select_query->tables->children.empty()) return {}; - std::vector tables_expression = getSelectTablesExpression(select_query); + std::vector tables_expression = getSelectTablesExpression(select_query); if (const auto qualified_asterisk = typeid_cast(asterisk.get())) { @@ -406,25 +403,6 @@ ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_que return projection_columns; } -std::vector 
PredicateExpressionsOptimizer::getSelectTablesExpression(ASTSelectQuery * select_query) -{ - if (!select_query->tables) - return {}; - - std::vector tables_expression; - const ASTTablesInSelectQuery & tables_in_select_query = static_cast(*select_query->tables); - - for (const auto & child : tables_in_select_query.children) - { - ASTTablesInSelectQueryElement * tables_element = static_cast(child.get()); - - if (tables_element->table_expression) - tables_expression.emplace_back(static_cast(tables_element->table_expression.get())); - } - - return tables_expression; -} - void PredicateExpressionsOptimizer::cleanExpressionAlias(ASTPtr & expression) { const auto my_alias = expression->tryGetAlias(); diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h index c1a02a7df18..d75d9135f8a 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h @@ -105,8 +105,6 @@ private: ASTs getSelectQueryProjectionColumns(ASTPtr & ast); - std::vector getSelectTablesExpression(ASTSelectQuery * select_query); - ASTs evaluateAsterisk(ASTSelectQuery * select_query, const ASTPtr & asterisk); void cleanExpressionAlias(ASTPtr & expression); diff --git a/dbms/src/Interpreters/evaluateQualified.cpp b/dbms/src/Interpreters/evaluateQualified.cpp index 205885011d1..7f3002edad1 100644 --- a/dbms/src/Interpreters/evaluateQualified.cpp +++ b/dbms/src/Interpreters/evaluateQualified.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -164,4 +165,35 @@ void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const } } +std::vector getSelectTablesExpression(const ASTSelectQuery * select_query) +{ + if (!select_query->tables) + return {}; + + std::vector tables_expression; + + for (const auto & child : select_query->tables->children) + { + ASTTablesInSelectQueryElement * tables_element = static_cast(child.get()); + + if (tables_element->table_expression) + tables_expression.emplace_back(static_cast(tables_element->table_expression.get())); + } + + return tables_expression; +} + +std::vector getDatabaseAndTableWithAliases(const ASTSelectQuery * select_query, const String & current_database) +{ + std::vector tables_expression = getSelectTablesExpression(select_query); + + std::vector database_and_table_with_aliases; + database_and_table_with_aliases.reserve(tables_expression.size()); + + for (const auto & table_expression : tables_expression) + database_and_table_with_aliases.emplace_back(getTableNameWithAliasFromTableExpression(*table_expression, current_database)); + + return database_and_table_with_aliases; +} + } diff --git a/dbms/src/Interpreters/evaluateQualified.h b/dbms/src/Interpreters/evaluateQualified.h index 94833190d81..39dcf77fbfd 100644 --- a/dbms/src/Interpreters/evaluateQualified.h +++ b/dbms/src/Interpreters/evaluateQualified.h @@ -9,6 +9,7 @@ namespace DB class IAST; using ASTPtr = std::shared_ptr; +class ASTSelectQuery; class ASTIdentifier; struct ASTTableExpression; @@ -36,4 +37,7 @@ size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifie std::pair getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier); +std::vector getSelectTablesExpression(const ASTSelectQuery * select_query); +std::vector getDatabaseAndTableWithAliases(const ASTSelectQuery * select_query, const String & current_database); + } From f22779ad15c6be57a7d2ff2b9423b796f091725c Mon Sep 17 00:00:00 2001 From: hotid Date: Fri, 26 Oct 2018 
20:14:43 +0300 Subject: [PATCH 11/79] stop generating empty WHERE () section in mysql queries --- dbms/src/Storages/transformQueryForExternalDatabase.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp index 2615e8d0d92..43d20d7a4c0 100644 --- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp +++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp @@ -119,6 +119,7 @@ String transformQueryForExternalDatabase( { if (function->name == "and") { + bool compatibleFound = false; auto new_function_and = std::make_shared(); auto new_function_and_arguments = std::make_shared(); new_function_and->arguments = new_function_and_arguments; @@ -126,9 +127,13 @@ String transformQueryForExternalDatabase( for (const auto & elem : function->arguments->children) if (isCompatible(*elem)) + { new_function_and_arguments->children.push_back(elem); + compatibleFound = true; + } - select->where_expression = std::move(new_function_and); + if (compatibleFound) + select->where_expression = std::move(new_function_and); } } } From aa0f812b1ef73bf704f3ec4203ef67d8aedd3fe0 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sun, 28 Oct 2018 08:15:57 +0300 Subject: [PATCH 12/79] Tests for ClickHouse as range_hashed dictionary source --- .../generate_and_test.py | 65 ++++++++++++++----- 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/dbms/tests/external_dictionaries/generate_and_test.py b/dbms/tests/external_dictionaries/generate_and_test.py index 7722d9b34b2..2774ccaa6d3 100755 --- a/dbms/tests/external_dictionaries/generate_and_test.py +++ b/dbms/tests/external_dictionaries/generate_and_test.py @@ -141,7 +141,7 @@ def generate_structure(args): base_name = 'range_hashed_' + range_hashed_range_type dictionaries.extend([ [ 'file_' + base_name, 3, False ], - # [ 'clickhouse_' + base_name, 3, True ], + [ 'clickhouse_' + base_name, 3, False ], # [ 'executable_flat' + base_name, 3, True ] ]) @@ -150,8 +150,6 @@ def generate_structure(args): base_name = 'range_hashed_' + range_hashed_range_type dictionaries.extend([ ['mysql_' + base_name, 3, False], - # [ 'clickhouse_' + base_name, 3, True ], - # [ 'executable_flat' + base_name, 3, True ] ]) @@ -230,6 +228,22 @@ range_hashed_mysql_column_types = { 'DateTime': 'datetime', } +range_hashed_clickhouse_column_types = { + 'UInt8': 'UInt8', + 'UInt16': 'UInt16', + 'UInt32': 'UInt32', + 'UInt64': 'UInt64', + 'Int8': 'Int8', + 'Int16': 'Int16', + 'Int32': 'Int32', + 'Int64': 'Int64', + # default type (Date) for compatibility with older versions: + '': 'Date', + 'Date': 'Date', + 'DateTime': 'DateTime', +} + + def dump_report(destination, suite, test_case, report): if destination is not None: destination_file = os.path.join(destination, suite, test_case + ".xml") @@ -291,9 +305,26 @@ def generate_data(args): query = file_source_query % comma_separated(chain(keys, columns(), ['Parent'] if 1 == len(keys) else [])) call([args.client, '--port', args.port, '--query', query], 'generated/' + file) + table_name = "test.dictionary_source_" + range_hashed_range_type + col_type = range_hashed_clickhouse_column_types[range_hashed_range_type] + + source_tsv_full_path = "{0}/generated/{1}".format(prefix, file) + print 'Creating Clickhouse table for "{0}" range_hashed dictionary...'.format(range_hashed_range_type) + system('cat {source} | {ch} --port={port} -m -n --query "' + 'create database if not exists test;' + 'drop table if exists 
{table_name};' + 'create table {table_name} (' + 'id UInt64, StartDate {col_type}, EndDate {col_type},' + 'UInt8_ UInt8, UInt16_ UInt16, UInt32_ UInt32, UInt64_ UInt64,' + 'Int8_ Int8, Int16_ Int16, Int32_ Int32, Int64_ Int64,' + 'Float32_ Float32, Float64_ Float64,' + 'String_ String,' + 'Date_ Date, DateTime_ DateTime, UUID_ UUID' + ') engine=Log; insert into {table_name} format TabSeparated' + '"'.format(table_name=table_name, col_type=col_type, source=source_tsv_full_path, ch=args.client, port=args.port)) + if not args.no_mysql: - print 'Creating MySQL table for "{0}"...'.format(range_hashed_range_type) - table_name = "test.dictionary_source_" + range_hashed_range_type + print 'Creating MySQL table for "{0}" range_hashed dictionary...'.format(range_hashed_range_type) col_type = range_hashed_mysql_column_types[range_hashed_range_type] subprocess.check_call('echo "' 'create database if not exists test;' @@ -305,8 +336,8 @@ def generate_data(args): 'Float32_ float, Float64_ double, ' 'String_ text, Date_ date, DateTime_ datetime, UUID_ varchar(36)' ');' - 'load data local infile \'{0}/generated/{file}\' into table {table_name};" | mysql $MYSQL_OPTIONS --local-infile=1' - .format(prefix, table_name=table_name, col_type=col_type, file=file), shell=True) + 'load data local infile \'{source}\' into table {table_name};" | mysql $MYSQL_OPTIONS --local-infile=1' + .format(prefix, table_name=table_name, col_type=col_type, source=source_tsv_full_path), shell=True) # create MySQL table from complete_query @@ -406,7 +437,7 @@ def generate_dictionaries(args): default test - dictionary_source
+ <table>dictionary_source{key_type}</table>
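# Editor's sketch, not part of the patch: how the {key_type} placeholder above resolves. Each
# range type gets its own backing table, while the empty key keeps the legacy table name
# (the script's own variable is spelled source_clickhouse_deafult in the hunk below):
source_clickhouse_default = source_clickhouse.format(key_type='')         # -> test.dictionary_source
source_clickhouse_uint32  = source_clickhouse.format(key_type='_UInt32')  # -> test.dictionary_source_UInt32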
''' % args.port @@ -559,33 +590,34 @@ def generate_dictionaries(args): ''' + source_clickhouse_deafult = source_clickhouse.format(key_type="") sources_and_layouts = [ # Simple key dictionaries [ source_file % (generated_prefix + files[0]), layout_flat], - [ source_clickhouse, layout_flat ], + [ source_clickhouse_deafult, layout_flat ], [ source_executable % (generated_prefix + files[0]), layout_flat ], [ source_file % (generated_prefix + files[0]), layout_hashed], - [ source_clickhouse, layout_hashed ], + [ source_clickhouse_deafult, layout_hashed ], [ source_executable % (generated_prefix + files[0]), layout_hashed ], - [ source_clickhouse, layout_cache ], + [ source_clickhouse_deafult, layout_cache ], [ source_executable_cache % (generated_prefix + files[0]), layout_cache ], # Complex key dictionaries with (UInt8, UInt8) key [ source_file % (generated_prefix + files[1]), layout_complex_key_hashed], - [ source_clickhouse, layout_complex_key_hashed ], + [ source_clickhouse_deafult, layout_complex_key_hashed ], [ source_executable % (generated_prefix + files[1]), layout_complex_key_hashed ], - [ source_clickhouse, layout_complex_key_cache ], + [ source_clickhouse_deafult, layout_complex_key_cache ], [ source_executable_cache % (generated_prefix + files[1]), layout_complex_key_cache ], # Complex key dictionaries with (String, UInt8) key [ source_file % (generated_prefix + files[2]), layout_complex_key_hashed], - [ source_clickhouse, layout_complex_key_hashed ], + [ source_clickhouse_deafult, layout_complex_key_hashed ], [ source_executable % (generated_prefix + files[2]), layout_complex_key_hashed ], - [ source_clickhouse, layout_complex_key_cache ], + [ source_clickhouse_deafult, layout_complex_key_cache ], [ source_executable_cache % (generated_prefix + files[2]), layout_complex_key_cache ], ] @@ -655,9 +687,10 @@ def generate_dictionaries(args): ]) for range_hashed_range_type in range_hashed_range_types: + key_type = "_" + range_hashed_range_type sources_and_layouts.extend([ [ source_file % (generated_prefix + (files[3].format(range_hashed_range_type=range_hashed_range_type))), (layout_range_hashed, range_hashed_range_type) ], - # [ source_clickhouse, layout_range_hashed ], + [ source_clickhouse.format(key_type=key_type), (layout_range_hashed, range_hashed_range_type) ], # [ source_executable, layout_range_hashed ] ]) From 2a0c967b6bbf8954e8ba60ca1612df8a155f8a2c Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 29 Oct 2018 14:32:28 +0300 Subject: [PATCH 13/79] add RU changelog for v18.14.11 --- CHANGELOG_RU.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index 30fb526f3a6..2569c655c9b 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,3 +1,12 @@ +## ClickHouse release 18.14.11, 2018-10-29 + +### Исправления ошибок: + +* Исправлена ошибка `Block structure mismatch in UNION stream: different number of columns` в запросах с LIMIT. [#2156](https://github.com/yandex/ClickHouse/issues/2156) +* Исправлены ошибки при слиянии данных в таблицах, содержащих массивы внутри Nested структур. [#3397](https://github.com/yandex/ClickHouse/pull/3397) +* Исправлен неправильный результат запросов при выключенной настройке `merge_tree_uniform_read_distribution` (включена по умолчанию). [#3429](https://github.com/yandex/ClickHouse/pull/3429) +* Исправлена ошибка при вставке в Distributed таблицу в формате Native. 
[#3411](https://github.com/yandex/ClickHouse/issues/3411) + ## ClickHouse release 18.14.10, 2018-10-23 * Настройка `compile_expressions` (JIT компиляция выражений) выключена по умолчанию. [#3410](https://github.com/yandex/ClickHouse/pull/3410) From 444b6e1e43a5bb9809b2dd01681c71b1c17fcb26 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 29 Oct 2018 16:18:41 +0300 Subject: [PATCH 14/79] Add validation of array data --- dbms/src/Formats/ValuesRowInputStream.cpp | 30 +++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/dbms/src/Formats/ValuesRowInputStream.cpp b/dbms/src/Formats/ValuesRowInputStream.cpp index 559ac658a6a..13d013a8ac9 100644 --- a/dbms/src/Formats/ValuesRowInputStream.cpp +++ b/dbms/src/Formats/ValuesRowInputStream.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,20 @@ namespace ErrorCodes } +bool is_array_type_compatible(const DataTypeArray & type, const Field & value) +{ + if (type.getNestedType()->isNullable()) + return true; + + const Array & array = DB::get(value); + size_t size = array.size(); + for (size_t i = 0; i < size; ++i) + if (array[i].isNull()) + return false; + + return true; +} + ValuesRowInputStream::ValuesRowInputStream(ReadBuffer & istr_, const Block & header_, const Context & context_, const FormatSettings & format_settings) : istr(istr_), header(header_), context(std::make_unique(context_)), format_settings(format_settings) { @@ -116,14 +131,15 @@ bool ValuesRowInputStream::read(MutableColumns & columns) std::pair value_raw = evaluateConstantExpression(ast, *context); Field value = convertFieldToType(value_raw.first, type, value_raw.second.get()); - if (value.isNull()) + const auto * array_type = typeid_cast(&type); + + /// Check that we are indeed allowed to insert a NULL. + if ((value.isNull() && !type.isNullable()) || (array_type && !is_array_type_compatible(*array_type, value))) { - /// Check that we are indeed allowed to insert a NULL. 
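/// Editor's sketch, not part of the patch: the helper added earlier in this diff, with the
/// template arguments that extraction stripped restored and the loop condensed (assumed reading):
static bool is_array_type_compatible(const DataTypeArray & type, const Field & value)
{
    if (type.getNestedType()->isNullable())
        return true;                          /// Array(Nullable(T)) may legitimately hold NULLs.

    const Array & array = DB::get<const Array &>(value);
    for (const auto & element : array)
        if (element.isNull())
            return false;                     /// NULL inside an array of non-Nullable T is rejected.

    return true;
}
/// With this check, INSERT ... VALUES ([1, NULL, 3]) into an Array(UInt8) column throws
/// VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE instead of being accepted.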
- if (!type.isNullable()) - throw Exception{"Expression returns value " + applyVisitor(FieldVisitorToString(), value) - + ", that is out of range of type " + type.getName() - + ", at: " + String(prev_istr_position, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, istr.buffer().end() - prev_istr_position)), - ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE}; + throw Exception{"Expression returns value " + applyVisitor(FieldVisitorToString(), value) + + ", that is out of range of type " + type.getName() + + ", at: " + String(prev_istr_position, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, istr.buffer().end() - prev_istr_position)), + ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE}; } columns[i]->insert(value); From fc99b7d389b5135bcbfb96ada592e574e88827f6 Mon Sep 17 00:00:00 2001 From: Alex Zatelepin Date: Mon, 29 Oct 2018 17:17:58 +0300 Subject: [PATCH 15/79] style fixes --- dbms/src/Storages/transformQueryForExternalDatabase.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp index 43d20d7a4c0..d143cb32ff8 100644 --- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp +++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp @@ -119,20 +119,22 @@ String transformQueryForExternalDatabase( { if (function->name == "and") { - bool compatibleFound = false; + bool compatible_found = false; auto new_function_and = std::make_shared(); auto new_function_and_arguments = std::make_shared(); new_function_and->arguments = new_function_and_arguments; new_function_and->children.push_back(new_function_and_arguments); for (const auto & elem : function->arguments->children) + { if (isCompatible(*elem)) { new_function_and_arguments->children.push_back(elem); - compatibleFound = true; + compatible_found = true; } + } - if (compatibleFound) + if (compatible_found) select->where_expression = std::move(new_function_and); } } From 6efd0f6bf7ab8787122cb9622b970610545b7ab6 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 29 Oct 2018 19:21:01 +0300 Subject: [PATCH 16/79] Remove incorrect statement from docs Column that is in the USING clause is not required to be present in the column list of the left side of the join (but it must be present in the table definition). This reverts commit f2533a9653531c8381e5dd9365e3ec1e9f5838ed. --- docs/en/query_language/select.md | 36 -------------------------------- 1 file changed, 36 deletions(-) diff --git a/docs/en/query_language/select.md b/docs/en/query_language/select.md index 14603d1066d..53164e67d80 100644 --- a/docs/en/query_language/select.md +++ b/docs/en/query_language/select.md @@ -346,42 +346,6 @@ The table name can be specified instead of a subquery. This is equivalent to the All columns that are not needed for the JOIN are deleted from the subquery. 
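Editor's note, not part of the docs: the example removed below claimed that a column used in USING must also appear in the left SELECT list. Per the commit message, it only has to exist in the left table's definition. A minimal illustration with hypothetical tables:

``` sql
-- Hypothetical schema: column k exists in both tables' definitions.
SELECT value, name
FROM left_table
ALL INNER JOIN
(
    SELECT k, name FROM right_table
) USING (k)
-- k is absent from the outer SELECT list, yet the JOIN is valid.
```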
-Example: - -``` sql -SELECT - endpoint, - metric, - value, - idcName -FROM datapoints -ALL INNER JOIN -( - SELECT - ips AS endpoint, - idcName - FROM cmdb -) USING (endpoint) -LIMIT 10 -``` - -``` -┌─endpoint─────┬─metric────────────┬───────value─┬─idcName─┐ -│ 192.168.1.100 │ disk.total │ 50465866000 │ office │ -│ 192.168.1.100 │ disk.total │ 494927870 │ office │ -│ 192.168.1.100 │ disk.used.percent │ 48 │ office │ -│ 192.168.1.100 │ disk.used.percent │ 50 │ office │ -│ 192.168.1.100 │ disk.util │ 0 │ office │ -│ 192.168.1.100 │ disk.util │ 0 │ office │ -│ 192.168.1.100 │ disk.util │ 0 │ office │ -│ 192.168.1.100 │ disk.util │ 0 │ office │ -│ 192.168.1.100 │ load.15min │ 0.02 │ office │ -│ 192.168.1.100 │ load.1min │ 0.11 │ office │ -└──────────────┴───────────────────┴─────────────┴─────────┘ -``` - -Notice the column in `USING`(endpoint) must be in former `SELECT` column list and subquery's. - There are several types of JOINs: `INNER` or `LEFT` type:If INNER is specified, the result will contain only those rows that have a matching row in the right table. From 31bc680ac5f4bb1d0360a8ba4696fa84bb47d6ab Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 29 Oct 2018 21:00:36 +0300 Subject: [PATCH 17/79] ErrorCodes::SYNTAX_ERROR now causes HTTP_BAD_REQUEST --- dbms/programs/server/HTTPHandler.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 1a2f321fa00..9d9324b9a3e 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -63,6 +63,8 @@ namespace ErrorCodes extern const int TOO_BIG_AST; extern const int UNEXPECTED_AST_STRUCTURE; + extern const int SYNTAX_ERROR; + extern const int UNKNOWN_TABLE; extern const int UNKNOWN_FUNCTION; extern const int UNKNOWN_IDENTIFIER; @@ -109,6 +111,8 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti exception_code == ErrorCodes::TOO_BIG_AST || exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE) return HTTPResponse::HTTP_BAD_REQUEST; + else if (exception_code == ErrorCodes::SYNTAX_ERROR) + return HTTPResponse::HTTP_BAD_REQUEST; else if (exception_code == ErrorCodes::UNKNOWN_TABLE || exception_code == ErrorCodes::UNKNOWN_FUNCTION || exception_code == ErrorCodes::UNKNOWN_IDENTIFIER || From 604ba01ab5bd397830ea5f196551a7f9e6c5dde4 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 29 Oct 2018 21:33:51 +0300 Subject: [PATCH 18/79] fix using clickhouse-client docker image as a "binary" https://github.com/yandex/ClickHouse/pull/3195#issuecomment-434027727 --- docker/client/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index e1780db7c6f..3c78bb71978 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -26,4 +26,4 @@ ENV LANG en_US.UTF-8 ENV LANGUAGE en_US:en ENV LC_ALL en_US.UTF-8 -CMD ["/usr/bin/clickhouse-client"] +ENTRYPOINT ["/usr/bin/clickhouse-client"] From d2518fdb3fbf22cf708c079018d5bae2052ef007 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 29 Oct 2018 22:04:28 +0300 Subject: [PATCH 19/79] more refactoring: funcs to extract database_and_table from AST --- dbms/src/Analyzers/ExecuteTableFunctions.cpp | 1 - dbms/src/Interpreters/ExpressionAnalyzer.cpp | 16 +--- .../InJoinSubqueriesPreprocessor.cpp | 18 ++--- .../Interpreters/InterpreterInsertQuery.cpp | 1 - .../Interpreters/InterpreterSelectQuery.cpp | 45 +++++------ .../InterpreterShowProcesslistQuery.cpp | 1 - 
.../InterpreterShowTablesQuery.cpp | 1 - dbms/src/Interpreters/ProcessList.cpp | 23 ++---- dbms/src/Interpreters/QueryNormalizer.cpp | 1 - .../TranslateQualifiedNamesVisitor.cpp | 21 +++-- dbms/src/Interpreters/evaluateQualified.cpp | 78 +++++++++++++++++++ dbms/src/Interpreters/evaluateQualified.h | 3 + dbms/src/Parsers/ASTEnumElement.h | 1 - dbms/src/Parsers/ASTQueryWithOnCluster.cpp | 2 - dbms/src/Parsers/ASTSelectQuery.cpp | 40 ---------- dbms/src/Parsers/ASTSelectQuery.h | 2 - dbms/src/Parsers/ParserDescribeTableQuery.cpp | 1 - dbms/src/Parsers/ParserInsertQuery.cpp | 1 - dbms/src/Parsers/ParserOptimizeQuery.cpp | 1 - dbms/src/Parsers/ParserSelectQuery.cpp | 1 - dbms/src/Parsers/ParserSystemQuery.cpp | 2 - .../src/Parsers/ParserTablesInSelectQuery.cpp | 1 - dbms/src/Storages/Kafka/StorageKafka.cpp | 1 - dbms/src/Storages/MergeTree/KeyCondition.cpp | 1 + dbms/src/Storages/MergeTree/KeyCondition.h | 1 - dbms/src/Storages/MergeTree/MergeTreeData.cpp | 1 - .../MergeTree/MergeTreeDataSelectExecutor.cpp | 1 + dbms/src/Storages/MutationCommands.cpp | 1 - dbms/src/Storages/StorageDistributed.cpp | 1 - dbms/src/Storages/StorageFactory.cpp | 1 - dbms/src/Storages/StorageMaterializedView.cpp | 32 ++++---- dbms/src/Storages/StorageMerge.cpp | 1 - dbms/src/Storages/StorageURL.cpp | 1 - .../Storages/System/StorageSystemColumns.cpp | 1 - .../src/TableFunctions/TableFunctionMerge.cpp | 1 - 35 files changed, 146 insertions(+), 159 deletions(-) diff --git a/dbms/src/Analyzers/ExecuteTableFunctions.cpp b/dbms/src/Analyzers/ExecuteTableFunctions.cpp index 4dcdaf790e5..5e969c802e5 100644 --- a/dbms/src/Analyzers/ExecuteTableFunctions.cpp +++ b/dbms/src/Analyzers/ExecuteTableFunctions.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index b889336b620..b71bfef2af3 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -172,19 +172,9 @@ ExpressionAnalyzer::ExpressionAnalyzer( if (!storage && select_query) { - auto select_database = select_query->database(); - auto select_table = select_query->table(); - - if (select_table - && !typeid_cast(select_table.get()) - && !typeid_cast(select_table.get())) - { - String database = select_database - ? 
typeid_cast(*select_database).name - : ""; - const String & table = typeid_cast(*select_table).name; - storage = context.tryGetTable(database, table); - } + DatabaseAndTableWithAlias db_and_table; + if (getDatabaseAndTable(*select_query, 0, db_and_table)) + storage = context.tryGetTable(db_and_table.database, db_and_table.table); } if (storage && source_columns.empty()) diff --git a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 45105594c4f..332536a7129 100644 --- a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -81,22 +82,13 @@ void forEachTable(IAST * node, F && f) StoragePtr tryGetTable(const ASTPtr & database_and_table, const Context & context) { - String database; - String table; - const ASTIdentifier * id = static_cast(database_and_table.get()); + if (!id) + throw Exception("Logical error: identifier expected", ErrorCodes::LOGICAL_ERROR); - if (id->children.empty()) - table = id->name; - else if (id->children.size() == 2) - { - database = static_cast(id->children[0].get())->name; - table = static_cast(id->children[1].get())->name; - } - else - throw Exception("Logical error: unexpected number of components in table expression", ErrorCodes::LOGICAL_ERROR); + std::pair db_and_table = getDatabaseAndTableNameFromIdentifier(*id); - return context.tryGetTable(database, table); + return context.tryGetTable(db_and_table.first, db_and_table.second); } diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index d303154c3a3..f058550a441 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -10,7 +10,6 @@ #include #include -#include #include #include diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index aeeb5998a2c..2e9ed5c7d0e 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -146,7 +147,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( max_streams = settings.max_threads; - const auto & table_expression = query.table(); + ASTPtr table_expression = getTableFunctionOrSubquery(query, 0); if (input) { @@ -205,7 +206,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (query_analyzer->isRewriteSubqueriesPredicate()) { /// remake interpreter_subquery when PredicateOptimizer is rewrite subqueries and main table is subquery - if (typeid_cast(table_expression.get())) + if (table_expression && typeid_cast(table_expression.get())) interpreter_subquery = std::make_unique( table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze); @@ -236,29 +237,21 @@ InterpreterSelectQuery::InterpreterSelectQuery( void InterpreterSelectQuery::getDatabaseAndTableNames(String & database_name, String & table_name) { - auto query_database = query.database(); - auto query_table = query.table(); + DatabaseAndTableWithAlias db_and_table; + if (getDatabaseAndTable(query, 0, db_and_table)) + { + table_name = db_and_table.table; + database_name = db_and_table.database; - /** If the table is not specified - use the table `system.one`. - * If the database is not specified - use the current database. 
- */ - if (query_database) - database_name = typeid_cast(*query_database).name; - if (query_table) - table_name = typeid_cast(*query_table).name; - - if (!query_table) + /// If the database is not specified - use the current database. + if (database_name.empty() && !context.tryGetTable("", table_name)) + database_name = context.getCurrentDatabase(); + } + else /// If the table is not specified - use the table `system.one`. { database_name = "system"; table_name = "one"; } - else if (!query_database) - { - if (context.tryGetTable("", table_name)) - database_name = ""; - else - database_name = context.getCurrentDatabase(); - } } @@ -884,8 +877,12 @@ void InterpreterSelectQuery::executeFetchColumns( /// If we need less number of columns that subquery have - update the interpreter. if (required_columns.size() < source_header.columns()) { + ASTPtr subquery = getTableFunctionOrSubquery(query, 0); + if (!subquery) + throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR); + interpreter_subquery = std::make_unique( - query.table(), getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze); + subquery, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze); if (query_analyzer->hasAggregation()) interpreter_subquery->ignoreWithTotals(); @@ -1362,11 +1359,9 @@ bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query) * In other cases, totals will be computed on the initiating server of the query, and it is not necessary to read the data to the end. */ - auto query_table = query.table(); - if (query_table) + if (auto query_table = getTableFunctionOrSubquery(query, 0)) { - auto ast_union = typeid_cast(query_table.get()); - if (ast_union) + if (auto ast_union = typeid_cast(query_table.get())) { for (const auto & elem : ast_union->list_of_selects->children) if (hasWithTotalsInAnySubqueryInFromClause(typeid_cast(*elem))) diff --git a/dbms/src/Interpreters/InterpreterShowProcesslistQuery.cpp b/dbms/src/Interpreters/InterpreterShowProcesslistQuery.cpp index 0c977a53ce8..697b286fe75 100644 --- a/dbms/src/Interpreters/InterpreterShowProcesslistQuery.cpp +++ b/dbms/src/Interpreters/InterpreterShowProcesslistQuery.cpp @@ -5,7 +5,6 @@ #include #include -#include namespace DB diff --git a/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp b/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp index df4332c5b16..ab15d1f0112 100644 --- a/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/dbms/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index 557a006663d..6e8413f7049 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -1,10 +1,10 @@ #include #include #include +#include #include #include #include -#include #include #include #include @@ -51,28 +51,15 @@ static bool isUnlimitedQuery(const IAST * ast) if (!ast_selects->list_of_selects || ast_selects->list_of_selects->children.empty()) return false; - auto ast_select = typeid_cast(ast_selects->list_of_selects->children[0].get()); - + auto ast_select = typeid_cast(ast_selects->list_of_selects->children[0].get()); if (!ast_select) return false; - auto ast_database = ast_select->database(); - if (!ast_database) + DatabaseAndTableWithAlias database_and_table; + if (!getDatabaseAndTable(*ast_select, 0, database_and_table)) 
return false; - auto ast_table = ast_select->table(); - if (!ast_table) - return false; - - auto ast_database_id = typeid_cast(ast_database.get()); - if (!ast_database_id) - return false; - - auto ast_table_id = typeid_cast(ast_table.get()); - if (!ast_table_id) - return false; - - return ast_database_id->name == "system" && ast_table_id->name == "processes"; + return database_and_table.database == "system" && database_and_table.table == "processes"; } return false; diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 4aa2db7ad7f..eff50bfb235 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -9,7 +9,6 @@ #include #include #include -//#include #include namespace DB diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 5ee99318c68..9457ecb8b42 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -65,18 +65,23 @@ void TranslateQualifiedNamesVisitor::visit(ASTQualifiedAsterisk *, ASTPtr & ast, if (num_components > 2) throw Exception("Qualified asterisk cannot have more than two qualifiers", ErrorCodes::UNKNOWN_ELEMENT_IN_AST); + std::pair db_and_table = getDatabaseAndTableNameFromIdentifier(*ident); + for (const auto & table_names : tables) { /// database.table.*, table.* or alias.* - if ((num_components == 2 - && !table_names.database.empty() - && static_cast(*ident->children[0]).name == table_names.database - && static_cast(*ident->children[1]).name == table_names.table) - || (num_components == 0 - && ((!table_names.table.empty() && ident->name == table_names.table) - || (!table_names.alias.empty() && ident->name == table_names.alias)))) + if (num_components == 2) { - return; + if (!table_names.database.empty() && + db_and_table.first == table_names.database && + db_and_table.second == table_names.table) + return; + } + else if (num_components == 0) + { + if ((!table_names.table.empty() && db_and_table.second == table_names.table) || + (!table_names.alias.empty() && db_and_table.second == table_names.alias)) + return; } } diff --git a/dbms/src/Interpreters/evaluateQualified.cpp b/dbms/src/Interpreters/evaluateQualified.cpp index 7f3002edad1..e2c3467edb9 100644 --- a/dbms/src/Interpreters/evaluateQualified.cpp +++ b/dbms/src/Interpreters/evaluateQualified.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -196,4 +197,81 @@ std::vector getDatabaseAndTableWithAliases(const ASTS return database_and_table_with_aliases; } + +static const ASTTableExpression * getTableExpression(const ASTSelectQuery & select, size_t table_number) +{ + if (!select.tables) + return {}; + + ASTTablesInSelectQuery & tables_in_select_query = static_cast(*select.tables); + if (tables_in_select_query.children.size() <= table_number) + return {}; + + ASTTablesInSelectQueryElement & tables_element = + static_cast(*tables_in_select_query.children[table_number]); + if (!tables_element.table_expression) + return {}; + + return static_cast(tables_element.table_expression.get()); +} + +bool getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number, DatabaseAndTableWithAlias & db_and_table) +{ + const ASTTableExpression * table_expression = getTableExpression(select, table_number); + if (!table_expression) + return false; + + ASTPtr database_and_table_name = table_expression->database_and_table_name; + if 
(!database_and_table_name) + return false; + + if (database_and_table_name->children.empty()) + { + const ASTIdentifier * db_name = typeid_cast(database_and_table_name.get()); + if (!db_name) + throw Exception("Logical error: Unexpected database node type. Identifier expected.", ErrorCodes::LOGICAL_ERROR); + + db_and_table = DatabaseAndTableWithAlias{{}, db_name->name, {}}; + return true; + } + else if (database_and_table_name->children.size() == 2) + { + const ASTIdentifier * db = typeid_cast(database_and_table_name->children[0].get()); + const ASTIdentifier * table = typeid_cast(database_and_table_name->children[1].get()); + + if (db && table) + { + db_and_table = DatabaseAndTableWithAlias{db->name, table->name, {}}; + return true; + } + } + + return false; +} + +ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number) +{ + const ASTTableExpression * table_expression = getTableExpression(select, table_number); + if (table_expression) + { +#if 1 /// TODO: It hides some logical error in InterpreterSelectQuery & distributed tables + if (table_expression->database_and_table_name) + { + if (table_expression->database_and_table_name->children.empty()) + return table_expression->database_and_table_name; + + if (table_expression->database_and_table_name->children.size() == 2) + return table_expression->database_and_table_name->children[1]; + } +#endif + if (table_expression->table_function) + return table_expression->table_function; + + if (table_expression->subquery) + return static_cast(table_expression->subquery.get())->children[0]; + } + + return nullptr; +} + } diff --git a/dbms/src/Interpreters/evaluateQualified.h b/dbms/src/Interpreters/evaluateQualified.h index 39dcf77fbfd..3107e21e429 100644 --- a/dbms/src/Interpreters/evaluateQualified.h +++ b/dbms/src/Interpreters/evaluateQualified.h @@ -40,4 +40,7 @@ std::pair getDatabaseAndTableNameFromIdentifier(const ASTIdentif std::vector getSelectTablesExpression(const ASTSelectQuery * select_query); std::vector getDatabaseAndTableWithAliases(const ASTSelectQuery * select_query, const String & current_database); +bool getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number, DatabaseAndTableWithAlias &); +ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number); + } diff --git a/dbms/src/Parsers/ASTEnumElement.h b/dbms/src/Parsers/ASTEnumElement.h index 10b4e1e7482..cd07db53d04 100644 --- a/dbms/src/Parsers/ASTEnumElement.h +++ b/dbms/src/Parsers/ASTEnumElement.h @@ -1,7 +1,6 @@ #pragma once #include -#include namespace DB diff --git a/dbms/src/Parsers/ASTQueryWithOnCluster.cpp b/dbms/src/Parsers/ASTQueryWithOnCluster.cpp index f37b741081e..9519a33c1e5 100644 --- a/dbms/src/Parsers/ASTQueryWithOnCluster.cpp +++ b/dbms/src/Parsers/ASTQueryWithOnCluster.cpp @@ -2,8 +2,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index 0cf19a28851..8e20e1d46a6 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -242,46 +242,6 @@ static const ASTTablesInSelectQueryElement * getFirstTableJoin(const ASTSelectQu } -ASTPtr ASTSelectQuery::database() const -{ - const ASTTableExpression * table_expression = getFirstTableExpression(*this); - if (!table_expression || !table_expression->database_and_table_name || table_expression->database_and_table_name->children.empty()) - return {}; - - if 
(table_expression->database_and_table_name->children.size() != 2) - throw Exception("Logical error: more than two components in table expression", ErrorCodes::LOGICAL_ERROR); - - return table_expression->database_and_table_name->children[0]; -} - - -ASTPtr ASTSelectQuery::table() const -{ - const ASTTableExpression * table_expression = getFirstTableExpression(*this); - if (!table_expression) - return {}; - - if (table_expression->database_and_table_name) - { - if (table_expression->database_and_table_name->children.empty()) - return table_expression->database_and_table_name; - - if (table_expression->database_and_table_name->children.size() != 2) - throw Exception("Logical error: more than two components in table expression", ErrorCodes::LOGICAL_ERROR); - - return table_expression->database_and_table_name->children[1]; - } - - if (table_expression->table_function) - return table_expression->table_function; - - if (table_expression->subquery) - return static_cast(table_expression->subquery.get())->children.at(0); - - throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); -} - - ASTPtr ASTSelectQuery::sample_size() const { const ASTTableExpression * table_expression = getFirstTableExpression(*this); diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h index 0ffdb44395e..1e0a2ffd575 100644 --- a/dbms/src/Parsers/ASTSelectQuery.h +++ b/dbms/src/Parsers/ASTSelectQuery.h @@ -39,8 +39,6 @@ public: ASTPtr settings; /// Compatibility with old parser of tables list. TODO remove - ASTPtr database() const; - ASTPtr table() const; ASTPtr sample_size() const; ASTPtr sample_offset() const; ASTPtr array_join_expression_list() const; diff --git a/dbms/src/Parsers/ParserDescribeTableQuery.cpp b/dbms/src/Parsers/ParserDescribeTableQuery.cpp index ebfc3baa33f..0f768e22324 100644 --- a/dbms/src/Parsers/ParserDescribeTableQuery.cpp +++ b/dbms/src/Parsers/ParserDescribeTableQuery.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/dbms/src/Parsers/ParserInsertQuery.cpp b/dbms/src/Parsers/ParserInsertQuery.cpp index 399aa43ea98..73aca09c210 100644 --- a/dbms/src/Parsers/ParserInsertQuery.cpp +++ b/dbms/src/Parsers/ParserInsertQuery.cpp @@ -1,5 +1,4 @@ #include -#include #include #include diff --git a/dbms/src/Parsers/ParserOptimizeQuery.cpp b/dbms/src/Parsers/ParserOptimizeQuery.cpp index e0dcf7ffb47..835db12cbb3 100644 --- a/dbms/src/Parsers/ParserOptimizeQuery.cpp +++ b/dbms/src/Parsers/ParserOptimizeQuery.cpp @@ -4,7 +4,6 @@ #include #include -#include #include diff --git a/dbms/src/Parsers/ParserSelectQuery.cpp b/dbms/src/Parsers/ParserSelectQuery.cpp index ffd9273dd8a..74f2e735ccd 100644 --- a/dbms/src/Parsers/ParserSelectQuery.cpp +++ b/dbms/src/Parsers/ParserSelectQuery.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/dbms/src/Parsers/ParserSystemQuery.cpp b/dbms/src/Parsers/ParserSystemQuery.cpp index 87140a3f6f7..1bf7c7219dc 100644 --- a/dbms/src/Parsers/ParserSystemQuery.cpp +++ b/dbms/src/Parsers/ParserSystemQuery.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/dbms/src/Parsers/ParserTablesInSelectQuery.cpp b/dbms/src/Parsers/ParserTablesInSelectQuery.cpp index 088cd567fba..0ba2a403a94 100644 --- a/dbms/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ParserTablesInSelectQuery.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp 
b/dbms/src/Storages/Kafka/StorageKafka.cpp index fb55db8bbb9..adaffed6c7c 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index 30a7ce1f733..745e1112a1f 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB diff --git a/dbms/src/Storages/MergeTree/KeyCondition.h b/dbms/src/Storages/MergeTree/KeyCondition.h index e2eb179dcba..cb06909eb87 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.h +++ b/dbms/src/Storages/MergeTree/KeyCondition.h @@ -10,7 +10,6 @@ #include #include #include -#include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index a31d12d932f..1c959f41759 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 6e2b2d3a20a..196259733c4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/dbms/src/Storages/MutationCommands.cpp b/dbms/src/Storages/MutationCommands.cpp index 685aa883190..6ba9c23a257 100644 --- a/dbms/src/Storages/MutationCommands.cpp +++ b/dbms/src/Storages/MutationCommands.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 7b442169de5..8144639d998 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -16,7 +16,6 @@ #include #include -#include #include #include #include diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 05a61343108..546e8f27843 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index 022f77fa5e4..ebe0560e142 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -2,11 +2,11 @@ #include #include #include -#include #include #include #include +#include #include #include @@ -27,23 +27,26 @@ namespace ErrorCodes static void extractDependentTable(ASTSelectQuery & query, String & select_database_name, String & select_table_name) { - auto query_table = query.table(); + DatabaseAndTableWithAlias db_and_table; + bool is_table = getDatabaseAndTable(query, 0, db_and_table); + ASTPtr subquery = getTableFunctionOrSubquery(query, 0); - if (!query_table) + if (!is_table && !subquery) return; - if (auto ast_id = typeid_cast(query_table.get())) + if (is_table) { - auto query_database = query.database(); + select_table_name = db_and_table.table; - if (!query_database) + if (db_and_table.database.empty()) + { + db_and_table.database = select_database_name; query.setDatabaseIfNeeded(select_database_name); 
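/// Editor's note, not part of the patch: extractDependentTable now resolves the materialized
/// view's SELECT source through the shared helpers instead of walking AST children by hand.
/// The pattern used across this series (declared in evaluateQualified.h at this point,
/// renamed to DatabaseAndTableWithAlias.h two patches later):
///
///     DatabaseAndTableWithAlias db_and_table;
///     if (getDatabaseAndTable(*select_query, 0, db_and_table))
///         storage = context.tryGetTable(db_and_table.database, db_and_table.table);
///
/// so every caller gets identical handling of "table", "db.table" and aliased forms.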
- - select_table_name = ast_id->name; - select_database_name = query_database ? typeid_cast(*query_database).name : select_database_name; - + } + else + select_database_name = db_and_table.database; } - else if (auto ast_select = typeid_cast(query_table.get())) + else if (auto ast_select = typeid_cast(subquery.get())) { if (ast_select->list_of_selects->children.size() != 1) throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); @@ -64,12 +67,11 @@ static void checkAllowedQueries(const ASTSelectQuery & query) if (query.prewhere_expression || query.final() || query.sample_size()) throw Exception("MATERIALIZED VIEW cannot have PREWHERE, SAMPLE or FINAL.", DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); - auto query_table = query.table(); - - if (!query_table) + ASTPtr subquery = getTableFunctionOrSubquery(query, 0); + if (!subquery) return; - if (auto ast_select = typeid_cast(query_table.get())) + if (auto ast_select = typeid_cast(subquery.get())) { if (ast_select->list_of_selects->children.size() != 1) throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index ebda49c15e9..0bb5c832cfe 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 84091d83a05..8f17799a232 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include diff --git a/dbms/src/Storages/System/StorageSystemColumns.cpp b/dbms/src/Storages/System/StorageSystemColumns.cpp index 40802f16466..66f56996ddb 100644 --- a/dbms/src/Storages/System/StorageSystemColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemColumns.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include diff --git a/dbms/src/TableFunctions/TableFunctionMerge.cpp b/dbms/src/TableFunctions/TableFunctionMerge.cpp index 45aa42fea32..b5dace28d41 100644 --- a/dbms/src/TableFunctions/TableFunctionMerge.cpp +++ b/dbms/src/TableFunctions/TableFunctionMerge.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include #include From d538f706794fd24b89d8e47546dbb6843a163f99 Mon Sep 17 00:00:00 2001 From: abyss7 <5627721+abyss7@users.noreply.github.com> Date: Tue, 30 Oct 2018 17:05:44 +0300 Subject: [PATCH 20/79] Fix build and tests on Fedora (#3496) * Fix some tests and build on Fedora 28 * Update contrib/ssl * Try `sudo` first, then without `sudo`. 
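Editor's note, not part of the original message: the test change below avoids hard failures on systems where passwordless sudo is not configured. A reduced sketch of the fallback pattern used in 00428_partition.sh ($columns_file stands in for the real columns.txt path):

```bash
# Try a non-interactive sudo read first; if that fails, read directly (works when the
# test user already owns the data directory, e.g. on local developer machines).
(sudo -n cat "$columns_file" 2>/dev/null || cat "$columns_file") | wc -l
```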
--- .gitignore | 3 +++ contrib/ssl | 2 +- dbms/tests/CMakeLists.txt | 2 +- .../queries/0_stateless/00417_system_build_options.sh | 2 +- dbms/tests/queries/0_stateless/00428_partition.sh | 8 ++++++-- libs/libcommon/include/common/readline_use.h | 1 - 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 6e969a8459c..bc598cf0d0d 100644 --- a/.gitignore +++ b/.gitignore @@ -246,3 +246,6 @@ website/presentations website/package-lock.json .DS_Store */.DS_Store + +# Ignore files for locally disabled tests +/dbms/tests/queries/**/*.disabled diff --git a/contrib/ssl b/contrib/ssl index de02224a42c..919f6f1331d 160000 --- a/contrib/ssl +++ b/contrib/ssl @@ -1 +1 @@ -Subproject commit de02224a42c69e3d8c9112c82018816f821878d0 +Subproject commit 919f6f1331d500bfdd26f8bbbf88e92c0119879b diff --git a/dbms/tests/CMakeLists.txt b/dbms/tests/CMakeLists.txt index 3f42f94158b..6a983134937 100644 --- a/dbms/tests/CMakeLists.txt +++ b/dbms/tests/CMakeLists.txt @@ -27,7 +27,7 @@ if (ENABLE_TESTS) # maybe add --no-long ? # if you want disable some tests: env TEST_OPT0='--skip compile' - add_test(NAME with_server COMMAND bash -c "env BUILD_DIR=${ClickHouse_BINARY_DIR} INTERNAL_COMPILER_BIN_ROOT=${INTERNAL_COMPILER_BIN_ROOT} ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse-test-server") + add_test(NAME with_server COMMAND bash -c "env BUILD_DIR=${ClickHouse_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse-test-server") endif () if (ENABLE_TEST_INTEGRATION) diff --git a/dbms/tests/queries/0_stateless/00417_system_build_options.sh b/dbms/tests/queries/0_stateless/00417_system_build_options.sh index 4de22e36194..4c4b5276a1b 100755 --- a/dbms/tests/queries/0_stateless/00417_system_build_options.sh +++ b/dbms/tests/queries/0_stateless/00417_system_build_options.sh @@ -3,4 +3,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh

-$CLICKHOUSE_CLIENT --query="SELECT * FROM system.build_options" | perl -lnE 'print $1 if /(BUILD_DATE|BUILD_TYPE|CXX_COMPILER|CXX_FLAGS|LINK_FLAGS)\s+\S+/';
+$CLICKHOUSE_CLIENT --query="SELECT * FROM system.build_options" | perl -lnE 'print $1 if /(BUILD_DATE|BUILD_TYPE|CXX_COMPILER)\s+\S+/ || /(CXX_FLAGS|LINK_FLAGS)/';
diff --git a/dbms/tests/queries/0_stateless/00428_partition.sh b/dbms/tests/queries/0_stateless/00428_partition.sh
index 31e92603024..ce6ad9e1cd8 100755
--- a/dbms/tests/queries/0_stateless/00428_partition.sh
+++ b/dbms/tests/queries/0_stateless/00428_partition.sh
@@ -18,14 +18,18 @@ $chl "INSERT INTO test.partition_428 (p, k) VALUES(toDate(31), 1)"
 $chl "INSERT INTO test.partition_428 (p, k) VALUES(toDate(1), 2)"

 for part in `$chl "SELECT name FROM system.parts WHERE database='test' AND table='partition_428'"`; do
-    sudo -n cat $ch_dir/data/test/partition_428/$part/columns.txt | wc -l # 2 header lines + 3 columns
+    # 2 header lines + 3 columns
+    (sudo -n cat $ch_dir/data/test/partition_428/$part/columns.txt 2>/dev/null || \
+        cat $ch_dir/data/test/partition_428/$part/columns.txt) | wc -l
 done

 $chl "ALTER TABLE test.partition_428 DETACH PARTITION 197001"
 $chl "ALTER TABLE test.partition_428 ATTACH PARTITION 197001"

 for part in `$chl "SELECT name FROM system.parts WHERE database='test' AND table='partition_428'"`; do
-    sudo -n cat $ch_dir/data/test/partition_428/$part/columns.txt | wc -l # 2 header lines + 3 columns
+    # 2 header lines + 3 columns
+    (sudo -n cat $ch_dir/data/test/partition_428/$part/columns.txt 2>/dev/null || \
+        cat $ch_dir/data/test/partition_428/$part/columns.txt) | wc -l
 done

 $chl "ALTER TABLE test.partition_428 MODIFY COLUMN v1 Int8"
diff --git a/libs/libcommon/include/common/readline_use.h b/libs/libcommon/include/common/readline_use.h
index 97622b26839..549676ef9b2 100644
--- a/libs/libcommon/include/common/readline_use.h
+++ b/libs/libcommon/include/common/readline_use.h
@@ -10,7 +10,6 @@
 #include 
 #elif USE_LIBEDIT
 #include 
-    #include  // Y_IGNORE
 #else
 #include 
 #include 
From 4ff0391ca6cc57f919401d83e12ea89fb0cb0c74 Mon Sep 17 00:00:00 2001
From: chertus
Date: Tue, 30 Oct 2018 19:31:21 +0300
Subject: [PATCH 21/79] DatabaseAndTableWithAlias - the only place for db & table extraction from AST

---
 dbms/src/Interpreters/ActionsVisitor.cpp      |   4 +-
 ...fied.cpp => DatabaseAndTableWithAlias.cpp} | 146 +++++++-----------
 ...ualified.h => DatabaseAndTableWithAlias.h} |  15 +-
 dbms/src/Interpreters/ExpressionAnalyzer.cpp  |  29 ++--
 .../InJoinSubqueriesPreprocessor.cpp          |   6 +-
 .../Interpreters/InterpreterSelectQuery.cpp   |   9 +-
 .../PredicateExpressionsOptimizer.cpp         |  16 +-
 .../PredicateExpressionsOptimizer.h           |   2 +-
 dbms/src/Interpreters/ProcessList.cpp         |   9 +-
 dbms/src/Interpreters/QueryNormalizer.h       |   2 +-
 .../TranslateQualifiedNamesVisitor.cpp        |  10 +-
 .../TranslateQualifiedNamesVisitor.h          |   2 +-
 dbms/src/Interpreters/interpretSubquery.cpp   |   8 +-
 dbms/src/Storages/StorageMaterializedView.cpp |  17 +-
 14 files changed, 118 insertions(+), 157 deletions(-)
 rename dbms/src/Interpreters/{evaluateQualified.cpp => DatabaseAndTableWithAlias.cpp} (67%)
 rename dbms/src/Interpreters/{evaluateQualified.h => DatabaseAndTableWithAlias.h} (61%)

diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp
index 29d8f190fbf..8eebf869303 100644
--- a/dbms/src/Interpreters/ActionsVisitor.cpp
+++ b/dbms/src/Interpreters/ActionsVisitor.cpp
@@ -568,8 +568,8 @@ void ActionsVisitor::makeSet(const ASTFunction * node, const Block &
sample_bloc /// and the table has the type Set (a previously prepared set). if (identifier) { - auto database_table = getDatabaseAndTableNameFromIdentifier(*identifier); - StoragePtr table = context.tryGetTable(database_table.first, database_table.second); + DatabaseAndTableWithAlias database_table(*identifier); + StoragePtr table = context.tryGetTable(database_table.database, database_table.table); if (table) { diff --git a/dbms/src/Interpreters/evaluateQualified.cpp b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp similarity index 67% rename from dbms/src/Interpreters/evaluateQualified.cpp rename to dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp index e2c3467edb9..f43e75ed8d5 100644 --- a/dbms/src/Interpreters/evaluateQualified.cpp +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -48,46 +48,6 @@ void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip) } } - -DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression, - const String & current_database) -{ - DatabaseAndTableWithAlias database_and_table_with_alias; - - if (table_expression.database_and_table_name) - { - const auto & identifier = static_cast(*table_expression.database_and_table_name); - - database_and_table_with_alias.alias = identifier.tryGetAlias(); - - if (table_expression.database_and_table_name->children.empty()) - { - database_and_table_with_alias.database = current_database; - database_and_table_with_alias.table = identifier.name; - } - else - { - if (table_expression.database_and_table_name->children.size() != 2) - throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR); - - database_and_table_with_alias.database = static_cast(*identifier.children[0]).name; - database_and_table_with_alias.table = static_cast(*identifier.children[1]).name; - } - } - else if (table_expression.table_function) - { - database_and_table_with_alias.alias = table_expression.table_function->tryGetAlias(); - } - else if (table_expression.subquery) - { - database_and_table_with_alias.alias = table_expression.subquery->tryGetAlias(); - } - else - throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR); - - return database_and_table_with_alias; -} - /// Get the number of components of identifier which are correspond to 'alias.', 'table.' or 'databas.table.' from names. 
size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & names) @@ -122,19 +82,44 @@ size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifie return num_qualifiers_to_strip; } -std::pair getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier) + +DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database) { - std::pair res; - res.second = identifier.name; + database = current_database; + table = identifier.name; + alias = identifier.tryGetAlias(); + if (!identifier.children.empty()) { if (identifier.children.size() != 2) - throw Exception("Qualified table name could have only two components", ErrorCodes::LOGICAL_ERROR); + throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR); - res.first = typeid_cast(*identifier.children[0]).name; - res.second = typeid_cast(*identifier.children[1]).name; + const ASTIdentifier * db_identifier = typeid_cast(identifier.children[0].get()); + const ASTIdentifier * table_identifier = typeid_cast(identifier.children[1].get()); + if (!db_identifier || !table_identifier) + throw Exception("Logical error: identifiers expected", ErrorCodes::LOGICAL_ERROR); + + database = db_identifier->name; + table = table_identifier->name; } - return res; +} + +DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database) +{ + if (table_expression.database_and_table_name) + { + const auto * identifier = static_cast(table_expression.database_and_table_name.get()); + if (!identifier) + throw Exception("Logical error: identifier expected", ErrorCodes::LOGICAL_ERROR); + + *this = DatabaseAndTableWithAlias(*identifier, current_database); + } + else if (table_expression.table_function) + alias = table_expression.table_function->tryGetAlias(); + else if (table_expression.subquery) + alias = table_expression.subquery->tryGetAlias(); + else + throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR); } String DatabaseAndTableWithAlias::getQualifiedNamePrefix() const @@ -166,14 +151,14 @@ void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const } } -std::vector getSelectTablesExpression(const ASTSelectQuery * select_query) +std::vector getSelectTablesExpression(const ASTSelectQuery & select_query) { - if (!select_query->tables) + if (!select_query.tables) return {}; std::vector tables_expression; - for (const auto & child : select_query->tables->children) + for (const auto & child : select_query.tables->children) { ASTTablesInSelectQueryElement * tables_element = static_cast(child.get()); @@ -184,20 +169,6 @@ std::vector getSelectTablesExpression(const ASTSelec return tables_expression; } -std::vector getDatabaseAndTableWithAliases(const ASTSelectQuery * select_query, const String & current_database) -{ - std::vector tables_expression = getSelectTablesExpression(select_query); - - std::vector database_and_table_with_aliases; - database_and_table_with_aliases.reserve(tables_expression.size()); - - for (const auto & table_expression : tables_expression) - database_and_table_with_aliases.emplace_back(getTableNameWithAliasFromTableExpression(*table_expression, current_database)); - - return database_and_table_with_aliases; -} - - static const ASTTableExpression * getTableExpression(const ASTSelectQuery & select, size_t table_number) 
{ if (!select.tables) @@ -209,44 +180,41 @@ static const ASTTableExpression * getTableExpression(const ASTSelectQuery & sele ASTTablesInSelectQueryElement & tables_element = static_cast(*tables_in_select_query.children[table_number]); + if (!tables_element.table_expression) return {}; return static_cast(tables_element.table_expression.get()); } -bool getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number, DatabaseAndTableWithAlias & db_and_table) +std::vector getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database) +{ + std::vector tables_expression = getSelectTablesExpression(select_query); + + std::vector database_and_table_with_aliases; + database_and_table_with_aliases.reserve(tables_expression.size()); + + for (const auto & table_expression : tables_expression) + database_and_table_with_aliases.emplace_back(DatabaseAndTableWithAlias(*table_expression, current_database)); + + return database_and_table_with_aliases; +} + +std::shared_ptr getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number) { const ASTTableExpression * table_expression = getTableExpression(select, table_number); if (!table_expression) - return false; + return {}; ASTPtr database_and_table_name = table_expression->database_and_table_name; if (!database_and_table_name) - return false; + return {}; - if (database_and_table_name->children.empty()) - { - const ASTIdentifier * db_name = typeid_cast(database_and_table_name.get()); - if (!db_name) - throw Exception("Logical error: Unexpected database node type. Identifier expected.", ErrorCodes::LOGICAL_ERROR); + const ASTIdentifier * identifier = typeid_cast(database_and_table_name.get()); + if (!identifier) + return {}; - db_and_table = DatabaseAndTableWithAlias{{}, db_name->name, {}}; - return true; - } - else if (database_and_table_name->children.size() == 2) - { - const ASTIdentifier * db = typeid_cast(database_and_table_name->children[0].get()); - const ASTIdentifier * table = typeid_cast(database_and_table_name->children[1].get()); - - if (db && table) - { - db_and_table = DatabaseAndTableWithAlias{db->name, table->name, {}}; - return true; - } - } - - return false; + return std::make_shared(*identifier); } ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number) diff --git a/dbms/src/Interpreters/evaluateQualified.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h similarity index 61% rename from dbms/src/Interpreters/evaluateQualified.h rename to dbms/src/Interpreters/DatabaseAndTableWithAlias.h index 3107e21e429..35194e21e64 100644 --- a/dbms/src/Interpreters/evaluateQualified.h +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h @@ -14,12 +14,16 @@ class ASTIdentifier; struct ASTTableExpression; +/// Extracts database name (and/or alias) from table expression or identifier struct DatabaseAndTableWithAlias { String database; String table; String alias; + DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = ""); + DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database); + /// "alias." or "database.table." 
if alias is empty String getQualifiedNamePrefix() const; @@ -29,18 +33,13 @@ struct DatabaseAndTableWithAlias void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip); -DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression, - const String & current_database); - size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & names); -std::pair getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier); +std::vector getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database); +std::shared_ptr getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number); -std::vector getSelectTablesExpression(const ASTSelectQuery * select_query); -std::vector getDatabaseAndTableWithAliases(const ASTSelectQuery * select_query, const String & current_database); - -bool getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number, DatabaseAndTableWithAlias &); +std::vector getSelectTablesExpression(const ASTSelectQuery & select_query); ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number); } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index b71bfef2af3..cfc0d340001 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -59,7 +59,7 @@ #include #include #include -#include +#include #include #include @@ -172,9 +172,8 @@ ExpressionAnalyzer::ExpressionAnalyzer( if (!storage && select_query) { - DatabaseAndTableWithAlias db_and_table; - if (getDatabaseAndTable(*select_query, 0, db_and_table)) - storage = context.tryGetTable(db_and_table.database, db_and_table.table); + if (auto db_and_table = getDatabaseAndTable(*select_query, 0)) + storage = context.tryGetTable(db_and_table->database, db_and_table->table); } if (storage && source_columns.empty()) @@ -270,7 +269,7 @@ void ExpressionAnalyzer::translateQualifiedNames() if (!select_query || !select_query->tables || select_query->tables->children.empty()) return; - std::vector tables = getDatabaseAndTableWithAliases(select_query, context.getCurrentDatabase()); + std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); LogAST log; TranslateQualifiedNamesVisitor visitor(source_columns, tables, log.stream()); @@ -523,8 +522,8 @@ static NamesAndTypesList getNamesAndTypeListFromTableExpression(const ASTTableEx else if (table_expression.database_and_table_name) { const auto & identifier = static_cast(*table_expression.database_and_table_name); - auto database_table = getDatabaseAndTableNameFromIdentifier(identifier); - const auto & table = context.getTable(database_table.first, database_table.second); + DatabaseAndTableWithAlias database_table(identifier); + const auto & table = context.getTable(database_table.database, database_table.table); names_and_type_list = table->getSampleBlockNonMaterialized().getNamesAndTypesList(); } @@ -551,12 +550,12 @@ void ExpressionAnalyzer::normalizeTree() TableNamesAndColumnNames table_names_and_column_names; if (select_query && select_query->tables && !select_query->tables->children.empty()) { - std::vector tables_expression = getSelectTablesExpression(select_query); + std::vector tables_expression = getSelectTablesExpression(*select_query); bool first = true; for (const auto * table_expression : tables_expression) { - const auto table_name = 
getTableNameWithAliasFromTableExpression(*table_expression, context.getCurrentDatabase()); + DatabaseAndTableWithAlias table_name(*table_expression, context.getCurrentDatabase()); NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context); if (!first) @@ -1234,7 +1233,7 @@ const ExpressionAnalyzer::AnalyzedJoin::JoinedColumnsList & ExpressionAnalyzer:: if (const ASTTablesInSelectQueryElement * node = select_query_with_join->join()) { const auto & table_expression = static_cast(*node->table_expression); - auto table_name_with_alias = getTableNameWithAliasFromTableExpression(table_expression, context.getCurrentDatabase()); + DatabaseAndTableWithAlias table_name_with_alias(table_expression, context.getCurrentDatabase()); auto columns = getNamesAndTypeListFromTableExpression(table_expression, context); @@ -1292,8 +1291,8 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (table_to_join.database_and_table_name) { const auto & identifier = static_cast(*table_to_join.database_and_table_name); - auto database_table = getDatabaseAndTableNameFromIdentifier(identifier); - StoragePtr table = context.tryGetTable(database_table.first, database_table.second); + DatabaseAndTableWithAlias database_table(identifier); + StoragePtr table = context.tryGetTable(database_table.database, database_table.table); if (table) { @@ -1835,8 +1834,8 @@ void ExpressionAnalyzer::collectJoinedColumnsFromJoinOnExpr() const auto & left_table_expression = static_cast(*left_tables_element->table_expression); const auto & right_table_expression = static_cast(*right_tables_element->table_expression); - auto left_source_names = getTableNameWithAliasFromTableExpression(left_table_expression, context.getCurrentDatabase()); - auto right_source_names = getTableNameWithAliasFromTableExpression(right_table_expression, context.getCurrentDatabase()); + DatabaseAndTableWithAlias left_source_names(left_table_expression, context.getCurrentDatabase()); + DatabaseAndTableWithAlias right_source_names(right_table_expression, context.getCurrentDatabase()); /// Stores examples of columns which are only from one table. 
struct TableBelonging @@ -1989,7 +1988,7 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns) const auto & table_join = static_cast(*node->table_join); const auto & table_expression = static_cast(*node->table_expression); - auto joined_table_name = getTableNameWithAliasFromTableExpression(table_expression, context.getCurrentDatabase()); + DatabaseAndTableWithAlias joined_table_name(table_expression, context.getCurrentDatabase()); auto add_name_to_join_keys = [&](Names & join_keys, ASTs & join_asts, const ASTPtr & ast, bool right_table) { diff --git a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 332536a7129..554aad3acc8 100644 --- a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -86,9 +86,9 @@ StoragePtr tryGetTable(const ASTPtr & database_and_table, const Context & contex if (!id) throw Exception("Logical error: identifier expected", ErrorCodes::LOGICAL_ERROR); - std::pair db_and_table = getDatabaseAndTableNameFromIdentifier(*id); + DatabaseAndTableWithAlias db_and_table(*id); - return context.tryGetTable(db_and_table.first, db_and_table.second); + return context.tryGetTable(db_and_table.database, db_and_table.table); } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 2e9ed5c7d0e..d81c3b42bc0 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include @@ -237,11 +237,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( void InterpreterSelectQuery::getDatabaseAndTableNames(String & database_name, String & table_name) { - DatabaseAndTableWithAlias db_and_table; - if (getDatabaseAndTable(query, 0, db_and_table)) + if (auto db_and_table = getDatabaseAndTable(query, 0)) { - table_name = db_and_table.table; - database_name = db_and_table.database; + table_name = db_and_table->table; + database_name = db_and_table->database; /// If the database is not specified - use the current database. 
if (database_name.empty() && !context.tryGetTable("", table_name)) diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 222b9dd5131..23fade42196 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -45,7 +45,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl( PredicateExpressions outer_predicate_expressions = splitConjunctionPredicate(outer_expression); std::vector database_and_table_with_aliases = - getDatabaseAndTableWithAliases(ast_select, context.getCurrentDatabase()); + getDatabaseAndTables(*ast_select, context.getCurrentDatabase()); bool is_rewrite_subquery = false; for (const auto & outer_predicate : outer_predicate_expressions) @@ -258,15 +258,14 @@ bool PredicateExpressionsOptimizer::optimizeExpression(const ASTPtr & outer_expr void PredicateExpressionsOptimizer::getAllSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns) { - const auto tables_expression = getSelectTablesExpression(ast_select); + const auto tables_expression = getSelectTablesExpression(*ast_select); for (const auto & table_expression : tables_expression) { if (table_expression->subquery) { /// Use qualifiers to translate the columns of subqueries - const auto database_and_table_with_alias = - getTableNameWithAliasFromTableExpression(*table_expression, context.getCurrentDatabase()); + DatabaseAndTableWithAlias database_and_table_with_alias(*table_expression, context.getCurrentDatabase()); String qualified_name_prefix = database_and_table_with_alias.getQualifiedNamePrefix(); getSubqueryProjectionColumns(all_subquery_projection_columns, qualified_name_prefix, static_cast(table_expression->subquery.get())->children[0]); @@ -333,7 +332,7 @@ ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_que if (!select_query->tables || select_query->tables->children.empty()) return {}; - std::vector tables_expression = getSelectTablesExpression(select_query); + std::vector tables_expression = getSelectTablesExpression(*select_query); if (const auto qualified_asterisk = typeid_cast(asterisk.get())) { @@ -351,8 +350,7 @@ ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_que for (auto it = tables_expression.begin(); it != tables_expression.end(); ++it) { const ASTTableExpression * table_expression = *it; - const auto database_and_table_with_alias = - getTableNameWithAliasFromTableExpression(*table_expression, context.getCurrentDatabase()); + DatabaseAndTableWithAlias database_and_table_with_alias(*table_expression, context.getCurrentDatabase()); /// database.table.* if (num_components == 2 && !database_and_table_with_alias.database.empty() && static_cast(*ident->children[0]).name == database_and_table_with_alias.database @@ -391,8 +389,8 @@ ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_que else if (table_expression->database_and_table_name) { const auto database_and_table_ast = static_cast(table_expression->database_and_table_name.get()); - const auto database_and_table_name = getDatabaseAndTableNameFromIdentifier(*database_and_table_ast); - storage = context.getTable(database_and_table_name.first, database_and_table_name.second); + DatabaseAndTableWithAlias database_and_table_name(*database_and_table_ast); + storage = context.getTable(database_and_table_name.database, database_and_table_name.table); } const auto block = storage->getSampleBlock(); diff 
--git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h index d75d9135f8a..e999489475c 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index 6e8413f7049..3c31841d549 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -55,11 +55,10 @@ static bool isUnlimitedQuery(const IAST * ast) if (!ast_select) return false; - DatabaseAndTableWithAlias database_and_table; - if (!getDatabaseAndTable(*ast_select, 0, database_and_table)) - return false; + if (auto database_and_table = getDatabaseAndTable(*ast_select, 0)) + return database_and_table->database == "system" && database_and_table->table == "processes"; - return database_and_table.database == "system" && database_and_table.table == "processes"; + return false; } return false; diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index fd0d8603b3e..376b3ba6e07 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB { diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 9457ecb8b42..ebd575d314d 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -65,7 +65,7 @@ void TranslateQualifiedNamesVisitor::visit(ASTQualifiedAsterisk *, ASTPtr & ast, if (num_components > 2) throw Exception("Qualified asterisk cannot have more than two qualifiers", ErrorCodes::UNKNOWN_ELEMENT_IN_AST); - std::pair db_and_table = getDatabaseAndTableNameFromIdentifier(*ident); + DatabaseAndTableWithAlias db_and_table(*ident); for (const auto & table_names : tables) { @@ -73,14 +73,14 @@ void TranslateQualifiedNamesVisitor::visit(ASTQualifiedAsterisk *, ASTPtr & ast, if (num_components == 2) { if (!table_names.database.empty() && - db_and_table.first == table_names.database && - db_and_table.second == table_names.table) + db_and_table.database == table_names.database && + db_and_table.table == table_names.table) return; } else if (num_components == 0) { - if ((!table_names.table.empty() && db_and_table.second == table_names.table) || - (!table_names.alias.empty() && db_and_table.second == table_names.alias)) + if ((!table_names.table.empty() && db_and_table.table == table_names.table) || + (!table_names.alias.empty() && db_and_table.table == table_names.alias)) return; } } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index 607e3de7217..6f7d9ca7f54 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -5,7 +5,7 @@ #include #include -#include +#include namespace DB { diff --git a/dbms/src/Interpreters/interpretSubquery.cpp b/dbms/src/Interpreters/interpretSubquery.cpp index 305e76f7d4a..a585f7edc42 100644 --- a/dbms/src/Interpreters/interpretSubquery.cpp +++ b/dbms/src/Interpreters/interpretSubquery.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include namespace DB { @@ 
-69,10 +69,10 @@ std::shared_ptr interpretSubquery( } else { - auto database_table = getDatabaseAndTableNameFromIdentifier(*table); - const auto & storage = context.getTable(database_table.first, database_table.second); + DatabaseAndTableWithAlias database_table(*table); + const auto & storage = context.getTable(database_table.database, database_table.table); columns = storage->getColumns().ordinary; - select_query->replaceDatabaseAndTable(database_table.first, database_table.second); + select_query->replaceDatabaseAndTable(database_table.database, database_table.table); } select_expression_list->children.reserve(columns.size()); diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index ebe0560e142..d89f818eaaa 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include @@ -27,24 +27,23 @@ namespace ErrorCodes static void extractDependentTable(ASTSelectQuery & query, String & select_database_name, String & select_table_name) { - DatabaseAndTableWithAlias db_and_table; - bool is_table = getDatabaseAndTable(query, 0, db_and_table); + auto db_and_table = getDatabaseAndTable(query, 0); ASTPtr subquery = getTableFunctionOrSubquery(query, 0); - if (!is_table && !subquery) + if (!db_and_table && !subquery) return; - if (is_table) + if (db_and_table) { - select_table_name = db_and_table.table; + select_table_name = db_and_table->table; - if (db_and_table.database.empty()) + if (db_and_table->database.empty()) { - db_and_table.database = select_database_name; + db_and_table->database = select_database_name; query.setDatabaseIfNeeded(select_database_name); } else - select_database_name = db_and_table.database; + select_database_name = db_and_table->database; } else if (auto ast_select = typeid_cast(subquery.get())) { From 892ab4ce8afadc04a0c18e1accfd4aba0cc5e840 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 31 Oct 2018 12:17:47 +0300 Subject: [PATCH 22/79] Better code for check on null values inside array IN expression --- dbms/src/Formats/ValuesRowInputStream.cpp | 28 ++++--------------- dbms/src/Interpreters/convertFieldToType.cpp | 5 ++++ .../00748_insert_array_with_null.reference | 1 + .../00748_insert_array_with_null.sql | 11 ++++++++ 4 files changed, 23 insertions(+), 22 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00748_insert_array_with_null.reference create mode 100644 dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql diff --git a/dbms/src/Formats/ValuesRowInputStream.cpp b/dbms/src/Formats/ValuesRowInputStream.cpp index 13d013a8ac9..2dd27ce8df7 100644 --- a/dbms/src/Formats/ValuesRowInputStream.cpp +++ b/dbms/src/Formats/ValuesRowInputStream.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -30,20 +29,6 @@ namespace ErrorCodes } -bool is_array_type_compatible(const DataTypeArray & type, const Field & value) -{ - if (type.getNestedType()->isNullable()) - return true; - - const Array & array = DB::get(value); - size_t size = array.size(); - for (size_t i = 0; i < size; ++i) - if (array[i].isNull()) - return false; - - return true; -} - ValuesRowInputStream::ValuesRowInputStream(ReadBuffer & istr_, const Block & header_, const Context & context_, const FormatSettings & format_settings) : istr(istr_), header(header_), context(std::make_unique(context_)), format_settings(format_settings) { @@ -131,15 +116,14 @@ bool 
ValuesRowInputStream::read(MutableColumns & columns) std::pair value_raw = evaluateConstantExpression(ast, *context); Field value = convertFieldToType(value_raw.first, type, value_raw.second.get()); - const auto * array_type = typeid_cast(&type); - /// Check that we are indeed allowed to insert a NULL. - if ((value.isNull() && !type.isNullable()) || (array_type && !is_array_type_compatible(*array_type, value))) + if (value.isNull()) { - throw Exception{"Expression returns value " + applyVisitor(FieldVisitorToString(), value) - + ", that is out of range of type " + type.getName() - + ", at: " + String(prev_istr_position, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, istr.buffer().end() - prev_istr_position)), - ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE}; + if (!type.isNullable()) + throw Exception{"Expression returns value " + applyVisitor(FieldVisitorToString(), value) + + ", that is out of range of type " + type.getName() + + ", at: " + String(prev_istr_position, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, istr.buffer().end() - prev_istr_position)), + ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE}; } columns[i]->insert(value); diff --git a/dbms/src/Interpreters/convertFieldToType.cpp b/dbms/src/Interpreters/convertFieldToType.cpp index 58b0c164c35..a9d94ddd4f3 100644 --- a/dbms/src/Interpreters/convertFieldToType.cpp +++ b/dbms/src/Interpreters/convertFieldToType.cpp @@ -218,7 +218,12 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID Array res(src_arr_size); for (size_t i = 0; i < src_arr_size; ++i) + { res[i] = convertFieldToType(src_arr[i], *nested_type); + if (res[i].isNull() && !type_array->getNestedType()->isNullable()) + throw Exception("Type mismatch of array elements in IN or VALUES section. Expected: " + type_array->getNestedType()->getName() + + ". 
Got NULL in position " + toString(i + 1), ErrorCodes::TYPE_MISMATCH);
+        }

         return res;
     }
diff --git a/dbms/tests/queries/0_stateless/00748_insert_array_with_null.reference b/dbms/tests/queries/0_stateless/00748_insert_array_with_null.reference
new file mode 100644
index 00000000000..1ffffa4aa6b
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00748_insert_array_with_null.reference
@@ -0,0 +1 @@
+['aaaaa','bbbbb','ccccc']
diff --git a/dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql b/dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql
new file mode 100644
index 00000000000..027f9b6b472
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql
@@ -0,0 +1,11 @@
+DROP TABLE IF EXISTS test.arraytest;
+
+CREATE TABLE test.arraytest ( created_date Date DEFAULT toDate(created_at), created_at DateTime DEFAULT now(), strings Array(String) DEFAULT emptyArrayString()) ENGINE = MergeTree(created_date, cityHash64(created_at), (created_date, cityHash64(created_at)), 8192);
+
+INSERT INTO test.arraytest (created_at, strings) VALUES (now(), ['aaaaa', 'bbbbb', 'ccccc']);
+INSERT INTO test.arraytest (created_at, strings) VALUES (now(), ['aaaaa', 'bbbbb', null]); -- { clientError 53 }
+
+SELECT strings from test.arraytest;
+
+DROP TABLE IF EXISTS test.arraytest;
+
From 791a1422de075e82d18f3be9f5ec284f684e1246 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Wed, 31 Oct 2018 12:24:58 +0300
Subject: [PATCH 23/79] Enable merge with DIRECT_IO when the total size of participating parts is more than 10GB

---
 dbms/src/Storages/MergeTree/MergeTreeSettings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h
index 9a4d9c0e9c2..f7fa9bf6703 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h
@@ -149,7 +149,7 @@ struct MergeTreeSettings
     M(SettingUInt64, finished_mutations_to_keep, 100) \
 \
     /** Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled) */ \
-    M(SettingUInt64, min_merge_bytes_to_use_direct_io, 0)
+    M(SettingUInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024)

 /// Settings that should not change after the creation of a table.
#define APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(M) \ From 252109f314b7b37d5432c4243ad8f357cf16d2f5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 31 Oct 2018 14:45:53 +0300 Subject: [PATCH 24/79] Add test with O_DIRECT merge --- .../00750_merge_tree_merge_with_o_direct.reference | 3 +++ .../00750_merge_tree_merge_with_o_direct.sql | 11 +++++++++++ 2 files changed, 14 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.reference create mode 100644 dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.sql diff --git a/dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.reference b/dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.reference new file mode 100644 index 00000000000..77a14d12483 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.reference @@ -0,0 +1,3 @@ +2018-10-31 05:05:00 0 +2018-10-31 06:06:00 10 +2018-10-28 10:00:00 20 diff --git a/dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.sql b/dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.sql new file mode 100644 index 00000000000..f058199bcca --- /dev/null +++ b/dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS test.sample_merge_tree; + +CREATE TABLE test.sample_merge_tree (dt DateTime, x UInt64) ENGINE = MergeTree PARTITION BY toYYYYMMDD(dt) ORDER BY x SETTINGS min_merge_bytes_to_use_direct_io=1, index_granularity = 8192; + +INSERT INTO test.sample_merge_tree VALUES (toDateTime('2018-10-31 05:05:00'), 0), (toDateTime('2018-10-31 06:06:00'), 10), (toDateTime('2018-10-28 10:00:00'), 20); + +OPTIMIZE TABLE test.sample_merge_tree FINAL; + +SELECT * FROM test.sample_merge_tree; + +DROP TABLE IF EXISTS test.sample_merge_tree; From 5e487608d27c83c8003b8c54d2e883e1f1abe858 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 31 Oct 2018 15:49:50 +0300 Subject: [PATCH 25/79] Strict order in select --- .../0_stateless/00750_merge_tree_merge_with_o_direct.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.sql b/dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.sql index f058199bcca..5957ec3f50f 100644 --- a/dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.sql +++ b/dbms/tests/queries/0_stateless/00750_merge_tree_merge_with_o_direct.sql @@ -6,6 +6,6 @@ INSERT INTO test.sample_merge_tree VALUES (toDateTime('2018-10-31 05:05:00'), 0) OPTIMIZE TABLE test.sample_merge_tree FINAL; -SELECT * FROM test.sample_merge_tree; +SELECT * FROM test.sample_merge_tree ORDER BY x; DROP TABLE IF EXISTS test.sample_merge_tree; From b7c794bf7d1de8cbc3e1a196553956794d244796 Mon Sep 17 00:00:00 2001 From: zamulla Date: Wed, 31 Oct 2018 15:56:30 +0300 Subject: [PATCH 26/79] Typo --- docs/ru/query_language/dicts/external_dicts_dict_structure.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/query_language/dicts/external_dicts_dict_structure.md b/docs/ru/query_language/dicts/external_dicts_dict_structure.md index e26eb0282bc..76271effe9c 100644 --- a/docs/ru/query_language/dicts/external_dicts_dict_structure.md +++ b/docs/ru/query_language/dicts/external_dicts_dict_structure.md @@ -60,7 +60,7 @@ ClickHouse поддерживает следующие виды ключей: ### Составной ключ -Ключем может быть кортеж (`tuple`) из полей произвольных типов. 
[layout](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout) в этом случае должен быть `complex_key_hashed` или `complex_key_cache`. +Ключом может быть кортеж (`tuple`) из полей произвольных типов. [layout](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout) в этом случае должен быть `complex_key_hashed` или `complex_key_cache`. !!! tip "Совет" Cоставной ключ может состоять из одного элемента. Это даёт возможность использовать в качестве ключа, например, строку. From d8674a264a4d93927b1b0c8ee9f43e281d42db67 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 31 Oct 2018 15:58:12 +0300 Subject: [PATCH 27/79] ExpressionAnalyzer fix columns deduplication --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 26 ++++++-------------- dbms/src/Interpreters/ExpressionAnalyzer.h | 3 --- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index cfc0d340001..5c476072674 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -176,21 +176,22 @@ ExpressionAnalyzer::ExpressionAnalyzer( storage = context.tryGetTable(db_and_table->database, db_and_table->table); } - if (storage && source_columns.empty()) + if (storage) { auto physical_columns = storage->getColumns().getAllPhysical(); if (source_columns.empty()) source_columns.swap(physical_columns); else - { source_columns.insert(source_columns.end(), physical_columns.begin(), physical_columns.end()); - removeDuplicateColumns(source_columns); + + if (select_query) + { + const auto & storage_aliases = storage->getColumns().aliases; + source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end()); } } - else - removeDuplicateColumns(source_columns); - addAliasColumns(); + removeDuplicateColumns(source_columns); translateQualifiedNames(); @@ -576,19 +577,6 @@ void ExpressionAnalyzer::normalizeTree() } -void ExpressionAnalyzer::addAliasColumns() -{ - if (!select_query) - return; - - if (!storage) - return; - - const auto & storage_aliases = storage->getColumns().aliases; - source_columns.insert(std::end(source_columns), std::begin(storage_aliases), std::end(storage_aliases)); -} - - void ExpressionAnalyzer::executeScalarSubqueries() { LogAST log; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 4117d8d3fe1..b55fe541968 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -322,9 +322,6 @@ private: void optimizeIfWithConstantConditionImpl(ASTPtr & current_ast); bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) const; - /// Adds a list of ALIAS columns from the table. - void addAliasColumns(); - /// Replacing scalar subqueries with constant values. 
void executeScalarSubqueries(); From 6bbbd8721c234db72ad1e118b776e65e57b14469 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 31 Oct 2018 18:09:53 +0300 Subject: [PATCH 28/79] more tests for joins --- .../0_stateless/00722_inner_join.reference | 12 +++++++++ .../queries/0_stateless/00722_inner_join.sql | 25 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00722_inner_join.reference b/dbms/tests/queries/0_stateless/00722_inner_join.reference index 2fa6ca7df49..9fdac0e26a1 100644 --- a/dbms/tests/queries/0_stateless/00722_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00722_inner_join.reference @@ -1,6 +1,18 @@ +┌─database─┬─name─┐ +│ system │ one │ +└──────────┴──────┘ +┌─database─┬─name─┐ +│ system │ one │ +└──────────┴──────┘ +┌─database─┬─name─┐ +│ system │ one │ +└──────────┴──────┘ ┌─x──────┬─name─┐ │ system │ one │ └────────┴──────┘ +┌─database─┬─name─┐ +│ system │ one │ +└──────────┴──────┘ ┌─x──────┬─name─┐ │ system │ one │ └────────┴──────┘ diff --git a/dbms/tests/queries/0_stateless/00722_inner_join.sql b/dbms/tests/queries/0_stateless/00722_inner_join.sql index ab1df6f48b1..9d9c4c48d4e 100644 --- a/dbms/tests/queries/0_stateless/00722_inner_join.sql +++ b/dbms/tests/queries/0_stateless/00722_inner_join.sql @@ -2,12 +2,37 @@ CREATE DATABASE IF NOT EXISTS test; DROP TABLE IF EXISTS test.one; CREATE TABLE test.one(dummy UInt8) ENGINE = Memory; +SELECT database, t.name + FROM system.tables AS t + ALL INNER JOIN (SELECT name AS database FROM system.databases) AS db USING database + WHERE database = 'system' AND t.name = 'one' + FORMAT PrettyCompactNoEscapes; + +SELECT database, t.name + FROM (SELECT name AS database FROM system.databases) AS db + ALL INNER JOIN system.tables AS t USING database + WHERE database = 'system' AND t.name = 'one' + FORMAT PrettyCompactNoEscapes; + +SELECT database, t.name + FROM (SELECT name, database FROM system.tables) AS t + ALL INNER JOIN (SELECT name AS database FROM system.databases) AS db USING database + WHERE database = 'system' AND t.name = 'one' + FORMAT PrettyCompactNoEscapes; + SELECT x, t.name FROM (SELECT name, database AS x FROM system.tables) AS t ALL INNER JOIN (SELECT name AS x FROM system.databases) AS db USING x WHERE x = 'system' AND t.name = 'one' FORMAT PrettyCompactNoEscapes; +SELECT database, t.name + FROM (SELECT name, database FROM system.tables) AS t + JOIN (SELECT name AS database FROM system.databases) AS db USING database + WHERE database = 'system' AND t.name = 'one' + SETTINGS join_default_strictness = 'ALL' + FORMAT PrettyCompactNoEscapes; + SELECT x, t.name FROM (SELECT name, database AS x FROM system.tables) AS t JOIN (SELECT name AS x FROM system.databases) AS db USING x From 1eae5c38ea269647e09bcb7f414f8b92a641ab5a Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 31 Oct 2018 20:31:04 +0300 Subject: [PATCH 29/79] remove code duplicates for creating table AST node --- .../Interpreters/GlobalSubqueriesVisitor.h | 2 +- .../InJoinSubqueriesPreprocessor.cpp | 23 ++----------- dbms/src/Parsers/ASTSelectQuery.cpp | 34 +++++++++---------- dbms/src/Parsers/ASTSelectQuery.h | 3 ++ 4 files changed, 23 insertions(+), 39 deletions(-) diff --git a/dbms/src/Interpreters/GlobalSubqueriesVisitor.h b/dbms/src/Interpreters/GlobalSubqueriesVisitor.h index f645059a03c..2e5cb910786 100644 --- a/dbms/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/dbms/src/Interpreters/GlobalSubqueriesVisitor.h @@ -139,7 +139,7 @@ private: * instead of doing a subquery, you just need to read it. 
*/ - auto database_and_table_name = ASTIdentifier::createSpecial(external_table_name); + auto database_and_table_name = createDatabaseAndTableNode("", external_table_name); if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) { diff --git a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 554aad3acc8..36b85fa4b5c 100644 --- a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -91,24 +91,6 @@ StoragePtr tryGetTable(const ASTPtr & database_and_table, const Context & contex return context.tryGetTable(db_and_table.database, db_and_table.table); } - -void replaceDatabaseAndTable(ASTPtr & database_and_table, const String & database_name, const String & table_name) -{ - ASTPtr table = ASTIdentifier::createSpecial(table_name); - - if (!database_name.empty()) - { - ASTPtr database = ASTIdentifier::createSpecial(database_name); - - database_and_table = ASTIdentifier::createSpecial(database_name + "." + table_name); - database_and_table->children = {database, table}; - } - else - { - database_and_table = ASTIdentifier::createSpecial(table_name); - } -} - } @@ -148,7 +130,7 @@ void InJoinSubqueriesPreprocessor::process(ASTSelectQuery * query) const forEachNonGlobalSubquery(query, [&] (IAST * subquery, IAST * function, IAST * table_join) { - forEachTable(subquery, [&] (ASTPtr & database_and_table) + forEachTable(subquery, [&] (ASTPtr & database_and_table) { StoragePtr storage = tryGetTable(database_and_table, context); @@ -191,7 +173,8 @@ void InJoinSubqueriesPreprocessor::process(ASTSelectQuery * query) const std::string table; std::tie(database, table) = getRemoteDatabaseAndTableName(*storage); - replaceDatabaseAndTable(database_and_table, database, table); + /// TODO: find a way to avoid AST node replacing + database_and_table = createDatabaseAndTableNode(database, table); } else throw Exception("InJoinSubqueriesPreprocessor: unexpected value of 'distributed_product_mode' setting", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index 8e20e1d46a6..efbbe6066bd 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -19,6 +19,19 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } +ASTPtr createDatabaseAndTableNode(const String & database_name, const String & table_name) +{ + if (database_name.empty()) + return ASTIdentifier::createSpecial(table_name); + + ASTPtr database = ASTIdentifier::createSpecial(database_name); + ASTPtr table = ASTIdentifier::createSpecial(table_name); + + ASTPtr database_and_table = ASTIdentifier::createSpecial(database_name + "." + table_name); + database_and_table->children = {database, table}; + return database_and_table; +} + ASTPtr ASTSelectQuery::clone() const { @@ -323,12 +336,9 @@ void ASTSelectQuery::setDatabaseIfNeeded(const String & database_name) } else if (table_expression->database_and_table_name->children.empty()) { - ASTPtr database = ASTIdentifier::createSpecial(database_name); - ASTPtr table = table_expression->database_and_table_name; + const ASTIdentifier & identifier = static_cast(*table_expression->database_and_table_name); - const String & old_name = static_cast(*table_expression->database_and_table_name).name; - table_expression->database_and_table_name = ASTIdentifier::createSpecial(database_name + "." 
+ old_name); - table_expression->database_and_table_name->children = {database, table}; + table_expression->database_and_table_name = createDatabaseAndTableNode(database_name, identifier.name); } else if (table_expression->database_and_table_name->children.size() != 2) { @@ -356,19 +366,7 @@ void ASTSelectQuery::replaceDatabaseAndTable(const String & database_name, const table_expression = table_expr.get(); } - ASTPtr table = ASTIdentifier::createSpecial(table_name); - - if (!database_name.empty()) - { - ASTPtr database = ASTIdentifier::createSpecial(database_name); - - table_expression->database_and_table_name = ASTIdentifier::createSpecial(database_name + "." + table_name); - table_expression->database_and_table_name->children = {database, table}; - } - else - { - table_expression->database_and_table_name = ASTIdentifier::createSpecial(table_name); - } + table_expression->database_and_table_name = createDatabaseAndTableNode(database_name, table_name); } diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h index 1e0a2ffd575..1c88186cb31 100644 --- a/dbms/src/Parsers/ASTSelectQuery.h +++ b/dbms/src/Parsers/ASTSelectQuery.h @@ -53,4 +53,7 @@ protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; + +ASTPtr createDatabaseAndTableNode(const String & database_name, const String & table_name); + } From 1db565193cba2e442213e2729904ee19aaa992be Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 31 Oct 2018 20:45:41 +0300 Subject: [PATCH 30/79] description for TranslateQualifiedNamesVisitor --- dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index 6f7d9ca7f54..eaf5a4f7ba0 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -18,7 +18,7 @@ struct ASTTableJoin; class NamesAndTypesList; -/// It visits nodes, find identifiers and translate their names to needed form. +/// It visits nodes, find columns (general identifiers and asterisks) and translate their names according to tables' names. class TranslateQualifiedNamesVisitor { public: From d88450db4d6811e8f3ab2b47f38367a087ef44ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Vavru=C5=A1a?= Date: Wed, 31 Oct 2018 16:21:23 -0700 Subject: [PATCH 31/79] DictionarySourceFactory: fix mistaken connect timeout for read timeout It looks like this was accidental, as XDBCStorage and bridges use http_receive_timeout, but the dictionary passes http_connection_timeout, so it times out on long queries. 
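The two settings bound different phases of the HTTP exchange with the bridge: the connection timeout only covers establishing the connection, while the receive timeout covers waiting for the answer. A rough sketch of the distinction in curl terms (illustrative only; the URL and port are placeholders, not taken from this patch):

```bash
# --connect-timeout bounds TCP connection setup, roughly what
# http_connection_timeout controls; --max-time caps how long we are
# willing to wait for the whole response, which is what a long-running
# dictionary query against the bridge actually needs.
curl --connect-timeout 10 --max-time 300 'http://localhost:9018/ping'
```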
--- dbms/src/Dictionaries/DictionarySourceFactory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/DictionarySourceFactory.cpp b/dbms/src/Dictionaries/DictionarySourceFactory.cpp index bb778fbce62..1da2268c1e5 100644 --- a/dbms/src/Dictionaries/DictionarySourceFactory.cpp +++ b/dbms/src/Dictionaries/DictionarySourceFactory.cpp @@ -156,7 +156,7 @@ DictionarySourcePtr DictionarySourceFactory::create( { #if USE_POCO_SQLODBC || USE_POCO_DATAODBC const auto & global_config = context.getConfigRef(); - BridgeHelperPtr bridge = std::make_shared>(global_config, context.getSettings().http_connection_timeout, config.getString(config_prefix + ".odbc.connection_string")); + BridgeHelperPtr bridge = std::make_shared>(global_config, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".odbc.connection_string")); return std::make_unique(dict_struct, config, config_prefix + ".odbc", sample_block, context, bridge); #else throw Exception{"Dictionary source of type `odbc` is disabled because poco library was built without ODBC support.", @@ -167,7 +167,7 @@ DictionarySourcePtr DictionarySourceFactory::create( { throw Exception{"Dictionary source of type `jdbc` is disabled until consistent support for nullable fields.", ErrorCodes::SUPPORT_IS_DISABLED}; -// BridgeHelperPtr bridge = std::make_shared>(config, context.getSettings().http_connection_timeout, config.getString(config_prefix + ".connection_string")); +// BridgeHelperPtr bridge = std::make_shared>(config, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".connection_string")); // return std::make_unique(dict_struct, config, config_prefix + ".jdbc", sample_block, context, bridge); } else if ("executable" == source_type) From d603cb1eb8606f6fcb57cb60673ebae42c111503 Mon Sep 17 00:00:00 2001 From: "igor.lapko" Date: Thu, 1 Nov 2018 09:01:34 +0200 Subject: [PATCH 32/79] fixed group by int16 and Date types on AMD EPYC 7401P machine --- dbms/src/Interpreters/Aggregator.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 4d99f731517..52f7accf232 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1475,7 +1475,9 @@ void NO_INLINE Aggregator::mergeDataImpl( Table & table_src, Arena * arena) const { - for (auto it = table_src.begin(); it != table_src.end(); ++it) + decltype(table_src.end()) end = table_src.end(); + + for (auto it = table_src.begin(); it != end; ++it) { decltype(it) res_it; bool inserted; From d7fe560090d353f350ded6ebc3a09dfa596d9795 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 1 Nov 2018 17:14:01 +0800 Subject: [PATCH 33/79] restore query context before polling --- dbms/programs/server/TCPHandler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 79c8e08fd14..5c9b3a2d86d 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -121,6 +121,9 @@ void TCPHandler::runImpl() while (1) { + /// Restore context of request. + query_context = connection_context; + /// We are waiting for a packet from the client. Thus, every `POLL_INTERVAL` seconds check whether we need to shut down. while (!static_cast(*in).poll(global_settings.poll_interval * 1000000) && !server.isCancelled()) ; @@ -145,9 +148,6 @@ void TCPHandler::runImpl() try { - /// Restore context of request. 
- query_context = connection_context; - /// If a user passed query-local timeouts, reset socket to initial state at the end of the query SCOPE_EXIT({state.timeout_setter.reset();}); From 091e0ba0c2739628d27e05a44be417c146baff05 Mon Sep 17 00:00:00 2001 From: ggerogery Date: Wed, 31 Oct 2018 09:16:27 +0300 Subject: [PATCH 34/79] Update index.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Небольшие исправления текста. --- docs/ru/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/index.md b/docs/ru/index.md index a2f6945bd04..24e97aaff83 100644 --- a/docs/ru/index.md +++ b/docs/ru/index.md @@ -58,7 +58,7 @@ ClickHouse - столбцовая система управления базам ## Причины, по которым столбцовые СУБД лучше подходят для OLAP сценария -Столбцовые СУБД лучше (от 100 раз по скорости обработки большинства запросов) подходят для OLAP сценария работы. Причины в деталях буду разъяснены ниже, а сам факт проще проще продемонстрировать визуально: +Столбцовые СУБД лучше (от 100 раз по скорости обработки большинства запросов) подходят для OLAP сценария работы. Причины в деталях будут разъяснены ниже, а сам факт проще продемонстрировать визуально: **Строковые СУБД** @@ -76,7 +76,7 @@ ClickHouse - столбцовая система управления базам 2. Так как данные читаются пачками, то их проще сжимать. Данные, лежащие по столбцам также лучше сжимаются. За счёт этого, дополнительно уменьшается объём ввода-вывода. 3. За счёт уменьшения ввода-вывода, больше данных влезает в системный кэш. -Для примера, для запроса "посчитать количество записей для каждой рекламной системы", требуется прочитать один столбец "идентификатор рекламной системы", который занимает 1 байт в несжатом виде. Если большинство переходов было не с рекламных систем, то можно рассчитывать хотя бы на десятикратное сжатие этого столбца. При использовании быстрого алгоритма сжатия, возможно разжатие данных со скоростью более нескольких гигабайт несжатых данных в секунду. То есть, такой запрос может выполняться со скоростью около нескольких миллиардов строк в секунду на одном сервере. На практике, такая скорость действительно достигается. +Например, для запроса "посчитать количество записей для каждой рекламной системы", требуется прочитать один столбец "идентификатор рекламной системы", который занимает 1 байт в несжатом виде. Если большинство переходов было не с рекламных систем, то можно рассчитывать хотя бы на десятикратное сжатие этого столбца. При использовании быстрого алгоритма сжатия, возможно разжатие данных со скоростью более нескольких гигабайт несжатых данных в секунду. То есть, такой запрос может выполняться со скоростью около нескольких миллиардов строк в секунду на одном сервере. На практике, такая скорость действительно достигается.
<details markdown="1"><summary>Пример</summary>
```

From abcc692dbfec67b9b0ef8b674c7502cbc40bb117 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Wed, 31 Oct 2018 14:11:41 +0300
Subject: [PATCH 35/79] CLICKHOUSE-4096: Fix nested unaliased joins segfault

---
 dbms/src/Interpreters/ExpressionAnalyzer.cpp  | 10 ++++-
 dbms/src/Interpreters/ExpressionAnalyzer.h    |  5 +++
 ...inner_join_of_unnamed_subqueries.reference |  1 +
 ...00749_inner_join_of_unnamed_subqueries.sql | 40 +++++++++++++++++++
 4 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00749_inner_join_of_unnamed_subqueries.reference
 create mode 100644 dbms/tests/queries/0_stateless/00749_inner_join_of_unnamed_subqueries.sql

diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index b889336b620..bd7032ebde2 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -1250,13 +1250,19 @@ const ExpressionAnalyzer::AnalyzedJoin::JoinedColumnsList & ExpressionAnalyzer::
 
         for (auto & column : columns)
         {
-            columns_from_joined_table.emplace_back(column, column.name);
+            JoinedColumn joined_column(column, column.name);
 
             if (source_columns.contains(column.name))
             {
                 auto qualified_name = table_name_with_alias.getQualifiedNamePrefix() + column.name;
-                columns_from_joined_table.back().name_and_type.name = qualified_name;
+                joined_column.name_and_type.name = qualified_name;
             }
+
+            /// We don't want to add duplicate columns.
+            /// They may appear from nested joins with unaliased subqueries, but actually they exist only in AST
+            if (std::find(columns_from_joined_table.begin(), columns_from_joined_table.end(), joined_column) == columns_from_joined_table.end())
+                columns_from_joined_table.push_back(joined_column);
+
         }
     }
 }
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h
index 4117d8d3fe1..67521f1adc2 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.h
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.h
@@ -259,6 +259,11 @@ private:
 
         JoinedColumn(const NameAndTypePair & name_and_type_, const String & original_name_)
             : name_and_type(name_and_type_), original_name(original_name_) {}
+
+        bool operator==(const JoinedColumn & o) const
+        {
+            return name_and_type == o.name_and_type && original_name == o.original_name;
+        }
     };
 
     using JoinedColumnsList = std::list<JoinedColumn>;
 
diff --git a/dbms/tests/queries/0_stateless/00749_inner_join_of_unnamed_subqueries.reference b/dbms/tests/queries/0_stateless/00749_inner_join_of_unnamed_subqueries.reference
new file mode 100644
index 00000000000..7b39e1997c1
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00749_inner_join_of_unnamed_subqueries.reference
@@ -0,0 +1 @@
+1 hello
diff --git a/dbms/tests/queries/0_stateless/00749_inner_join_of_unnamed_subqueries.sql b/dbms/tests/queries/0_stateless/00749_inner_join_of_unnamed_subqueries.sql
new file mode 100644
index 00000000000..f2cf8579bab
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00749_inner_join_of_unnamed_subqueries.sql
@@ -0,0 +1,40 @@
+DROP TABLE IF EXISTS test.left_table;
+DROP TABLE IF EXISTS test.right_table;
+
+CREATE TABLE test.left_table(APIKey Int32, SomeColumn String) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO test.left_table VALUES(1, 'somestr');
+
+CREATE TABLE test.right_table(APIKey Int32, EventValueForPostback String) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO test.right_table VALUES(1, 'hello'), (2, 'WORLD');
+
+SELECT
+    APIKey,
+    ConversionEventValue
+FROM
+    test.left_table AS left_table
+ALL INNER JOIN
+    (
+        SELECT *
+        FROM
+        (
+            SELECT
+                APIKey,
+                EventValueForPostback AS ConversionEventValue
+            FROM
+                test.right_table AS right_table
+        )
+        ALL INNER JOIN
+        (
+            SELECT
+                APIKey
+            FROM
+                test.left_table as left_table
+            GROUP BY
+                APIKey
+        ) USING (APIKey)
+    ) USING (APIKey);
+
+DROP TABLE IF EXISTS test.left_table;
+DROP TABLE IF EXISTS test.right_table;

From 63c6647be462942b6f4dfeff9c219916c28e70da Mon Sep 17 00:00:00 2001
From: alesapin
Date: Thu, 1 Nov 2018 10:54:27 +0300
Subject: [PATCH 36/79] Better comment

---
 dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index bd7032ebde2..da121178774 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -1258,8 +1258,7 @@ const ExpressionAnalyzer::AnalyzedJoin::JoinedColumnsList & ExpressionAnalyzer::
                 joined_column.name_and_type.name = qualified_name;
             }
 
-            /// We don't want to add duplicate columns.
-            /// They may appear from nested joins with unaliased subqueries, but actually they exist only in AST
+            /// We don't want to select duplicate columns from the joined subquery if they appear
             if (std::find(columns_from_joined_table.begin(), columns_from_joined_table.end(), joined_column) == columns_from_joined_table.end())
                 columns_from_joined_table.push_back(joined_column);
 

From 1a71a443b88eb200df3824177692ba5533afe98f Mon Sep 17 00:00:00 2001
From: chertus
Date: Thu, 1 Nov 2018 14:32:44 +0300
Subject: [PATCH 37/79] resolve review remarks

---
 dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp    | 6 +++---
 dbms/src/Interpreters/DatabaseAndTableWithAlias.h      | 3 ++-
 dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp
index f43e75ed8d5..df430ee0fbd 100644
--- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp
+++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp
@@ -108,7 +108,7 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression &
 {
     if (table_expression.database_and_table_name)
     {
-        const auto * identifier = static_cast<const ASTIdentifier *>(table_expression.database_and_table_name.get());
+        const auto * identifier = typeid_cast<const ASTIdentifier *>(table_expression.database_and_table_name.get());
         if (!identifier)
             throw Exception("Logical error: identifier expected", ErrorCodes::LOGICAL_ERROR);
 
@@ -200,7 +200,7 @@ std::vector<DatabaseAndTableWithAlias> getDatabaseAndTables(const ASTSelectQuery
     return database_and_table_with_aliases;
 }
 
-std::shared_ptr<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number)
+std::optional<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number)
 {
     const ASTTableExpression * table_expression = getTableExpression(select, table_number);
     if (!table_expression)
@@ -214,7 +214,7 @@
     if (!identifier)
         return {};
 
-    return std::make_shared<DatabaseAndTableWithAlias>(*identifier);
+    return *identifier;
 }
 
 ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number)
diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h
index 35194e21e64..2444f037d8e 100644
--- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h
+++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <memory>
+#include <optional>
 #include <Core/Types.h>
 
 namespace
DB
@@ -37,7 +38,7 @@ size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier,
                                                               const DatabaseAndTableWithAlias & names);
 
 std::vector<DatabaseAndTableWithAlias> getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database);
-std::shared_ptr<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number);
+std::optional<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number);
 
 std::vector<const ASTTableExpression *> getSelectTablesExpression(const ASTSelectQuery & select_query);
 ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number);
 
diff --git a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp
index 36b85fa4b5c..58e28f9bfc6 100644
--- a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp
+++ b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp
@@ -82,7 +82,7 @@ void forEachTable(IAST * node, F && f)
 
 StoragePtr tryGetTable(const ASTPtr & database_and_table, const Context & context)
 {
-    const ASTIdentifier * id = static_cast<const ASTIdentifier *>(database_and_table.get());
+    const ASTIdentifier * id = typeid_cast<const ASTIdentifier *>(database_and_table.get());
     if (!id)
         throw Exception("Logical error: identifier expected", ErrorCodes::LOGICAL_ERROR);
 

From 2d565f1099aeaf143b1457a141ce25682175224b Mon Sep 17 00:00:00 2001
From: "igor.lapko"
Date: Thu, 1 Nov 2018 13:42:15 +0200
Subject: [PATCH 38/79] code style fix

---
 dbms/src/Interpreters/Aggregator.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp
index 52f7accf232..d594f82050a 100644
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@@ -1475,9 +1475,8 @@ void NO_INLINE Aggregator::mergeDataImpl(
     Table & table_src,
     Arena * arena) const
 {
-    decltype(table_src.end()) end = table_src.end();
 
-    for (auto it = table_src.begin(); it != end; ++it)
+    for (auto it = table_src.begin(), end = table_src.end(); it != end; ++it)
     {
         decltype(it) res_it;
         bool inserted;

From 1a052ed83b20c204adb74a783707ffece78642cf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 1 Nov 2018 15:37:20 +0300
Subject: [PATCH 39/79] Updated meetup links [#CLICKHOUSE-3]

---
 README.md | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index e5ca7ce2ec5..a488c55643c 100644
--- a/README.md
+++ b/README.md
@@ -2,14 +2,11 @@
 
 ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.
 
+🎤🥂 **ClickHouse Meetup in [Amsterdam on November 15](https://events.yandex.com/events/meetings/15-11-2018/)** 🍰🔥🐻
+
 ## Useful Links
 
 * [Official website](https://clickhouse.yandex/) has quick high-level overview of ClickHouse on main page.
 * [Tutorial](https://clickhouse.yandex/tutorial.html) shows how to set up and query small ClickHouse cluster.
 * [Documentation](https://clickhouse.yandex/docs/en/) provides more in-depth information.
 * [Contacts](https://clickhouse.yandex/#contacts) can help to get your questions answered if there are any.
- -## Upcoming Meetups - -* [Beijing on October 28](http://www.clickhouse.com.cn/topic/5ba0e3f99d28dfde2ddc62a1) -* [Amsterdam on November 15](https://events.yandex.com/events/meetings/15-11-2018/) From 00fa7d9fad1f3253daf2a0f53adc15c22b021691 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 1 Nov 2018 15:42:57 +0300 Subject: [PATCH 40/79] Update Aggregator.cpp --- dbms/src/Interpreters/Aggregator.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index d594f82050a..f854bdb894b 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1475,7 +1475,6 @@ void NO_INLINE Aggregator::mergeDataImpl( Table & table_src, Arena * arena) const { - for (auto it = table_src.begin(), end = table_src.end(); it != end; ++it) { decltype(it) res_it; From 53e2ec08ce1fd0ac076ddbcfddb10e937540e574 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Thu, 1 Nov 2018 16:27:01 +0300 Subject: [PATCH 41/79] Bitwise aggregate functions are translated into russian (#3486) * Update of english version of descriprion of the table function `file`. * New syntax for ReplacingMergeTree. Some improvements in text. * Significantly change article about SummingMergeTree. Article is restructured, text is changed in many places of the document. New syntax for table creation is described. * Descriptions of AggregateFunction and AggregatingMergeTree are updated. Russian version. * New syntax for new syntax of CREATE TABLE * Added english docs on Aggregating, Replacing and SummingMergeTree. * CollapsingMergeTree docs. English version. * 1. Update of CollapsingMergeTree. 2. Minor changes in markup * Update aggregatefunction.md * Update aggregatefunction.md * Update aggregatefunction.md * Update aggregatingmergetree.md * GraphiteMergeTree docs update. New syntax for creation of Replicated* tables. Minor changes in *MergeTree tables creation syntax. * Markup fix * Markup and language fixes * Clarification in the CollapsingMergeTree article * Bitwise aggregate functions are translated info russian. --- .../query_language/agg_functions/reference.md | 12 +- .../query_language/agg_functions/reference.md | 216 +++++++++++++----- 2 files changed, 166 insertions(+), 62 deletions(-) diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md index 6044d4e3b43..711ce006be2 100644 --- a/docs/en/query_language/agg_functions/reference.md +++ b/docs/en/query_language/agg_functions/reference.md @@ -81,7 +81,7 @@ binary decimal 01010101 = 85 ``` -The query: +Query: ``` SELECT groupBitAnd(num) FROM t @@ -339,14 +339,14 @@ Creates an array from different argument values. Memory consumption is the same ## quantile(level)(x) -Approximates the 'level' quantile. 'level' is a constant, a floating-point number from 0 to 1. -We recommend using a 'level' value in the range of 0.01..0.99 -Don't use a 'level' value equal to 0 or 1 – use the 'min' and 'max' functions for these cases. +Approximates the `level` quantile. `level` is a constant, a floating-point number from 0 to 1. +We recommend using a `level` value in the range of `[0.01, 0.99]` +Don't use a `level` value equal to 0 or 1 – use the `min` and `max` functions for these cases. -In this function, as well as in all functions for calculating quantiles, the 'level' parameter can be omitted. In this case, it is assumed to be equal to 0.5 (in other words, the function will calculate the median). 
+In this function, as well as in all functions for calculating quantiles, the `level` parameter can be omitted. In this case, it is assumed to be equal to 0.5 (in other words, the function will calculate the median). Works for numbers, dates, and dates with times. -Returns: for numbers – Float64; for dates – a date; for dates with times – a date with time. +Returns: for numbers – `Float64`; for dates – a date; for dates with times – a date with time. Uses [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with a reservoir size up to 8192. If necessary, the result is output with linear approximation from the two neighboring values. diff --git a/docs/ru/query_language/agg_functions/reference.md b/docs/ru/query_language/agg_functions/reference.md index 2cd156591d4..d454df4919b 100644 --- a/docs/ru/query_language/agg_functions/reference.md +++ b/docs/ru/query_language/agg_functions/reference.md @@ -2,14 +2,12 @@ # Справочник функций - ## count() Считает количество строк. Принимает ноль аргументов, возвращает UInt64. -Не поддерживается синтаксис `COUNT(DISTINCT x)` - для этого есть отдельная агрегатная функция `uniq`. - -Запрос вида `SELECT count() FROM table` не оптимизируется, так как количество записей в таблице нигде не хранится отдельно - из таблицы будет выбран какой-нибудь достаточно маленький столбец, и будет посчитано количество значений в нём. +Не поддерживается синтаксис `COUNT (DISTINCT x)`. Для этого существует агрегатная функция`uniq`. +Запрос вида `SELECT count() FROM table` не оптимизируется, так как количество записей в таблице нигде не хранится отдельно. Из таблицы будет выбран какой-нибудь достаточно маленький столбец, и будет посчитано количество значений в нём. @@ -23,7 +21,6 @@ При наличии в запросе `SELECT` секции `GROUP BY` или хотя бы одной агрегатной функции, ClickHouse (в отличие от, например, MySQL) требует, чтобы все выражения в секциях `SELECT`, `HAVING`, `ORDER BY` вычислялись из ключей или из агрегатных функций. То есть, каждый выбираемый из таблицы столбец, должен использоваться либо в ключах, либо внутри агрегатных функций. Чтобы получить поведение, как в MySQL, вы можете поместить остальные столбцы в агрегатную функцию `any`. - ## anyHeavy(x) Выбирает часто встречающееся значение с помощью алгоритма "[heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf)". Если существует значение, которое встречается чаще, чем в половине случаев, в каждом потоке выполнения запроса, то возвращается данное значение. В общем случае, результат недетерминирован. @@ -33,43 +30,172 @@ anyHeavy(column) ``` **Аргументы** -- `column` - Имя столбца. + +- `column` – Имя столбца. **Пример** -Возьмем набор данных [OnTime](../../getting_started/example_datasets/ontime.md#example_datasets-ontime) и выберем произвольное часто встречающееся значение в столбце `AirlineID`. +Возьмём набор данных [OnTime](../../getting_started/example_datasets/ontime.md#example_datasets-ontime) и выберем произвольное часто встречающееся значение в столбце `AirlineID`. -``` sql +```sql SELECT anyHeavy(AirlineID) AS res FROM ontime ``` + ``` ┌───res─┐ │ 19690 │ └───────┘ ``` - ## anyLast(x) Выбирает последнее попавшееся значение. Результат так же недетерминирован, как и для функции `any`. +##groupBitAnd + +Применяет побитовое `И` для последовательности чисел. + +``` +groupBitAnd(expr) +``` + +**Параметры** + +`expr` – Выражение, результат которого имеет тип `UInt*`. + +**Возвращаемое значение** + +Значение типа `UInt*`. 
+ +**Пример** + +Тестовые данные: + +``` +binary decimal +00101100 = 44 +00011100 = 28 +00001101 = 13 +01010101 = 85 +``` + +Запрос: + +``` +SELECT groupBitAnd(num) FROM t +``` + +Где `num` – столбец с тестовыми данными. + +Результат: + +``` +binary decimal +00000100 = 4 +``` + +##groupBitOr + +Применяет побитовое `ИЛИ` для последовательности чисел. + +``` +groupBitOr (expr) +``` + +**Параметры** + +`expr` – Выражение, результат которого имеет тип `UInt*`. + +**Возвращаемое значение** + +Значение типа `UInt*`. + +**Пример** + +Тестовые данные: + +``` +binary decimal +00101100 = 44 +00011100 = 28 +00001101 = 13 +01010101 = 85 +``` + +Запрос: + +``` +SELECT groupBitOr(num) FROM t +``` + +Где `num` – столбец с тестовыми данными. + +Результат: + +``` +binary decimal +01111101 = 125 +``` + +##groupBitXor + +Применяет побитовое `ИСКЛЮЧАЮЩЕЕ ИЛИ` для последовательности чисел. + +``` +groupBitXor(expr) +``` + +**Параметры** + +`expr` – Выражение, результат которого имеет тип `UInt*`. + +**Возвращаемое значение** + +Значение типа `UInt*`. + +**Пример** + +Тестовые данные: + +``` +binary decimal +00101100 = 44 +00011100 = 28 +00001101 = 13 +01010101 = 85 +``` + +Запрос: + +``` +SELECT groupBitXor(num) FROM t +``` + +Где `num` – столбец с тестовыми данными. + +Результат: + +``` +binary decimal +01101000 = 104 +``` + ## min(x) Вычисляет минимум. - ## max(x) Вычисляет максимум. - ## argMin(arg, val) Вычисляет значение arg при минимальном значении val. Если есть несколько разных значений arg для минимальных значений val, то выдаётся первое попавшееся из таких значений. **Пример:** + ``` ┌─user─────┬─salary─┐ │ director │ 5000 │ @@ -95,7 +221,6 @@ SELECT argMin(user, salary) FROM salary Вычисляет сумму. Работает только для чисел. - ## sumWithOverflow(x) Вычисляет сумму чисел, используя для результата тот же тип данных, что и для входных параметров. Если сумма выйдет за максимальное значение для заданного типа данных, то функция вернёт ошибку. @@ -106,13 +231,13 @@ SELECT argMin(user, salary) FROM salary ## sumMap(key, value) -Производит суммирование массива 'value' по соотвествующим ключам заданным в массиве 'key'. +Производит суммирование массива 'value' по соответствующим ключам заданным в массиве 'key'. Количество элементов в 'key' и 'value' должно быть одинаковым для каждой строки, для которой происходит суммирование. -Возвращает кортеж из двух массивов - ключи в отсортированном порядке и значения, просуммированные по соотвествующим ключам. +Возвращает кортеж из двух массивов - ключи в отсортированном порядке и значения, просуммированные по соответствующим ключам. Пример: -``` sql +```sql CREATE TABLE sum_map( date Date, timeslot DateTime, @@ -140,13 +265,11 @@ GROUP BY timeslot └─────────────────────┴──────────────────────────────────────────────┘ ``` - ## avg(x) Вычисляет среднее. Работает только для чисел. -Результат всегда - Float64. - +Результат всегда Float64. @@ -159,29 +282,26 @@ GROUP BY timeslot Результат детерминирован (не зависит от порядка выполнения запроса). -Данная функция обеспечивает отличную точность даже для множеств огромной кардинальности (10B+ элементов) и рекомендуется к использованию по умолчанию. - +Функция обеспечивает высокую точность даже для множеств с высокой кардинальностью (более 10 миллиардов элементов). Рекомендуется для использования по умолчанию. ## uniqCombined(x) Приближённо вычисляет количество различных значений аргумента. Работает для чисел, строк, дат, дат-с-временем, для нескольких аргументов и аргументов-кортежей. 
-Используется комбинация трёх алгоритмов: массив, хэш-таблица и [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) с таблицей коррекции погрешности. Расход памяти в несколько раз меньше, чем у функции `uniq`, а точность в несколько раз выше. Скорость работы чуть ниже, чем у функции `uniq`, но иногда может быть даже выше - в случае распределённых запросов, в которых по сети передаётся большое количество состояний агрегации. Максимальный размер состояния составляет 96 KiB (HyperLogLog из 2^17 6-битовых ячеек).
+Используется комбинация трех алгоритмов: массив, хэш-таблица и [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) с таблицей коррекции погрешности. Расход памяти в несколько раз меньше, чем у функции `uniq`, а точность в несколько раз выше. Скорость работы чуть ниже, чем у функции `uniq`, но иногда может быть даже выше - в случае распределённых запросов, в которых по сети передаётся большое количество состояний агрегации. Максимальный размер состояния составляет 96 KiB (HyperLogLog из 2^17 6-битовых ячеек).

Результат детерминирован (не зависит от порядка выполнения запроса).

Функция `uniqCombined` является хорошим выбором по умолчанию для подсчёта количества различных значений, но стоит иметь ввиду что для множеств большой кардинальности (200M+) ошибка оценки будет только расти и для множеств огромной кардинальности (1B+ элементов) функция возвращает результат с очень большой неточностью.

## uniqHLL12(x)

Приближённо вычисляет количество различных значений аргумента, используя алгоритм [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog).

-Используется 2^12 5-битовых ячеек. Размер состояния чуть больше 2.5 КБ. Результат является не точным(ошибка до ~10%) для небольших множеств (<10K элементов), но для множеств большой кардинальности (10K - 100M) результат довольно точен (ошибка до ~1.6%) и начиная с 100M ошибка оценки будет только расти и для множеств огромной кардинальности (1B+ элементов) функция возвращает результат с очень большой неточностью.
+Используется 2^12 5-битовых ячеек. Размер состояния чуть больше 2.5 КБ. Результат не очень точный (ошибка до ~10%) для небольших множеств (<10К элементов). Однако, для множеств с большой кардинальностью (10К - 100М) результат имеет ошибку до ~1.6%. Начиная со 100M, ошибка оценки увеличивается и для множеств огромной кардинальности (1B+ элементов) результат будет очень неточным.

Результат детерминирован (не зависит от порядка выполнения запроса).

-Данная функция не рекомендуется к использованию, и в большинстве случаев, используйте функцию `uniq` или `uniqCombined`.
+Мы не рекомендуем использовать эту функцию. В большинстве случаев, используйте функцию `uniq` или `uniqCombined`.

## uniqExact(x)

Вычисляет количество различных значений аргумента точно.

Функция `uniqExact` расходует больше оперативки, чем функция `uniq`, так как размер состояния неограниченно растёт по мере роста количества различных значений.

## groupArray(x), groupArray(max_size)(x)

Составляет массив из значений аргумента.
Значения в массив могут быть добавлены в любом (недетерминированном) порядке.

## groupArrayInsertAt(x)

Вставляет в массив значение в заданную позицию.

Опциональные параметры:

-- Значение по умолчанию для подстановки на пустые позиции.
-- Длина результирующего массива. Например, если вы хотите получать массисы одинакового размера для всех агрегатных ключей. При использовании этого параметра значение по умолчанию задавать обязательно.
+- Значение по умолчанию для подстановки на пустые позиции.
+- Длина результирующего массива.
Например, если вы хотите получать массивы одинакового размера для всех агрегатных ключей. При использовании этого параметра значение по умолчанию задавать обязательно. ## groupUniqArray(x) Составляет массив из различных значений аргумента. Расход оперативки такой же, как у функции `uniqExact`. - ## quantile(level)(x) Приближённо вычисляет квантиль уровня level. level - константа, число с плавающей запятой от 0 до 1. -Рекомендуется использовать значения level в диапазоне 0.01..0.99. -Не используйте значения level, равные 0 или 1 - для таких случаев есть функции min и max. +Рекомендуется использовать значения level в диапазоне `[0.01, 0.99]`. +Не используйте значение 'level' равное 0 или 1 – используйте функции 'min' и 'max' для этих случаев. В этой функции, равно как и во всех функциях для расчёта квантилей, параметр level может быть не указан. В таком случае, он принимается равным 0.5 - то есть, функция будет вычислять медиану. @@ -241,15 +357,13 @@ GROUP BY timeslot При использовании нескольких функций `quantile` (и аналогичных) с разными уровнями в запросе, внутренние состояния не объединяются (то есть, запрос работает менее эффективно, чем мог бы). В этом случае, используйте функцию `quantiles` (и аналогичные). - ## quantileDeterministic(level)(x, determinator) Работает аналогично функции `quantile`, но, в отличие от неё, результат является детерминированным и не зависит от порядка выполнения запроса. Для этого, функция принимает второй аргумент - «детерминатор». Это некоторое число, хэш от которого используется вместо генератора случайных чисел в алгоритме reservoir sampling. Для правильной работы функции, одно и то же значение детерминатора не должно встречаться слишком часто. В качестве детерминатора вы можете использовать идентификатор события, идентификатор посетителя и т. п. -Не используйте эту функцию для рассчёта таймингов. Для этого есть более подходящая функции - `quantileTiming`. - +Не используйте эту функцию для расчёта таймингов. Для этого есть более подходящая функция - `quantileTiming`. ## quantileTiming(level)(x) @@ -262,8 +376,8 @@ GROUP BY timeslot Иначе: -- если время меньше 1024 мс., то вычисление точное. -- иначе вычисление идёт с округлением до числа, кратного 16 мс. +- если время меньше 1024 мс., то вычисление точное. +- иначе вычисление идёт с округлением до числа, кратного 16 мс. При передаче в функцию отрицательных значений, поведение не определено. @@ -273,44 +387,37 @@ GROUP BY timeslot Для своей задачи (расчёт квантилей времени загрузки страниц), использование этой функции эффективнее и результат точнее, чем для функции `quantile`. - ## quantileTimingWeighted(level)(x, weight) Отличается от функции `quantileTiming` наличием второго аргумента - «веса». Вес - неотрицательное целое число. Результат считается так же, как если бы в функцию `quantileTiming` значение `x` было передано `weight` количество раз. - ## quantileExact(level)(x) Вычисляет квантиль уровня level точно. Для этого, все переданные значения складываются в массив, который затем частично сортируется. Поэтому, функция потребляет O(n) памяти, где n - количество переданных значений. Впрочем, для случая маленького количества значений, функция весьма эффективна. - ## quantileExactWeighted(level)(x, weight) Вычисляет квантиль уровня level точно. При этом, каждое значение учитывается с весом weight - как будто оно присутствует weight раз. 
Аргументы функции можно рассматривать как гистограммы, где значению x соответствует «столбик» гистограммы высоты weight, а саму функцию можно рассматривать как суммирование гистограмм. В качестве алгоритма используется хэш-таблица. Из-за этого, в случае, если передаваемые значения часто повторяются, функция потребляет меньше оперативки, чем `quantileExact`. Вы можете использовать эту функцию вместо `quantileExact`, указав в качестве веса число 1. - ## quantileTDigest(level)(x) -Вычисляет квантиль уровня level приближённо, с использованием алгоритма [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf). Максимальная погрешность составляет 1%. Расход памяти на состояние пропорционален логарифму от количества переданных значений. +Вычисляет квантиль уровня level приближенно, с использованием алгоритма [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf). Максимальная погрешность составляет 1%. Расход памяти на состояние пропорционален логарифму от количества переданных значений. Производительность функции ниже `quantile`, `quantileTiming`. По соотношению размера состояния и точности, функция существенно лучше, чем `quantile`. Результат зависит от порядка выполнения запроса, и является недетерминированным. - ## median(x) Для всех quantile-функций, также присутствуют соответствующие median-функции: `median`, `medianDeterministic`, `medianTiming`, `medianTimingWeighted`, `medianExact`, `medianExactWeighted`, `medianTDigest`. Они являются синонимами и их поведение ничем не отличается. - ## quantiles(level1, level2, ...)(x) Для всех quantile-функций, также присутствуют соответствующие quantiles-функции: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. Эти функции за один проход вычисляют все квантили перечисленных уровней и возвращают массив вычисленных значений. - ## varSamp(x) Вычисляет величину `Σ((x - x̅)^2) / (n - 1)`, где `n` - размер выборки, `x̅`- среднее значение `x`. @@ -329,39 +436,38 @@ GROUP BY timeslot Результат равен квадратному корню от `varSamp(x)`. - ## stddevPop(x) Результат равен квадратному корню от `varPop(x)`. - ## topK(N)(column) Возвращает массив наиболее часто встречающихся значений в указанном столбце. Результирующий массив упорядочен по убыванию частоты значения (не по самим значениям). -Реализует [Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) алгоритм для анализа TopK, на основе reduce-and-combine -алгоритма из методики [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf). +Реализует [Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) алгоритм для анализа TopK, на основе reduce-and-combine алгоритма из методики [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf). ``` topK(N)(column) ``` -Функция не дает гарантированного результата, при определенных условиях возможны ошибки и вернутся частые, но не наиболее частые значения. +Функция не дает гарантированного результата. В некоторых ситуациях могут возникать ошибки, и функция возвращает частые, но не наиболее частые значения. Рекомендуем использовать значения `N < 10`, при больших `N` снижается производительность. Максимально возможное значение `N = 65536`. **Аргументы** + - 'N' - Количество значений. -- 'x' - Столбец. +- 'x' – Столбец. 
**Пример** -Возьмем набор данных [OnTime](../../getting_started/example_datasets/ontime.md#example_datasets-ontime) и выберем 3 наиболее часто встречающихся значения в столбце `AirlineID`. +Возьмём набор данных [OnTime](../../getting_started/example_datasets/ontime.md#example_datasets-ontime) и выберем 3 наиболее часто встречающихся значения в столбце `AirlineID`. -``` sql +```sql SELECT topK(3)(AirlineID) AS res FROM ontime ``` + ``` ┌─res─────────────────┐ │ [19393,19790,19805] │ @@ -374,12 +480,10 @@ FROM ontime Возвращает Float64. В случае, когда `n <= 1`, возвращается +∞. - ## covarPop(x, y) Вычисляет величину `Σ((x - x̅)(y - y̅)) / n`. - ## corr(x, y) Вычисляет коэффициент корреляции Пирсона: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`. From bcab45b3fc23f20f68dab6c1a950d11b6d389d8f Mon Sep 17 00:00:00 2001 From: BayoNet Date: Thu, 1 Nov 2018 16:28:45 +0300 Subject: [PATCH 42/79] Partial sync between ru and en version (#3464) * Update of english version of descriprion of the table function `file`. * New syntax for ReplacingMergeTree. Some improvements in text. * Significantly change article about SummingMergeTree. Article is restructured, text is changed in many places of the document. New syntax for table creation is described. * Descriptions of AggregateFunction and AggregatingMergeTree are updated. Russian version. * New syntax for new syntax of CREATE TABLE * Added english docs on Aggregating, Replacing and SummingMergeTree. * CollapsingMergeTree docs. English version. * 1. Update of CollapsingMergeTree. 2. Minor changes in markup * Update aggregatefunction.md * Update aggregatefunction.md * Update aggregatefunction.md * Update aggregatingmergetree.md * GraphiteMergeTree docs update. New syntax for creation of Replicated* tables. Minor changes in *MergeTree tables creation syntax. * Markup fix * Markup and language fixes * Clarification in the CollapsingMergeTree article * DOCAPI-4821. Sync between ru and en versions of docs. * Fixed the ambiguity in geo functions description. * Example of JOIN in ru docs * Deleted misinforming example. --- docs/en/operations/configuration_files.md | 2 +- .../en/operations/server_settings/settings.md | 1 + .../operations/settings/query_complexity.md | 4 ++- .../en/operations/table_engines/dictionary.md | 2 +- docs/en/operations/table_engines/mergetree.md | 2 +- .../dicts/external_dicts_dict_lifetime.md | 6 ++--- docs/en/query_language/functions/geo.md | 11 ++++---- .../functions/string_search_functions.md | 3 ++- .../functions/type_conversion_functions.md | 4 +++ docs/en/query_language/operators.md | 8 ++++-- docs/ru/operations/table_engines/merge.md | 27 +++++++++++++++++++ docs/ru/operations/tips.md | 2 +- docs/ru/query_language/functions/geo.md | 11 ++++---- docs/ru/query_language/operators.md | 6 ++--- 14 files changed, 63 insertions(+), 26 deletions(-) diff --git a/docs/en/operations/configuration_files.md b/docs/en/operations/configuration_files.md index a7cdfb124ee..799b9e5de75 100644 --- a/docs/en/operations/configuration_files.md +++ b/docs/en/operations/configuration_files.md @@ -14,7 +14,7 @@ If `replace` is specified, it replaces the entire element with the specified one If `remove` is specified, it deletes the element. -The config can also define "substitutions". If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. 
This can be changed in the [include_from](server_settings/settings.md#server_settings-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros]() server_settings/settings.md#server_settings-macros)).
+The config can also define "substitutions". If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](server_settings/settings.md#server_settings-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](server_settings/settings.md#server_settings-macros)).
 
 Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
 
diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md
index 8e10969ed6b..42dce70b948 100644
--- a/docs/en/operations/server_settings/settings.md
+++ b/docs/en/operations/server_settings/settings.md
@@ -532,6 +532,7 @@ Columns in the log:
 
 - database_name – The name of the database.
 - table_name – Name of the table.
 - part_name – Name of the data part.
+- partition_id – The identifier of the partition.
 - size_in_bytes – Size of the data part in bytes.
 - merged_from – An array of names of data parts that make up the merge (also used when downloading a merged part).
 - merge_time_ms – Time spent on the merge.
 
diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md
index 9e49dc58ca3..67418128ffa 100644
--- a/docs/en/operations/settings/query_complexity.md
+++ b/docs/en/operations/settings/query_complexity.md
@@ -2,7 +2,9 @@
 
 Restrictions on query complexity are part of the settings.
 They are used in order to provide safer execution from the user interface.
-Almost all the restrictions only apply to SELECTs.For distributed query processing, restrictions are applied on each server separately.
+Almost all the restrictions only apply to `SELECT`. For distributed query processing, restrictions are applied on each server separately.
+
+ClickHouse checks the restrictions for data parts, not for each row. It means that a restriction can be exceeded by up to the size of a data part.
 
 Restrictions on the "maximum amount of something" can take the value 0, which means "unrestricted".
 Most restrictions also have an 'overflow_mode' setting, meaning what to do when the limit is exceeded.
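The per-part check described in the query_complexity.md change above is easy to observe with the read limits. A minimal sketch, assuming a `hits` table with a `CounterID` column (both illustrative); `max_rows_to_read` and `read_overflow_mode` are the documented limit settings:

```sql
SET max_rows_to_read = 1000000;
SET read_overflow_mode = 'break';  -- stop reading instead of throwing an exception

-- The limit is checked after whole data parts/blocks are read, not per row,
-- so the query may read (and count) somewhat more than 1000000 rows.
SELECT count() FROM hits WHERE CounterID = 34;
```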
diff --git a/docs/en/operations/table_engines/dictionary.md b/docs/en/operations/table_engines/dictionary.md
index eed7f7afaf4..067611605ba 100644
--- a/docs/en/operations/table_engines/dictionary.md
+++ b/docs/en/operations/table_engines/dictionary.md
@@ -2,7 +2,7 @@
 
 # Dictionary
 
-The `Dictionary` engine displays the dictionary data as a ClickHouse table.
+The `Dictionary` engine displays the [dictionary](../../query_language/dicts/external_dicts.md#dicts-external_dicts) data as a ClickHouse table.
 
 As an example, consider a dictionary of `products` with the following configuration:
 
diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md
index 7b4ecd51fe7..ac260313389 100644
--- a/docs/en/operations/table_engines/mergetree.md
+++ b/docs/en/operations/table_engines/mergetree.md
@@ -135,7 +135,7 @@ If the data query specifies:
 
 - `CounterID in ('a', 'h')`, the server reads the data in the ranges of marks `[0, 3)` and `[6, 8)`.
 - `CounterID IN ('a', 'h') AND Date = 3`, the server reads the data in the ranges of marks `[1, 3)` and `[7, 8)`.
-- `Date = 3`, the server reads the data in the range of marks `[1, 10)`.
+- `Date = 3`, the server reads the data in the range of marks `[1, 10]`.
 
 The examples above show that it is always more effective to use an index than a full scan.
 
diff --git a/docs/en/query_language/dicts/external_dicts_dict_lifetime.md b/docs/en/query_language/dicts/external_dicts_dict_lifetime.md
index c04829fce10..7ce661ab5a8 100644
--- a/docs/en/query_language/dicts/external_dicts_dict_lifetime.md
+++ b/docs/en/query_language/dicts/external_dicts_dict_lifetime.md
@@ -35,13 +35,13 @@ Example of settings:
 
 When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [ source](external_dicts_dict_sources.md#dicts-external_dicts_dict_sources):
 
> - For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated.
 - For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query.
 - Dictionaries from other sources are updated every time by default.
 
-For MySQL (InnoDB) and ODBC sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
+For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
 
> - The dictionary table must have a field that always changes when the source data is updated.
 - The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](external_dicts_dict_sources.md#dicts-external_dicts_dict_sources).
Example of settings: diff --git a/docs/en/query_language/functions/geo.md b/docs/en/query_language/functions/geo.md index 05725b95aed..0620dbbe776 100644 --- a/docs/en/query_language/functions/geo.md +++ b/docs/en/query_language/functions/geo.md @@ -10,10 +10,10 @@ greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg) **Input parameters** -- `lon1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`. -- `lat1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`. -- `lon2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`. -- `lat2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`. +- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`. +- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`. +- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`. +- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`. Positive values correspond to North latitude and East longitude, and negative values correspond to South latitude and West longitude. @@ -45,8 +45,7 @@ pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ) **Input parameters** -- `x` — Latitude of the point. -- `y` — Longitude of the point. +- `x, y` — Coordinates of a point on the plane. - `xᵢ, yᵢ` — Coordinates of the center of the `i`-th ellipsis. - `aᵢ, bᵢ` — Axes of the `i`-th ellipsis in meters. diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index 182152e36ed..22af3c8550d 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -18,7 +18,8 @@ For a case-insensitive search, use the function `positionCaseInsensitiveUTF8`. ## match(haystack, pattern) -Checks whether the string matches the 'pattern' regular expression. A re2 regular expression. +Checks whether the string matches the `pattern` regular expression. A `re2` regular expression. The [syntax](https://github.com/google/re2/wiki/Syntax) of the `re2` regular expressions is more limited than the syntax of the Perl regular expressions. + Returns 0 if it doesn't match, or 1 if it matches. Note that the backslash symbol (`\`) is used for escaping in the regular expression. The same symbol is used for escaping in string literals. So in order to escape the symbol in a regular expression, you must write two backslashes (\\) in a string literal. diff --git a/docs/en/query_language/functions/type_conversion_functions.md b/docs/en/query_language/functions/type_conversion_functions.md index c4b4099d7e6..5fdd1a1a45b 100644 --- a/docs/en/query_language/functions/type_conversion_functions.md +++ b/docs/en/query_language/functions/type_conversion_functions.md @@ -12,6 +12,10 @@ ## toDate, toDateTime +## toDecimal32(value, S), toDecimal64(value, S), toDecimal128(value, S) + +Converts `value` to [Decimal](../../data_types/decimal.md#data_type-decimal) of precision `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. + ## toString Functions for converting between numbers, strings (but not fixed strings), dates, and dates with times. 
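A short usage sketch for the `toDecimal*` conversions documented in the type_conversion_functions.md change above; the literal values are illustrative, and `S` is the number of decimal places:

```sql
SELECT
    toDecimal32('3.1415', 4) AS d32,  -- Decimal(9, 4), parsed from a string
    toDecimal64(2.71828, 5)  AS d64   -- Decimal(18, 5), converted from a number
```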
diff --git a/docs/en/query_language/operators.md b/docs/en/query_language/operators.md index 613dc50a58b..3f8e0a3f6fe 100644 --- a/docs/en/query_language/operators.md +++ b/docs/en/query_language/operators.md @@ -91,11 +91,15 @@ The conditional operator calculates the values of b and c, then checks whether c CASE [x] WHEN a THEN b [WHEN ... THEN ...] - ELSE c + [ELSE c] END ``` -If "x" is specified, then transform(x, \[a, ...\], \[b, ...\], c). Otherwise – multiIf(a, b, ..., c). +If `x` is specified, then `transform(x, [a, ...], [b, ...], c)` function is used. Otherwise – `multiIf(a, b, ..., c)`. + +If there is no `ELSE c` clause in the expression, the default value is `NULL`. + +The `transform` function does not work with `NULL`. ## Concatenation Operator diff --git a/docs/ru/operations/table_engines/merge.md b/docs/ru/operations/table_engines/merge.md index 3b2174d52fa..1b4e4d6a984 100644 --- a/docs/ru/operations/table_engines/merge.md +++ b/docs/ru/operations/table_engines/merge.md @@ -24,6 +24,33 @@ Merge(hits, '^WatchLog') Типичный способ использования движка `Merge` — работа с большим количеством таблиц типа `TinyLog`, как с одной. +Пример 2: + +Пусть есть старая таблица `WatchLog_old`. Необходимо изменить партиционирование без перемещения данных в новую таблицу `WatchLog_new`. При этом в выборке должны участвовать данные обеих таблиц. + +``` +CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64) +ENGINE=MergeTree(date, (UserId, EventType), 8192); +INSERT INTO WatchLog_old VALUES ('2018-01-01', 1, 'hit', 3); + +CREATE TABLE WatchLog_new(date Date, UserId Int64, EventType String, Cnt UInt64) +ENGINE=MergeTree PARTITION BY date ORDER BY (UserId, EventType) SETTINGS index_granularity=8192; +INSERT INTO WatchLog_new VALUES ('2018-01-02', 2, 'hit', 3); + +CREATE TABLE WatchLog as WatchLog_old ENGINE=Merge(currentDatabase(), '^WatchLog'); + +SELECT * +FROM WatchLog + +┌───────date─┬─UserId─┬─EventType─┬─Cnt─┐ +│ 2018-01-01 │ 1 │ hit │ 3 │ +└────────────┴────────┴───────────┴─────┘ +┌───────date─┬─UserId─┬─EventType─┬─Cnt─┐ +│ 2018-01-02 │ 2 │ hit │ 3 │ +└────────────┴────────┴───────────┴─────┘ + +``` + ## Виртуальные столбцы Виртуальные столбцы — столбцы, предоставляемые движком таблиц независимо от определения таблицы. То есть, такие столбцы не указываются в `CREATE TABLE`, но доступны для `SELECT`. diff --git a/docs/ru/operations/tips.md b/docs/ru/operations/tips.md index af5ab00c31a..4a6d8787a36 100644 --- a/docs/ru/operations/tips.md +++ b/docs/ru/operations/tips.md @@ -21,7 +21,7 @@ Turbo-Boost крайне не рекомендуется отключать. П Нужно всегда использовать `performance` scaling governor. `ondemand` scaling governor работает намного хуже при постоянно высоком спросе. ```bash -sudo echo 'performance' | tee /sys/devices/system/cpu/cpu\*/cpufreq/scaling_governor +sudo echo 'performance' | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor ``` ## Ограничение CPU diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index f1b460a4c41..0b33bf5dab9 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -10,10 +10,10 @@ greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg) **Входные параметры** -- `lon1Deg` — широта первой точки в градусах. Диапазон — `[-90°, 90°]`. -- `lat1Deg` — долгота первой точки в градусах. Диапазон — `[-180°, 180°]`. -- `lon2Deg` — широта второй точки в градусах. Диапазон — `[-90°, 90°]`. -- `lat2Deg` — долгота второй точки в градусах. 
Диапазон — `[-180°, 180°]`. +- `lon1Deg` — долгота первой точки в градусах. Диапазон — `[-180°, 180°]`. +- `lat1Deg` — широта первой точки в градусах. Диапазон — `[-90°, 90°]`. +- `lon2Deg` — долгота второй точки в градусах. Диапазон — `[-180°, 180°]`. +- `lat2Deg` — широта второй точки в градусах. Диапазон — `[-90°, 90°]`. Положительные значения соответствуют северной широте и восточной долготе, отрицательные — южной широте и западной долготе. @@ -45,8 +45,7 @@ pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ) **Входные параметры** -- `x` — широта точки. -- `y` — долгота точки. +- `x, y` — координаты точки на плоскости. - `xᵢ, yᵢ` — координаты центра `i`-го эллипса. - `aᵢ, bᵢ` — полуоси `i`-го эллипса в метрах. diff --git a/docs/ru/query_language/operators.md b/docs/ru/query_language/operators.md index 3814e1699d4..9d82d07feed 100644 --- a/docs/ru/query_language/operators.md +++ b/docs/ru/query_language/operators.md @@ -95,9 +95,9 @@ CASE [x] END ``` -В случае указания x - функция transform(x, \[a, ...\], \[b, ...\], c). Иначе - multiIf(a, b, ..., c). -При отсутствии секции `ELSE c`, значением по умолчанию будет NULL. -P.S. Функция transform не умеет работать с NULL. +В случае указания `x` - функция `transform(x, [a, ...], [b, ...], c)`. Иначе — `multiIf(a, b, ..., c)`. +При отсутствии секции `ELSE c`, значением по умолчанию будет `NULL`. +P.S. Функция `transform` не умеет работать с `NULL`. ## Оператор склеивания строк From 10e604428a7103661143f40cf05a0173afee7b55 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 1 Nov 2018 16:47:52 +0300 Subject: [PATCH 43/79] CLICKHOUSE-4098 Correct /etc/init.d/clickhouse-server status exit code --- debian/clickhouse-server.init | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init index 3c4b65a21ba..92978c72257 100755 --- a/debian/clickhouse-server.init +++ b/debian/clickhouse-server.init @@ -328,12 +328,14 @@ status() { if is_running; then echo "$PROGRAM service is running" + exit 0 else if is_cron_disabled; then echo "$PROGRAM service is stopped"; else echo "$PROGRAM: process unexpectedly terminated" fi + exit 3 fi } @@ -342,7 +344,6 @@ status() case "$1" in status) status - exit 0 ;; esac From a7437b93a97a6da47ba5c0658dff032cf877fe55 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 1 Nov 2018 17:05:37 +0300 Subject: [PATCH 44/79] Commited StorageSystemContributors.generated.cpp (#3510) * CLICKHOUSE-4085 system.contributors * fi * Fix random * Commited StorageSystemContributors.generated.cpp * fix * Update CMakeLists.txt --- .gitignore | 2 - dbms/src/Storages/System/CMakeLists.txt | 8 +- .../StorageSystemContributors.generated.cpp | 262 ++++++++++++++++++ release_lib.sh | 3 +- 4 files changed, 269 insertions(+), 6 deletions(-) create mode 100644 dbms/src/Storages/System/StorageSystemContributors.generated.cpp diff --git a/.gitignore b/.gitignore index bc598cf0d0d..8359edbabde 100644 --- a/.gitignore +++ b/.gitignore @@ -9,8 +9,6 @@ # auto generated files *.logrt -dbms/src/Storages/System/StorageSystemContributors.generated.cpp - /build /build_* /docs/build diff --git a/dbms/src/Storages/System/CMakeLists.txt b/dbms/src/Storages/System/CMakeLists.txt index ed12cf6f78c..f445a3e1e29 100644 --- a/dbms/src/Storages/System/CMakeLists.txt +++ b/dbms/src/Storages/System/CMakeLists.txt @@ -1,6 +1,8 @@ -if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.generated.cpp) - execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh) -endif() 
+# The file StorageSystemContributors.cpp is generated at release time and committed to the source tree. +# You can also regenerate it manually this way: +#if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.generated.cpp) +# execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh) +#endif() set (CONFIG_BUILD ${CMAKE_CURRENT_BINARY_DIR}/StorageSystemBuildOptions.generated.cpp) get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPERTY COMPILE_DEFINITIONS) diff --git a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp new file mode 100644 index 00000000000..a92e0ccfafb --- /dev/null +++ b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp @@ -0,0 +1,262 @@ +// autogenerated by ./StorageSystemContributors.sh +const char * auto_contributors[] { +"Alberto", +"Aleksandra (Ася)", +"Alex Bocharov", +"Alex Krash", +"Alex Zatelepin", +"Alexander Avdonkin", +"Alexander Krasheninnikov", +"Alexander Lukin", +"Alexander Makarov", +"Alexander Marshalov", +"Alexander Millin", +"Alexander Prudaev", +"Alexander Sapin", +"Alexandr Krasheninnikov", +"Alexandr Orlov", +"Alexei Averchenko", +"Alexey Arno", +"Alexey Milovidov", +"Alexey Tronov", +"Alexey Vasiliev", +"Alexey Zatelepin", +"Alexsey Shestakov", +"Aliaksandr Pliutau", +"Amos Bird", +"Amy Krishnevsky", +"Anastasiya Tsarkova", +"AndreevDm", +"Andrew Grigorev", +"Andrey", +"Andrey Dudin", +"Andrey M", +"Andrey Mironov", +"Andrey Urusov", +"Anton Popov", +"Anton Tihonov", +"Anton Tikhonov", +"Arsen Hakobyan", +"Artem Andreenko", +"Artem Zuikov", +"Artemeey", +"Artemkin Pavel", +"Arthur Tokarchuk", +"Atri Sharma", +"Babacar Diassé", +"BayoNet", +"BlahGeek", +"Bogdan", +"Bogdan Voronin", +"Bolinov", +"Brett Hoerner", +"Bulat Gaifullin", +"Chen Yufei", +"Ciprian Hacman", +"Constantin S. Pan", +"CurtizJ", +"Daniel Bershatsky", +"Denis Burlaka", +"Denis Zhuravlev", +"Derek Perkins", +"Dmitry Bilunov", +"Dmitry Galuza", +"Dmitry Luhtionov", +"Dmitry Moskowski", +"Dmitry S..ky / skype: dvska-at-skype", +"Eugene Klimov", +"Eugene Konkov", +"Evgeniy Gatov", +"Evgeniy Udodov", +"Evgeny Konkov", +"Flowyi", +"Fruit of Eden", +"George", +"George3d6", +"Guillaume Tassery", +"Hamoon", +"Hiroaki Nakamura", +"Igor", +"Igor Hatarist", +"Igor Strykhar", +"Ilya", +"Ilya Breev", +"Ilya Khomutov", +"Ilya Korolev", +"Ilya Shipitsin", +"Ivan Babrou", +"Ivan Blinkov", +"Ivan He", +"Ivan Zhukov", +"Jason", +"Jean Baptiste Favre", +"Jonatas Freitas", +"Keiji Yoshida", +"Kirill Malev", +"Kirill Shvakov", +"KochetovNicolai", +"Konstantin Grabar", +"Konstantin Lebedev", +"Konstantin Podshumok", +"Leonardo Cecchi", +"Lev Borodin", +"Liu Cong", +"LiuCong", +"LiuYangkuan", +"Luis Bosque", +"Maks Skorokhod", +"Maksim", +"Marek Vavrusa", +"Marek Vavruša", +"Marek Vavruša", +"Marsel Arduanov", +"Max Akhmedov", +"Max Vetrov", +"Maxim Fridental", +"Maxim Khrisanfov", +"Maxim Nikulin", +"MaximAL", +"Michael Furmur", +"Michael Kolupaev", +"Michael Razuvaev", +"Mikhail Filimonov", +"Mikhail Salosin", +"Mikhail Surin", +"Mikhail f. 
Shiryaev", +"Milad Arabi", +"Narek Galstyan", +"Nicolae Vartolomei", +"Nikita Vasilev", +"Nikolai Kochetov", +"Nikolay Kirsh", +"Nikolay Vasiliev", +"Nikolay Volosatov", +"Okada Haruki", +"Oleg Komarov", +"Oleg Obleukhov", +"Olga Khvostikova", +"Orivej Desh", +"Pavel", +"Pavel Kartaviy", +"Pavel Kartavyy", +"Pavel Litvinenko", +"Pavel Patrin", +"Pavel Yakunin", +"Pawel Rog", +"Ravengg", +"Reto Kromer", +"Roman Lipovsky", +"Roman Nozdrin", +"Roman Peshkurov", +"Roman Tsisyk", +"SaltTan", +"Sergei Tsetlin (rekub)", +"Sergey Elantsev", +"Sergey Fedorov", +"Sergey Lazarev", +"Sergey Magidovich", +"Sergey V. Galtsev", +"Sergey Zaikin", +"Silviu Caragea", +"Simon Podlipsky", +"Sjoerd Mulder", +"Snow", +"Stanislav Pavlovichev", +"Stas Pavlovichev", +"SuperBot", +"Tangaev", +"The-Alchemist", +"Tobias Adamson", +"Tsarkova Anastasia", +"Vadim", +"Vadim Plakhtinskiy", +"Vadim Skipin", +"VadimPE", +"Valera Ryaboshapko", +"Vasily Nemkov", +"Vasily Okunev", +"Veloman Yunkan", +"Veniamin Gvozdikov", +"Victor Tarnavsky", +"Vitaliy Lyudvichenko", +"Vitaly Samigullin", +"Vlad Arkhipov", +"Vladimir Chebotarev", +"Vladimir Kolobaev", +"Vladimir Kozbin", +"Vladimir Smirnov", +"Vladislav Rassokhin", +"Vojtech Splichal", +"Vsevolod Orlov", +"Vyacheslav Alipov", +"William Shallum", +"Winter Zhang", +"Yegor Andreenko", +"Yuri Dyachenko", +"Yurii Vlasenko", +"Yury Karpovich", +"Yury Stankevich", +"abyss7", +"alesapin", +"alexey-milovidov", +"ap11", +"aprudaev", +"artpaul", +"avsharapov", +"blazerer", +"bseng", +"cekc", +"champtar", +"chenxing-xc", +"chenxing.xc", +"chertus", +"daoready", +"decaseal", +"egatov", +"elBroom", +"ezhaka", +"f1yegor", +"felixoid", +"filimonov", +"flow", +"glockbender", +"hotid", +"igor", +"ivanzhukov", +"javi", +"javi santana", +"kmeaw", +"ks1322", +"kshvakov", +"leozhang", +"liuyimin", +"lomberts", +"mfridental", +"morty", +"ns-vasilev", +"orantius", +"peshkurov", +"proller", +"pyos", +"robot-clickhouse", +"robot-metrika-test", +"root", +"santaux", +"serebrserg", +"shedx", +"stavrolia", +"sundy-li", +"sundyli", +"topvisor", +"velom", +"zamulla", +"zhang2014", +"Георгий Кондратьев", +"Дмитрий Канатников", +"Иванов Евгений", +"Павел Литвиненко", +"Смитюх Вячеслав", +"Сундуков Алексей", +"张健", +"谢磊", +nullptr }; diff --git a/release_lib.sh b/release_lib.sh index 244f3619001..328b91b8f5b 100644 --- a/release_lib.sh +++ b/release_lib.sh @@ -96,7 +96,8 @@ function gen_revision_author { gen_changelog "$VERSION_STRING" "" "$AUTHOR" "" gen_dockerfiles "$VERSION_STRING" - git commit -m "$auto_message [$VERSION_STRING] [$VERSION_REVISION]" dbms/cmake/version.cmake debian/changelog docker/*/Dockerfile + . dbms/src/Storages/System/StorageSystemContributors.sh + git commit -m "$auto_message [$VERSION_STRING] [$VERSION_REVISION]" dbms/cmake/version.cmake debian/changelog docker/*/Dockerfile dbms/src/Storages/System/StorageSystemContributors.generated.cpp git push echo "Generated version: ${VERSION_STRING}, revision: ${VERSION_REVISION}." 
From 71403c3ff834c8cc72969d81e346cbfb6339b4b8 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 1 Nov 2018 17:07:14 +0300 Subject: [PATCH 45/79] StorageSystemContributors.sh debug (#3491) * Sql fuzzy test * wip * wip * wip * wip * wip * Build fixes * better * bugs * wip * fix * Fix test * build fixes * better * wip * fix test * StorageSystemContributors.sh debug * debug * wip * more * Fix jemalloc include * more --- dbms/CMakeLists.txt | 4 ++++ dbms/src/Storages/System/StorageSystemContributors.sh | 8 +++++++- dbms/tests/queries/bugs/fuzzy.sql | 7 +++++++ libs/libcommon/cmake/find_jemalloc.cmake | 5 +++-- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 91171eea685..60dfa5b8cf2 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -265,6 +265,10 @@ if (NOT USE_INTERNAL_ZSTD_LIBRARY) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${ZSTD_INCLUDE_DIR}) endif () +if (USE_JEMALLOC) + target_include_directories (dbms SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # used in Interpreters/AsynchronousMetrics.cpp +endif () + target_include_directories (dbms PUBLIC ${DBMS_INCLUDE_DIR}) target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR}) diff --git a/dbms/src/Storages/System/StorageSystemContributors.sh b/dbms/src/Storages/System/StorageSystemContributors.sh index 44b4730a6f4..58895d1b781 100755 --- a/dbms/src/Storages/System/StorageSystemContributors.sh +++ b/dbms/src/Storages/System/StorageSystemContributors.sh @@ -1,13 +1,17 @@ #!/usr/bin/env bash +set -x + CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CONTRIBUTORS_FILE=${CONTRIBUTORS_FILE=$CUR_DIR/StorageSystemContributors.generated.cpp} -git shortlog --summary | perl -lnE 's/^\s+\d+\s+(.+)/"$1",/; next unless $1; say $_' > $CONTRIBUTORS_FILE.tmp +git shortlog --summary | perl -lnE 's/^\s+\d+\s+(.+)/"$1",/; next unless $1; say $_' > $CONTRIBUTORS_FILE.tmp # If git history not available - dont make target file if [ ! -s $CONTRIBUTORS_FILE.tmp ]; then + echo Empty result of git shortlog + git status exit fi @@ -16,4 +20,6 @@ echo "const char * auto_contributors[] {" >> $CONTRIBUTORS_FILE cat $CONTRIBUTORS_FILE.tmp >> $CONTRIBUTORS_FILE echo "nullptr };" >> $CONTRIBUTORS_FILE +echo "Collected `cat $CONTRIBUTORS_FILE.tmp | wc -l` contributors." 
rm $CONTRIBUTORS_FILE.tmp + diff --git a/dbms/tests/queries/bugs/fuzzy.sql b/dbms/tests/queries/bugs/fuzzy.sql index 52008e27ca7..a3c48df935c 100644 --- a/dbms/tests/queries/bugs/fuzzy.sql +++ b/dbms/tests/queries/bugs/fuzzy.sql @@ -7,3 +7,10 @@ SELECT extractURLParameter('ZiqSZeh?', '\0') SELECT globalNotIn(['"wh'], [NULL]); SELECT globalIn([''], [NULL]) SELECT ( SELECT toDecimal128([], rowNumberInBlock()) ) , lcm('', [[(CAST(('>A') AS String))]]); +SELECT truncate(895, -16); +SELECT (CAST((lowerUTF8('a7\xwK>-')) AS String)), [6935]; +SELECT upperUTF8(sipHash128('\0')), [], ['xD2jG']; +SELECT arrayEnumerateUniq(anyHeavy([]), []); +SELECT notIn([['']], [[NULL]]); +SELECT subtractDays((CAST((-5263074.47) AS DateTime)), -737895); +SELECT arrayEnumerateDense([], [sequenceCount(NULL)]); diff --git a/libs/libcommon/cmake/find_jemalloc.cmake b/libs/libcommon/cmake/find_jemalloc.cmake index f974688c5be..a8ef4289f5f 100644 --- a/libs/libcommon/cmake/find_jemalloc.cmake +++ b/libs/libcommon/cmake/find_jemalloc.cmake @@ -23,8 +23,9 @@ if (ENABLE_JEMALLOC) find_package (JeMalloc) endif () - if (NOT JEMALLOC_LIBRARIES AND NOT MISSING_INTERNAL_JEMALLOC_LIBRARY) + if ((NOT JEMALLOC_LIBRARIES OR NOT JEMALLOC_INCLUDE_DIR) AND NOT MISSING_INTERNAL_JEMALLOC_LIBRARY) set (JEMALLOC_LIBRARIES "jemalloc") + set (JEMALLOC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/jemalloc/include") set (USE_INTERNAL_JEMALLOC_LIBRARY 1) endif () @@ -38,5 +39,5 @@ if (ENABLE_JEMALLOC) message (FATAL_ERROR "ENABLE_JEMALLOC is set to true, but it cannot be used with sanitizers") endif () - message (STATUS "Using jemalloc=${USE_JEMALLOC}: ${JEMALLOC_LIBRARIES}") + message (STATUS "Using jemalloc=${USE_JEMALLOC}: ${JEMALLOC_INCLUDE_DIR} : ${JEMALLOC_LIBRARIES}") endif () From 045b5d5cd8819118f0d891e72aebf750834b80f4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 1 Nov 2018 16:36:13 +0300 Subject: [PATCH 46/79] Fixed LowCardinality(Nullable()) (de)serialization with arena. #3227 #3256 #3212 --- dbms/src/Columns/ColumnUnique.h | 40 +++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/dbms/src/Columns/ColumnUnique.h b/dbms/src/Columns/ColumnUnique.h index c732973c31f..b60b6446155 100644 --- a/dbms/src/Columns/ColumnUnique.h +++ b/dbms/src/Columns/ColumnUnique.h @@ -62,10 +62,7 @@ public: UInt64 getUInt(size_t n) const override { return getNestedColumn()->getUInt(n); } Int64 getInt(size_t n) const override { return getNestedColumn()->getInt(n); } bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override - { - return column_holder->serializeValueIntoArena(n, arena, begin); - } + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; void updateHashWithValue(size_t n, SipHash & hash) const override { return getNestedColumn()->updateHashWithValue(n, hash); @@ -298,9 +295,44 @@ size_t ColumnUnique::uniqueInsertDataWithTerminatingZero(const char return static_cast(position); } +template +StringRef ColumnUnique::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +{ + if (is_nullable) + { + const UInt8 null_flag = 1; + const UInt8 not_null_flag = 0; + + auto pos = arena.allocContinue(sizeof(null_flag), begin); + auto & flag = (n == getNullValueIndex() ? 
null_flag : not_null_flag);
+        memcpy(pos, &flag, sizeof(flag));
+
+        size_t nested_size = 0;
+
+        if (n != getNullValueIndex())
+            nested_size = column_holder->serializeValueIntoArena(n, arena, begin).size;
+
+        return StringRef(pos, sizeof(null_flag) + nested_size);
+    }
+
+    return column_holder->serializeValueIntoArena(n, arena, begin);
+}
+
 template <typename ColumnType>
 size_t ColumnUnique<ColumnType>::uniqueDeserializeAndInsertFromArena(const char * pos, const char *& new_pos)
 {
+    if (is_nullable)
+    {
+        UInt8 val = *reinterpret_cast<const UInt8 *>(pos);
+        pos += sizeof(val);
+
+        if (val)
+        {
+            new_pos = pos;
+            return getNullValueIndex();
+        }
+    }
+
     auto column = getRawColumnPtr();
     size_t prev_size = column->size();
     new_pos = column->deserializeAndInsertFromArena(pos);

From b0ac72922827e433268fa7ed5337666f44f6e5f6 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Thu, 1 Nov 2018 17:56:37 +0300
Subject: [PATCH 47/79] CLICKHOUSE-4094: Add ability to kill pending queries

---
 .../InterpreterKillQueryQuery.cpp             | 13 +++++++--
 dbms/src/Interpreters/ProcessList.cpp         |  5 ++--
 dbms/src/Interpreters/ProcessList.h           |  2 ++
 dbms/src/Interpreters/executeQuery.cpp        | 11 ++++++--
 .../0_stateless/00417_kill_query.reference    |  1 +
 .../queries/0_stateless/00417_kill_query.sh   | 27 +++++++++++++++++++
 6 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp
index f0add31dc38..5a9d423dae6 100644
--- a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp
@@ -23,6 +23,7 @@ namespace ErrorCodes
 {
     extern const int READONLY;
     extern const int LOGICAL_ERROR;
+    extern const int CANNOT_KILL;
 }

@@ -138,13 +139,16 @@ public:

             auto code = process_list.sendCancelToQuery(curr_process.query_id, curr_process.user, true);

-            if (code != CancellationCode::QueryIsNotInitializedYet && code != CancellationCode::CancelSent)
+            /// Raise exception if this query is immortal, the user has to know
+            if (code == CancellationCode::CancelCannotBeSent)
+                throw Exception("Can't kill query '" + curr_process.query_id + "' because it consists of unkillable stages", ErrorCodes::CANNOT_KILL);
+            else if (code != CancellationCode::QueryIsNotInitializedYet && code != CancellationCode::CancelSent)
             {
                 curr_process.processed = true;
                 insertResultRow(curr_process.source_num, code, processes_block, res_sample_block, columns);
                 ++num_processed_queries;
             }
-            /// Wait if QueryIsNotInitializedYet or CancelSent
+            /// Wait if CancelSent
         }

         /// KILL QUERY could be killed also

@@ -194,6 +198,11 @@ BlockIO InterpreterKillQueryQuery::execute()

     for (const auto & query_desc : queries_to_stop)
     {
         auto code = (query.test) ?
CancellationCode::Unknown : process_list.sendCancelToQuery(query_desc.query_id, query_desc.user, true);
+
+        /// Raise exception if this query is immortal, the user has to know
+        if (code == CancellationCode::CancelCannotBeSent)
+            throw Exception("Can't kill query '" + query_desc.query_id + "' because it consists of unkillable stages", ErrorCodes::CANNOT_KILL);
+
         insertResultRow(query_desc.source_num, code, processes_block, header, res_columns);
     }

diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp
index 557a006663d..e0233e830ef 100644
--- a/dbms/src/Interpreters/ProcessList.cpp
+++ b/dbms/src/Interpreters/ProcessList.cpp
@@ -396,8 +396,9 @@ ProcessList::CancellationCode ProcessList::sendCancelToQuery(const String & curr
         }
         return CancellationCode::CancelCannotBeSent;
     }
-
-    return CancellationCode::QueryIsNotInitializedYet;
+    /// Query is not even started
+    elem->is_killed.store(true);
+    return CancellationCode::CancelSent;
 }

diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h
index 87e43162202..d96209d885f 100644
--- a/dbms/src/Interpreters/ProcessList.h
+++ b/dbms/src/Interpreters/ProcessList.h
@@ -191,6 +191,8 @@ public:

     /// Get query in/out pointers from BlockIO
     bool tryGetQueryStreams(BlockInputStreamPtr & in, BlockOutputStreamPtr & out) const;
+
+    bool isKilled() const { return is_killed; }
 };

diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp
index d949ae3b932..5c4f082eefd 100644
--- a/dbms/src/Interpreters/executeQuery.cpp
+++ b/dbms/src/Interpreters/executeQuery.cpp
@@ -33,6 +33,7 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
     extern const int QUERY_IS_TOO_LARGE;
     extern const int INTO_OUTFILE_NOT_ALLOWED;
+    extern const int QUERY_WAS_CANCELLED;
 }

@@ -204,9 +205,15 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
             auto interpreter = InterpreterFactory::get(ast, context, stage);
             res = interpreter->execute();

-            /// Delayed initialization of query streams (required for KILL QUERY purposes)
             if (process_list_entry)
-                (*process_list_entry)->setQueryStreams(res);
+            {
+                /// Query was killed before execution
+                if ((*process_list_entry)->isKilled())
+                    throw Exception("Query '" + (*process_list_entry)->getInfo().client_info.current_query_id + "' is killed in pending state",
+                        ErrorCodes::QUERY_WAS_CANCELLED);
+                else
+                    (*process_list_entry)->setQueryStreams(res);
+            }

             /// Hold element of process list till end of query execution.
res.process_list_entry = process_list_entry;

diff --git a/dbms/tests/queries/0_stateless/00417_kill_query.reference b/dbms/tests/queries/0_stateless/00417_kill_query.reference
index 7e89d9674db..844ee1838ca 100644
--- a/dbms/tests/queries/0_stateless/00417_kill_query.reference
+++ b/dbms/tests/queries/0_stateless/00417_kill_query.reference
@@ -1,2 +1,3 @@
 SELECT sleep(1) FROM system.numbers LIMIT 4
 SELECT sleep(1) FROM system.numbers LIMIT 5
+0

diff --git a/dbms/tests/queries/0_stateless/00417_kill_query.sh b/dbms/tests/queries/0_stateless/00417_kill_query.sh
index 04bce8df014..332d0e4591a 100755
--- a/dbms/tests/queries/0_stateless/00417_kill_query.sh
+++ b/dbms/tests/queries/0_stateless/00417_kill_query.sh
@@ -19,3 +19,30 @@
 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 ASYNC"
 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 FORMAT TabSeparated"
 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 SYNC FORMAT TabSeparated"
 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 1 TEST" &>/dev/null
+
+
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.cannot_kill_query"
+$CLICKHOUSE_CLIENT -q "CREATE TABLE test.cannot_kill_query (x UInt64) ENGINE = MergeTree ORDER BY x" &> /dev/null
+$CLICKHOUSE_CLIENT -q "INSERT INTO test.cannot_kill_query SELECT * FROM numbers(10000000)" &> /dev/null
+
+query_for_pending="SELECT count() FROM test.cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1"
+$CLICKHOUSE_CLIENT -q "$query_for_pending" &>/dev/null &
+
+$CLICKHOUSE_CLIENT -q "ALTER TABLE test.cannot_kill_query MODIFY COLUMN x UInt64" &>/dev/null &
+
+query_to_kill="SELECT sum(1) FROM test.cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1"
+$CLICKHOUSE_CLIENT -q "$query_to_kill" &>/dev/null &
+
+sleep 1 # just to be sure that 'KILL ...' will be executed after 'SELECT ... WHERE NOT ignore(sleep(1))'
+
+$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query='$query_to_kill'" &>/dev/null &
+
+# 'SELECT ... WHERE NOT ignore(sleep(1))' runs much longer than 3 seconds, so this sleep doesn't break the test logic
+# but guarantees to avoid flaky failures when the SELECT from system.processes runs before KILL has completed
sleep 3
+
+$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes where query='$query_to_kill'"
+
+$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query='$query_for_pending'" &>/dev/null & # kill pending query
+
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.cannot_kill_query" &>/dev/null

From dc688ef5e1c10b241d455c691c051503b758d525 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 1 Nov 2018 17:08:15 +0300
Subject: [PATCH 48/79] Fix nullable comparison for LowCardinality(Nullable()).
#3227 #3256 #3212 --- dbms/src/Columns/ColumnUnique.h | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/dbms/src/Columns/ColumnUnique.h b/dbms/src/Columns/ColumnUnique.h index b60b6446155..be5d71c3e29 100644 --- a/dbms/src/Columns/ColumnUnique.h +++ b/dbms/src/Columns/ColumnUnique.h @@ -68,11 +68,7 @@ public: return getNestedColumn()->updateHashWithValue(n, hash); } - int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override - { - auto & column_unique = static_cast(rhs); - return getNestedColumn()->compareAt(n, m, *column_unique.getNestedColumn(), nan_direction_hint); - } + int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); } bool valuesHaveFixedSize() const override { return column_holder->valuesHaveFixedSize(); } @@ -350,6 +346,28 @@ size_t ColumnUnique::uniqueDeserializeAndInsertFromArena(const char return static_cast(index_pos); } +template +int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +{ + if (is_nullable) + { + /// See ColumnNullable::compareAt + bool lval_is_null = n == getNullValueIndex(); + bool rval_is_null = m == getNullValueIndex(); + + if (unlikely(lval_is_null || rval_is_null)) + { + if (lval_is_null && rval_is_null) + return 0; + else + return lval_is_null ? nan_direction_hint : -nan_direction_hint; + } + } + + auto & column_unique = static_cast(rhs); + return getNestedColumn()->compareAt(n, m, *column_unique.getNestedColumn(), nan_direction_hint); +} + template static void checkIndexes(const ColumnVector & indexes, size_t max_dictionary_size) { From f2c64dc5dcdcccf55f67ff839ad2dcb8d3acb816 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 1 Nov 2018 17:37:47 +0300 Subject: [PATCH 49/79] Added test with group by LowCardinality(Nullable) #3227 #3256 #3212 --- .../00751_low_cardinality_nullable_group_by.reference | 10 ++++++++++ .../00751_low_cardinality_nullable_group_by.sql | 8 ++++++++ 2 files changed, 18 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00751_low_cardinality_nullable_group_by.reference create mode 100644 dbms/tests/queries/0_stateless/00751_low_cardinality_nullable_group_by.sql diff --git a/dbms/tests/queries/0_stateless/00751_low_cardinality_nullable_group_by.reference b/dbms/tests/queries/0_stateless/00751_low_cardinality_nullable_group_by.reference new file mode 100644 index 00000000000..e321189cb32 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00751_low_cardinality_nullable_group_by.reference @@ -0,0 +1,10 @@ +\N 333334 +1 1 +2 1 +4 1 +5 1 +7 1 +8 1 +10 1 +11 1 +13 1 diff --git a/dbms/tests/queries/0_stateless/00751_low_cardinality_nullable_group_by.sql b/dbms/tests/queries/0_stateless/00751_low_cardinality_nullable_group_by.sql new file mode 100644 index 00000000000..299daeb615b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00751_low_cardinality_nullable_group_by.sql @@ -0,0 +1,8 @@ +SET allow_experimental_low_cardinality_type = 1; +drop table if exists test.low_null_float; +CREATE TABLE test.low_null_float (a LowCardinality(Nullable(Float64))) ENGINE = MergeTree order by tuple(); +INSERT INTO test.low_null_float (a) SELECT if(number % 3 == 0, Null, number) FROM system.numbers LIMIT 1000000; + +SELECT a, count() FROM test.low_null_float GROUP BY a ORDER BY count() desc, a LIMIT 10; +drop table if exists test.low_null_float; + From 
c6d16772220c47d8a05dcb87efdd1c9cd54d21d6 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Thu, 1 Nov 2018 18:09:19 +0300
Subject: [PATCH 50/79] CLICKHOUSE-4097: Remove redundant whitespace

---
 dbms/src/Parsers/ASTKillQueryQuery.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Parsers/ASTKillQueryQuery.cpp b/dbms/src/Parsers/ASTKillQueryQuery.cpp
index a8b351cdb39..0b9e6bcf4bc 100644
--- a/dbms/src/Parsers/ASTKillQueryQuery.cpp
+++ b/dbms/src/Parsers/ASTKillQueryQuery.cpp
@@ -10,7 +10,7 @@ String ASTKillQueryQuery::getID() const

 void ASTKillQueryQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
 {
-    settings.ostr << (settings.hilite ? hilite_keyword : "") << "KILL QUERY ";
+    settings.ostr << (settings.hilite ? hilite_keyword : "") << "KILL QUERY";
     formatOnCluster(settings);
     settings.ostr << " WHERE " << (settings.hilite ? hilite_none : "");

From 11092e895cf93ade2de1ddafb322ac181f28d577 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Thu, 1 Nov 2018 18:22:59 +0300
Subject: [PATCH 51/79] Better comment

---
 dbms/src/Interpreters/InterpreterKillQueryQuery.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp
index 5a9d423dae6..0c86d590747 100644
--- a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp
@@ -140,6 +140,7 @@ public:
             auto code = process_list.sendCancelToQuery(curr_process.query_id, curr_process.user, true);

             /// Raise exception if this query is immortal, the user has to know
+            /// This could happen only if the query generates streams that don't implement IProfilingBlockInputStream
             if (code == CancellationCode::CancelCannotBeSent)
                 throw Exception("Can't kill query '" + curr_process.query_id + "' because it consists of unkillable stages", ErrorCodes::CANNOT_KILL);
             else if (code != CancellationCode::QueryIsNotInitializedYet && code != CancellationCode::CancelSent)

@@ -200,6 +201,7 @@ BlockIO InterpreterKillQueryQuery::execute()
             auto code = (query.test) ? CancellationCode::Unknown : process_list.sendCancelToQuery(query_desc.query_id, query_desc.user, true);

             /// Raise exception if this query is immortal, the user has to know
+            /// This could happen only if the query generates streams that don't implement IProfilingBlockInputStream
             if (code == CancellationCode::CancelCannotBeSent)
                 throw Exception("Can't kill query '" + query_desc.query_id + "' because it consists of unkillable stages", ErrorCodes::CANNOT_KILL);

From 719efbe60a903261c144a3b174b40d598f74d7b7 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 1 Nov 2018 18:47:08 +0300
Subject: [PATCH 52/79] Fixed idiosyncrasy introduced in #3451

---
 dbms/src/Functions/FunctionsHashing.h         | 318 ++++++++++--------
 .../0_stateless/00678_murmurhash.reference    |  21 +-
 .../queries/0_stateless/00678_murmurhash.sql  |   1 +
 .../00746_hashing_tuples.reference            |  30 +-
 4 files changed, 196 insertions(+), 174 deletions(-)

diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h
index 04fe4069775..84610786d22 100644
--- a/dbms/src/Functions/FunctionsHashing.h
+++ b/dbms/src/Functions/FunctionsHashing.h
@@ -113,6 +113,10 @@ struct HalfMD5Impl
         UInt64 hashes[] = {h1, h2};
         return apply(reinterpret_cast<const char *>(hashes), 16);
     }
+
+    /// If true, it will use intHash32 or intHash64 to hash POD types. This behaviour is intended for better performance of some functions.
+ /// Otherwise it will hash bytes in memory as a string using corresponding hash function. + static constexpr bool use_int_hash_for_pods = false; }; struct MD5Impl @@ -186,8 +190,9 @@ struct SipHash64Impl UInt64 hashes[] = {h1, h2}; return apply(reinterpret_cast(hashes), 16); } -}; + static constexpr bool use_int_hash_for_pods = false; +}; struct SipHash128Impl { @@ -201,6 +206,156 @@ struct SipHash128Impl }; +/** Why we need MurmurHash2? + * MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash. + * Usually there is no reason to use MurmurHash. + * It is needed for the cases when you already have MurmurHash in some applications and you want to reproduce it + * in ClickHouse as is. For example, it is needed to reproduce the behaviour + * for NGINX a/b testing module: https://nginx.ru/en/docs/http/ngx_http_split_clients_module.html + */ +struct MurmurHash2Impl32 +{ + static constexpr auto name = "murmurHash2_32"; + + using ReturnType = UInt32; + + static UInt32 apply(const char * data, const size_t size) + { + return MurmurHash2(data, size, 0); + } + + static UInt32 combineHashes(UInt32 h1, UInt32 h2) + { + return IntHash32Impl::apply(h1) ^ h2; + } + + static constexpr bool use_int_hash_for_pods = false; +}; + +struct MurmurHash2Impl64 +{ + static constexpr auto name = "murmurHash2_64"; + using ReturnType = UInt64; + + static UInt64 apply(const char * data, const size_t size) + { + return MurmurHash64A(data, size, 0); + } + + static UInt64 combineHashes(UInt64 h1, UInt64 h2) + { + return IntHash64Impl::apply(h1) ^ h2; + } + + static constexpr bool use_int_hash_for_pods = false; +}; + +struct MurmurHash3Impl32 +{ + static constexpr auto name = "murmurHash3_32"; + using ReturnType = UInt32; + + static UInt32 apply(const char * data, const size_t size) + { + union + { + UInt32 h; + char bytes[sizeof(h)]; + }; + MurmurHash3_x86_32(data, size, 0, bytes); + return h; + } + + static UInt32 combineHashes(UInt32 h1, UInt32 h2) + { + return IntHash32Impl::apply(h1) ^ h2; + } + + static constexpr bool use_int_hash_for_pods = false; +}; + +struct MurmurHash3Impl64 +{ + static constexpr auto name = "murmurHash3_64"; + using ReturnType = UInt64; + + static UInt64 apply(const char * data, const size_t size) + { + union + { + UInt64 h[2]; + char bytes[16]; + }; + MurmurHash3_x64_128(data, size, 0, bytes); + return h[0] ^ h[1]; + } + + static UInt64 combineHashes(UInt64 h1, UInt64 h2) + { + return IntHash64Impl::apply(h1) ^ h2; + } + + static constexpr bool use_int_hash_for_pods = false; +}; + +struct MurmurHash3Impl128 +{ + static constexpr auto name = "murmurHash3_128"; + enum { length = 16 }; + + static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + { + MurmurHash3_x64_128(begin, size, 0, out_char_data); + } +}; + +struct ImplCityHash64 +{ + static constexpr auto name = "cityHash64"; + using ReturnType = UInt64; + using uint128_t = CityHash_v1_0_2::uint128; + + static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } + static auto apply(const char * s, const size_t len) { return CityHash_v1_0_2::CityHash64(s, len); } + static constexpr bool use_int_hash_for_pods = true; +}; + +// see farmhash.h for definition of NAMESPACE_FOR_HASH_FUNCTIONS +struct ImplFarmHash64 +{ + static constexpr auto name = "farmHash64"; + using ReturnType = UInt64; + using uint128_t = NAMESPACE_FOR_HASH_FUNCTIONS::uint128_t; + + static auto combineHashes(UInt64 h1, UInt64 h2) { 
return NAMESPACE_FOR_HASH_FUNCTIONS::Hash128to64(uint128_t(h1, h2)); } + static auto apply(const char * s, const size_t len) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(s, len); } + static constexpr bool use_int_hash_for_pods = true; +}; + +struct ImplMetroHash64 +{ + static constexpr auto name = "metroHash64"; + using ReturnType = UInt64; + using uint128_t = CityHash_v1_0_2::uint128; + + static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } + static auto apply(const char * s, const size_t len) + { + union + { + UInt64 u64; + UInt8 u8[sizeof(u64)]; + }; + + metrohash64_1(reinterpret_cast(s), len, 0, u8); + + return u64; + } + + static constexpr bool use_int_hash_for_pods = true; +}; + + template class FunctionStringHashFixedString : public IFunction { @@ -259,12 +414,6 @@ public: }; -inline bool allowIntHash(const IDataType * data_type) -{ - return data_type->isValueRepresentedByNumber(); -} - - template class FunctionIntHash : public IFunction { @@ -308,7 +457,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (!allowIntHash(arguments[0].get())) + if (!arguments[0]->isValueRepresentedByNumber()) throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -359,10 +508,19 @@ private: for (size_t i = 0; i < size; ++i) { ToType h; - if constexpr (std::is_same_v) - h = IntHash64Impl::apply(ext::bit_cast(vec_from[i])); + + if constexpr (Impl::use_int_hash_for_pods) + { + if constexpr (std::is_same_v) + h = IntHash64Impl::apply(ext::bit_cast(vec_from[i])); + else + h = IntHash32Impl::apply(ext::bit_cast(vec_from[i])); + } else - h = IntHash32Impl::apply(ext::bit_cast(vec_from[i])); + { + h = Impl::apply(reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); + } + if (first) vec_to[i] = h; else @@ -610,102 +768,6 @@ public: }; -/** Why we need MurmurHash2? - * MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash. - * Usually there is no reason to use MurmurHash. - * It is needed for the cases when you already have MurmurHash in some applications and you want to reproduce it - * in ClickHouse as is. 
For example, it is needed to reproduce the behaviour - * for NGINX a/b testing module: https://nginx.ru/en/docs/http/ngx_http_split_clients_module.html - */ -struct MurmurHash2Impl32 -{ - static constexpr auto name = "murmurHash2_32"; - - using ReturnType = UInt32; - - static UInt32 apply(const char * data, const size_t size) - { - return MurmurHash2(data, size, 0); - } - - static UInt32 combineHashes(UInt32 h1, UInt32 h2) - { - return IntHash32Impl::apply(h1) ^ h2; - } -}; - -struct MurmurHash2Impl64 -{ - static constexpr auto name = "murmurHash2_64"; - using ReturnType = UInt64; - - static UInt64 apply(const char * data, const size_t size) - { - return MurmurHash64A(data, size, 0); - } - - static UInt64 combineHashes(UInt64 h1, UInt64 h2) - { - return IntHash64Impl::apply(h1) ^ h2; - } -}; - -struct MurmurHash3Impl32 -{ - static constexpr auto name = "murmurHash3_32"; - using ReturnType = UInt32; - - static UInt32 apply(const char * data, const size_t size) - { - union - { - UInt32 h; - char bytes[sizeof(h)]; - }; - MurmurHash3_x86_32(data, size, 0, bytes); - return h; - } - - static UInt32 combineHashes(UInt32 h1, UInt32 h2) - { - return IntHash32Impl::apply(h1) ^ h2; - } -}; - -struct MurmurHash3Impl64 -{ - static constexpr auto name = "murmurHash3_64"; - using ReturnType = UInt64; - - static UInt64 apply(const char * data, const size_t size) - { - union - { - UInt64 h[2]; - char bytes[16]; - }; - MurmurHash3_x64_128(data, size, 0, bytes); - return h[0] ^ h[1]; - } - - static UInt64 combineHashes(UInt64 h1, UInt64 h2) - { - return IntHash64Impl::apply(h1) ^ h2; - } -}; - -struct MurmurHash3Impl128 -{ - static constexpr auto name = "murmurHash3_128"; - enum { length = 16 }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - MurmurHash3_x64_128(begin, size, 0, out_char_data); - } -}; - - struct URLHashImpl { static UInt64 apply(const char * data, const size_t size) @@ -899,48 +961,6 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; }; struct NameIntHash64 { static constexpr auto name = "intHash64"; }; -struct ImplCityHash64 -{ - static constexpr auto name = "cityHash64"; - using ReturnType = UInt64; - using uint128_t = CityHash_v1_0_2::uint128; - - static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } - static auto apply(const char * s, const size_t len) { return CityHash_v1_0_2::CityHash64(s, len); } -}; - -// see farmhash.h for definition of NAMESPACE_FOR_HASH_FUNCTIONS -struct ImplFarmHash64 -{ - static constexpr auto name = "farmHash64"; - using ReturnType = UInt64; - using uint128_t = NAMESPACE_FOR_HASH_FUNCTIONS::uint128_t; - - static auto combineHashes(UInt64 h1, UInt64 h2) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash128to64(uint128_t(h1, h2)); } - static auto apply(const char * s, const size_t len) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(s, len); } -}; - -struct ImplMetroHash64 -{ - static constexpr auto name = "metroHash64"; - using ReturnType = UInt64; - using uint128_t = CityHash_v1_0_2::uint128; - - static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); } - static auto apply(const char * s, const size_t len) - { - union - { - UInt64 u64; - UInt8 u8[sizeof(u64)]; - }; - - metrohash64_1(reinterpret_cast(s), len, 0, u8); - - return u64; - } -}; - using FunctionHalfMD5 = FunctionAnyHash; using FunctionSipHash64 = FunctionAnyHash; using FunctionIntHash32 = FunctionIntHash; diff --git 
a/dbms/tests/queries/0_stateless/00678_murmurhash.reference b/dbms/tests/queries/0_stateless/00678_murmurhash.reference index 2699ce220f5..548c5c1cae6 100644 --- a/dbms/tests/queries/0_stateless/00678_murmurhash.reference +++ b/dbms/tests/queries/0_stateless/00678_murmurhash.reference @@ -1,25 +1,26 @@ -3012058918 -1298551497 -864444010 -367840556 +623211862 +3533626746 +2388617433 +2708309598 2414502773 670491991 -1343103100 0 0 0 0 -1343103100 -1996614413 +0 +0 +0 +1 1 14834356025302342401 12725806677685968135 -10577349846663553072 +12725806677685968135 4138058784 3831157163 -1343103100 +3831157163 11303473983767132390 956517343494314387 -10577349846663553072 +956517343494314387 6145F501578671E2877DBA2BE487AF7E 16FE7483905CCE7A85670E43E4678877 diff --git a/dbms/tests/queries/0_stateless/00678_murmurhash.sql b/dbms/tests/queries/0_stateless/00678_murmurhash.sql index 1b800c7eb9c..9d20b56aa93 100644 --- a/dbms/tests/queries/0_stateless/00678_murmurhash.sql +++ b/dbms/tests/queries/0_stateless/00678_murmurhash.sql @@ -13,6 +13,7 @@ SELECT murmurHash2_32('\x03\0\0'); SELECT murmurHash2_32(1); SELECT murmurHash2_32(toUInt16(2)); +SELECT murmurHash2_32(2) = bitXor(toUInt32(0x5bd1e995 * bitXor(toUInt32(3 * 0x5bd1e995) AS a, bitShiftRight(a, 13))) AS b, bitShiftRight(b, 15)); SELECT murmurHash2_32('\x02') = bitXor(toUInt32(0x5bd1e995 * bitXor(toUInt32(3 * 0x5bd1e995) AS a, bitShiftRight(a, 13))) AS b, bitShiftRight(b, 15)); SELECT murmurHash2_64('foo'); diff --git a/dbms/tests/queries/0_stateless/00746_hashing_tuples.reference b/dbms/tests/queries/0_stateless/00746_hashing_tuples.reference index 391a59e012f..4c84566e975 100644 --- a/dbms/tests/queries/0_stateless/00746_hashing_tuples.reference +++ b/dbms/tests/queries/0_stateless/00746_hashing_tuples.reference @@ -1,15 +1,15 @@ -8732148587615156034 -3856459458360415155 -1993857991550209231 -5465424717626995012 -15495040516566687427 -13266110974878256384 -617416965 -3293554683 -4210800467 -6847376565456338547 -15499510486101262177 -13552202417419166072 -6847376565456338547 -15499510486101262177 -14474638290107799038 +12940785793559895259 +17926972817233444501 +7456555839952096623 +955237314186186656 +8175794665478042155 +9325786087413524176 +2822869866 +1460833561 +222444531 +13951512892560982617 +4952008279444388047 +15509665835504406222 +8163029322371165472 +8788309436660676487 +236561483980029756 From e7bb9d5d84655ff5b8cf4e12e95685961ea895ad Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Nov 2018 18:50:30 +0300 Subject: [PATCH 53/79] Added test #3519 --- .../tests/queries/0_stateless/00751_hashing_ints.reference | 7 +++++++ dbms/tests/queries/0_stateless/00751_hashing_ints.sql | 7 +++++++ 2 files changed, 14 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00751_hashing_ints.reference create mode 100644 dbms/tests/queries/0_stateless/00751_hashing_ints.sql diff --git a/dbms/tests/queries/0_stateless/00751_hashing_ints.reference b/dbms/tests/queries/0_stateless/00751_hashing_ints.reference new file mode 100644 index 00000000000..8eccccd0449 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00751_hashing_ints.reference @@ -0,0 +1,7 @@ +1452224150530656417 +11717965186011240346 +13379111408315310133 +13379111408315310133 +13379111408315310133 +623211862 +9052087431341907723 diff --git a/dbms/tests/queries/0_stateless/00751_hashing_ints.sql b/dbms/tests/queries/0_stateless/00751_hashing_ints.sql new file mode 100644 index 00000000000..d2f0a26cef4 --- /dev/null +++ 
b/dbms/tests/queries/0_stateless/00751_hashing_ints.sql @@ -0,0 +1,7 @@ +SELECT halfMD5(123456); +SELECT sipHash64(123456); +SELECT cityHash64(123456); +SELECT farmHash64(123456); +SELECT metroHash64(123456); +SELECT murmurHash2_32(123456); +SELECT murmurHash2_64(123456); From 77af9f7abbb7d7b7ad851cc96c004671842f1f8e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Nov 2018 18:55:28 +0300 Subject: [PATCH 54/79] Fixed error code in test #3503 --- dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql b/dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql index 027f9b6b472..9421bbb8e5c 100644 --- a/dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql +++ b/dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql @@ -3,7 +3,7 @@ DROP TABLE IF EXISTS test.arraytest; CREATE TABLE test.arraytest ( created_date Date DEFAULT toDate(created_at), created_at DateTime DEFAULT now(), strings Array(String) DEFAULT emptyArrayString()) ENGINE = MergeTree(created_date, cityHash64(created_at), (created_date, cityHash64(created_at)), 8192); INSERT INTO test.arraytest (created_at, strings) VALUES (now(), ['aaaaa', 'bbbbb', 'ccccc']); -INSERT INTO test.arraytest (created_at, strings) VALUES (now(), ['aaaaa', 'bbbbb', null]); -- { clientError 53 } +INSERT INTO test.arraytest (created_at, strings) VALUES (now(), ['aaaaa', 'bbbbb', null]); -- { clientError 321 } SELECT strings from test.arraytest; From f86d1a703490f0d4214fcfe37b9d8ee6ba8dbf7f Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 1 Nov 2018 20:07:20 +0300 Subject: [PATCH 55/79] CONTRIB-254 Arcadia fixes (#3475) * Macos: set ENABLE_EMBEDDED_COMPILER=0 by default (because contrib/llvm temporary broken under macos) * \n * arcadia fix * fix arcadia link * wip * wip * wip * fix * wip * wip * fix * try use users from server dir * Revert "try use users from server dir" This reverts commit 79b2c6fd9c8d3d07cd3d06ed42abd49721563f88. 
* Fix logging * fix * Update ZooKeeperImpl.h --- dbms/src/Common/CurrentThread.cpp | 26 ++++++++++++++++--- dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp | 22 +--------------- dbms/src/Common/ZooKeeper/ZooKeeperImpl.h | 25 ++++++++++++++++++ dbms/src/Dictionaries/TrieDictionary.cpp | 2 +- dbms/src/Functions/FunctionsHashing.h | 2 +- .../Storages/MergeTree/LevelMergeSelector.cpp | 4 +-- .../MergeTree/SimpleMergeSelector.cpp | 4 +-- dbms/tests/clickhouse-test-server | 4 ++- libs/libdaemon/src/BaseDaemon.cpp | 4 +-- 9 files changed, 60 insertions(+), 33 deletions(-) diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index 1f869ca972f..a3919108724 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -11,6 +11,11 @@ #include +#if defined(ARCADIA_ROOT) +# include +#endif + + namespace DB { @@ -21,10 +26,25 @@ namespace ErrorCodes SimpleObjectPool task_stats_info_getter_pool; +// Smoker's implementation to avoid thread_local usage: error: undefined symbol: __cxa_thread_atexit +#if defined(ARCADIA_ROOT) +struct ThreadStatusPtrHolder : ThreadStatusPtr +{ + ThreadStatusPtrHolder() { ThreadStatusPtr::operator=(ThreadStatus::create()); } +}; +struct ThreadScopePtrHolder : CurrentThread::ThreadScopePtr +{ + ThreadScopePtrHolder() { CurrentThread::ThreadScopePtr::operator=(std::make_shared()); } +}; +# define current_thread (*FastTlsSingleton()) +# define current_thread_scope (*FastTlsSingleton()) +#else /// Order of current_thread and current_thread_scope matters -thread_local ThreadStatusPtr current_thread = ThreadStatus::create(); -thread_local CurrentThread::ThreadScopePtr current_thread_scope = std::make_shared(); - +thread_local ThreadStatusPtr _current_thread = ThreadStatus::create(); +thread_local CurrentThread::ThreadScopePtr _current_thread_scope = std::make_shared(); +# define current_thread _current_thread +# define current_thread_scope _current_thread_scope +#endif void CurrentThread::updatePerformanceCounters() { diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 9a4d059d461..2298725fc66 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -395,24 +395,8 @@ void ZooKeeper::read(T & x) } -struct ZooKeeperResponse; -using ZooKeeperResponsePtr = std::shared_ptr; - -struct ZooKeeperRequest : virtual Request -{ - ZooKeeper::XID xid = 0; - bool has_watch = false; - /// If the request was not send and the error happens, we definitely sure, that is has not been processed by the server. - /// If the request was sent and we didn't get the response and the error happens, then we cannot be sure was it processed or not. - bool probably_sent = false; - - virtual ~ZooKeeperRequest() {} - - virtual ZooKeeper::OpNum getOpNum() const = 0; - - /// Writes length, xid, op_num, then the rest. - void write(WriteBuffer & out) const + void ZooKeeperRequest::write(WriteBuffer & out) const { /// Excessive copy to calculate length. 
WriteBufferFromOwnString buf; @@ -423,10 +407,6 @@ struct ZooKeeperRequest : virtual Request out.next(); } - virtual void writeImpl(WriteBuffer &) const = 0; - - virtual ZooKeeperResponsePtr makeResponse() const = 0; -}; struct ZooKeeperResponse : virtual Response { diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h index 0df5b58a60c..4df36742d44 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -240,4 +240,29 @@ private: CurrentMetrics::Increment active_session_metric_increment{CurrentMetrics::ZooKeeperSession}; }; +struct ZooKeeperResponse; +using ZooKeeperResponsePtr = std::shared_ptr; + +/// Exposed in header file for Yandex.Metrica code. +struct ZooKeeperRequest : virtual Request +{ + ZooKeeper::XID xid = 0; + bool has_watch = false; + /// If the request was not send and the error happens, we definitely sure, that is has not been processed by the server. + /// If the request was sent and we didn't get the response and the error happens, then we cannot be sure was it processed or not. + bool probably_sent = false; + + virtual ~ZooKeeperRequest() {} + + virtual ZooKeeper::OpNum getOpNum() const = 0; + + /// Writes length, xid, op_num, then the rest. + void write(WriteBuffer & out) const; + + virtual void writeImpl(WriteBuffer &) const = 0; + + virtual ZooKeeperResponsePtr makeResponse() const = 0; +}; + + } diff --git a/dbms/src/Dictionaries/TrieDictionary.cpp b/dbms/src/Dictionaries/TrieDictionary.cpp index 3792a5fb8c8..38dcaed9a1f 100644 --- a/dbms/src/Dictionaries/TrieDictionary.cpp +++ b/dbms/src/Dictionaries/TrieDictionary.cpp @@ -619,7 +619,7 @@ Columns TrieDictionary::getKeyColumns() const #if defined(__SIZEOF_INT128__) auto getter = [& ip_column, & mask_column](__uint128_t ip, size_t mask) { - UInt64 * ip_array = reinterpret_cast(&ip); + Poco::UInt64 * ip_array = reinterpret_cast(&ip); // Poco:: for old poco + macos ip_array[0] = Poco::ByteOrder::fromNetwork(ip_array[0]); ip_array[1] = Poco::ByteOrder::fromNetwork(ip_array[1]); std::swap(ip_array[0], ip_array[1]); diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index 04fe4069775..980c331f058 100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -105,7 +105,7 @@ struct HalfMD5Impl MD5_Update(&ctx, reinterpret_cast(begin), size); MD5_Final(buf.char_data, &ctx); - return Poco::ByteOrder::flipBytes(buf.uint64_data); /// Compatibility with existing code. + return Poco::ByteOrder::flipBytes(static_cast(buf.uint64_data)); /// Compatibility with existing code. 
Cast need for old poco AND macos where UInt64 != uint64_t } static UInt64 combineHashes(UInt64 h1, UInt64 h2) diff --git a/dbms/src/Storages/MergeTree/LevelMergeSelector.cpp b/dbms/src/Storages/MergeTree/LevelMergeSelector.cpp index c9a058d0aa5..c6bec6540b4 100644 --- a/dbms/src/Storages/MergeTree/LevelMergeSelector.cpp +++ b/dbms/src/Storages/MergeTree/LevelMergeSelector.cpp @@ -34,8 +34,8 @@ struct Estimator } double min_score = 0; - Iterator best_begin; - Iterator best_end; + Iterator best_begin {}; + Iterator best_end {}; }; diff --git a/dbms/src/Storages/MergeTree/SimpleMergeSelector.cpp b/dbms/src/Storages/MergeTree/SimpleMergeSelector.cpp index 882d1dfd8a9..a566e474a5f 100644 --- a/dbms/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/dbms/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -68,8 +68,8 @@ struct Estimator } double min_score = 0; - Iterator best_begin; - Iterator best_end; + Iterator best_begin {}; + Iterator best_end {}; }; diff --git a/dbms/tests/clickhouse-test-server b/dbms/tests/clickhouse-test-server index 44963ca8658..a05eb98302f 100755 --- a/dbms/tests/clickhouse-test-server +++ b/dbms/tests/clickhouse-test-server @@ -21,6 +21,8 @@ CONFIG_SERVER_DIR=${CONFIG_SERVER_DIR=$CONFIG_DIR} [ ! -f "${CONFIG_SERVER_DIR}server-test.xml" ] && CONFIG_SERVER_DIR=${CONFIG_SERVER_DIR:=/etc/clickhouse-server/} export CLICKHOUSE_CONFIG_CLIENT=${CLICKHOUSE_CONFIG_CLIENT:=${CONFIG_CLIENT_DIR}client-test.xml} export CLICKHOUSE_CONFIG=${CLICKHOUSE_CONFIG:=${CONFIG_SERVER_DIR}server-test.xml} +CLICKHOUSE_CONFIG_USERS=${CONFIG_SERVER_DIR}users.xml +[ ! -f "$CLICKHOUSE_CONFIG_USERS" ] && CLICKHOUSE_CONFIG_USERS=$CUR_DIR/../programs/server/users.xml [ -x "$CUR_DIR/clickhouse-test" ] && TEST_DIR=${TEST_DIR=$CUR_DIR/} [ -d "$CUR_DIR/queries" ] && QUERIES_DIR=${QUERIES_DIR=$CUR_DIR/queries} [ ! -d "$QUERIES_DIR" ] && [ -d "/usr/local/share/clickhouse-test/queries" ] && QUERIES_DIR=${QUERIES_DIR=/usr/local/share/clickhouse-test/queries} @@ -43,7 +45,7 @@ mkdir -p $LOG_DIR $DATA_DIR/etc || true if [ "$DATA_DIR_PATTERN" != "$DATA_DIR" ]; then cat $CLICKHOUSE_CONFIG | sed -e s!$DATA_DIR_PATTERN!$DATA_DIR! > $DATA_DIR/etc/server-config.xml export CLICKHOUSE_CONFIG=$DATA_DIR/etc/server-config.xml - cp ${CONFIG_SERVER_DIR}users.xml $DATA_DIR/etc + cp $CLICKHOUSE_CONFIG_USERS $DATA_DIR/etc fi CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="${BIN_DIR}${CLICKHOUSE_BINARY}-extract-from-config --config=$CLICKHOUSE_CONFIG"} diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 9a0e181352e..34297c937cf 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -669,7 +669,7 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config) std::cerr << "Logging " << log_level << " to " << log_path << std::endl; // Set up two channel chains. 
- Poco::AutoPtr log_file = new FileChannel; + log_file = new FileChannel; log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(log_path).absolute().toString()); log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M")); log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number"); @@ -691,7 +691,7 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config) createDirectory(errorlog_path); std::cerr << "Logging errors to " << errorlog_path << std::endl; - Poco::AutoPtr error_log_file = new FileChannel; + error_log_file = new FileChannel; error_log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(errorlog_path).absolute().toString()); error_log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M")); error_log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number"); From ac10a55b6f1a1b45d755a7032c37b51792c92689 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 1 Nov 2018 20:20:14 +0300 Subject: [PATCH 56/79] Update StorageSystemDataTypeFamilies.cpp --- dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp b/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp index 824aa88550e..56b20bc52c1 100644 --- a/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp +++ b/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp @@ -28,7 +28,7 @@ void StorageSystemDataTypeFamilies::fillData(MutableColumns & res_columns, const if (factory.isAlias(name)) res_columns[2]->insert(factory.aliasTo(name)); else - res_columns[2]->insert(""); + res_columns[2]->insert(String()); } } From e10fd083efe4a99e1e5ef197209c0123ad098b8f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 1 Nov 2018 20:20:41 +0300 Subject: [PATCH 57/79] Update StorageSystemFunctions.cpp --- dbms/src/Storages/System/StorageSystemFunctions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/System/StorageSystemFunctions.cpp b/dbms/src/Storages/System/StorageSystemFunctions.cpp index 1de904c3520..46d687ea33b 100644 --- a/dbms/src/Storages/System/StorageSystemFunctions.cpp +++ b/dbms/src/Storages/System/StorageSystemFunctions.cpp @@ -20,7 +20,7 @@ namespace if (f.isAlias(name)) res_columns[3]->insert(f.aliasTo(name)); else - res_columns[3]->insert(""); + res_columns[3]->insert(String()); } } From 1a4a99377a75c2b2d69084ce81ffa031b2f070d1 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 1 Nov 2018 20:21:15 +0300 Subject: [PATCH 58/79] Update StorageSystemFunctions.cpp --- dbms/src/Storages/System/StorageSystemFunctions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/System/StorageSystemFunctions.cpp b/dbms/src/Storages/System/StorageSystemFunctions.cpp index 46d687ea33b..c9601373df3 100644 --- a/dbms/src/Storages/System/StorageSystemFunctions.cpp +++ b/dbms/src/Storages/System/StorageSystemFunctions.cpp @@ -20,7 +20,7 @@ namespace if (f.isAlias(name)) res_columns[3]->insert(f.aliasTo(name)); else - res_columns[3]->insert(String()); + res_columns[3]->insertDefault(); } } From 4dedead38f4b806ee29ef528f2edea74e23302ef Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 1 Nov 2018 20:21:43 +0300 Subject: [PATCH 59/79] Update StorageSystemDataTypeFamilies.cpp --- dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp b/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp index 56b20bc52c1..28fb2076c21 100644 --- a/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp +++ b/dbms/src/Storages/System/StorageSystemDataTypeFamilies.cpp @@ -28,7 +28,7 @@ void StorageSystemDataTypeFamilies::fillData(MutableColumns & res_columns, const if (factory.isAlias(name)) res_columns[2]->insert(factory.aliasTo(name)); else - res_columns[2]->insert(String()); + res_columns[2]->insertDefault(); } } From 03b2b609b162bbc8fc14c291255baccfdcb2d067 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 1 Nov 2018 20:03:05 +0300 Subject: [PATCH 60/79] Fix ColumnLowCardinality::getExtremes. #3288 --- dbms/src/Columns/ColumnLowCardinality.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Columns/ColumnLowCardinality.h b/dbms/src/Columns/ColumnLowCardinality.h index a125fee35ab..80fc21431df 100644 --- a/dbms/src/Columns/ColumnLowCardinality.h +++ b/dbms/src/Columns/ColumnLowCardinality.h @@ -117,7 +117,7 @@ public: void getExtremes(Field & min, Field & max) const override { - return getDictionary().index(getIndexes(), 0)->getExtremes(min, max); /// TODO: optimize + return dictionary.getColumnUnique().getNestedColumn()->index(getIndexes(), 0)->getExtremes(min, max); /// TODO: optimize } void reserve(size_t n) override { idx.reserve(n); } From e7b6937152ffba2ae9d80c583ce510f1c1c94bc6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 1 Nov 2018 20:21:21 +0300 Subject: [PATCH 61/79] Added tests with LowCardinality and mv. #3288 Added tests with LowCardinality and mv. #3288 --- .../00752_low_cardinality_mv_1.reference | 4 ++++ .../0_stateless/00752_low_cardinality_mv_1.sql | 17 +++++++++++++++++ .../00752_low_cardinality_mv_2.reference | 1 + .../0_stateless/00752_low_cardinality_mv_2.sql | 16 ++++++++++++++++ 4 files changed, 38 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00752_low_cardinality_mv_1.reference create mode 100644 dbms/tests/queries/0_stateless/00752_low_cardinality_mv_1.sql create mode 100644 dbms/tests/queries/0_stateless/00752_low_cardinality_mv_2.reference create mode 100644 dbms/tests/queries/0_stateless/00752_low_cardinality_mv_2.sql diff --git a/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_1.reference b/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_1.reference new file mode 100644 index 00000000000..4ab40faa2dd --- /dev/null +++ b/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_1.reference @@ -0,0 +1,4 @@ +a 1 6 +a 1 6 +b 3 8 +b 3 8 diff --git a/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_1.sql b/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_1.sql new file mode 100644 index 00000000000..7eeebf33d6e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_1.sql @@ -0,0 +1,17 @@ +SET allow_experimental_low_cardinality_type = 1; + +drop table if exists test.lc; +drop table if exists test.lc_mv; + +create table test.lc (str StringWithDictionary) engine = MergeTree order by tuple(); + +insert into test.lc values ('a'), ('bbb'), ('ab'), ('accccc'), ('baasddas'), ('bcde'); + +CREATE MATERIALIZED VIEW test.lc_mv ENGINE = AggregatingMergeTree() ORDER BY tuple() populate AS SELECT substring(str, 1, 1) as letter, min(length(str)) AS min_len, max(length(str)) AS max_len FROM test.lc GROUP BY substring(str, 1, 1); + +insert into test.lc values ('a'), ('bbb'), ('ab'), ('accccc'), ('baasddas'), ('bcde'); +select * from 
test.lc_mv order by letter; + +drop table if exists test.lc; +drop table if exists test.lc_mv; + diff --git a/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_2.reference b/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_2.reference new file mode 100644 index 00000000000..b362d0138a0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_2.reference @@ -0,0 +1 @@ +c 2018-10-10 15:45:00 3 10 2018-10-10 15:54:21 1 1 diff --git a/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_2.sql b/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_2.sql new file mode 100644 index 00000000000..850f744a2f1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00752_low_cardinality_mv_2.sql @@ -0,0 +1,16 @@ +SET allow_experimental_low_cardinality_type = 1; + +drop table if exists test.radacct; +drop table if exists test.mv_traffic_by_tadig15min; + +CREATE TABLE test.radacct ( radacctid UInt64, f3gppchargingid Nullable(String), f3gppggsnaddress Nullable(String), f3gppggsnmccmnc Nullable(String), f3gppgprsqos Nullable(String), f3gppimeisv Nullable(String), f3gppimsi Nullable(UInt64), f3gppimsimccmnc Nullable(String), f3gpploci Nullable(String), f3gppnsapi Nullable(String), f3gpprattype Nullable(String), f3gppsgsnaddress Nullable(String), f3gppsgsnmccmnc Nullable(String), acctdelaytime Nullable(UInt32), acctinputoctets Nullable(UInt64), acctinputpackets Nullable(UInt64), acctoutputoctets Nullable(UInt64), acctoutputpackets Nullable(UInt64), acctsessionid String, acctstatustype Nullable(String), acctuniqueid String, calledstationid Nullable(String), callingstationid Nullable(String), framedipaddress Nullable(String), nasidentifier Nullable(String), nasipaddress Nullable(String), acctstarttime Nullable(DateTime), acctstoptime Nullable(DateTime), acctsessiontime Nullable(UInt32), acctterminatecause Nullable(String), acctstartdelay Nullable(UInt32), acctstopdelay Nullable(UInt32), connectinfo_start Nullable(String), connectinfo_stop Nullable(String), timestamp DateTime, username Nullable(String), realm Nullable(String), f3gppimsi_int UInt64, f3gppsgsnaddress_int Nullable(UInt32), timestamp_date Date, tac Nullable(String), mnc Nullable(String), tadig LowCardinality(String), country LowCardinality(String), tadig_op_ip Nullable(String) DEFAULT CAST('TADIG NOT FOUND', 'Nullable(String)'), mcc Nullable(UInt16) MATERIALIZED toUInt16OrNull(substring(f3gppsgsnmccmnc, 1, 6))) ENGINE = MergeTree(timestamp_date, (timestamp, radacctid, acctuniqueid), 8192); + +insert into test.radacct values (1, 'a', 'b', 'c', 'd', 'e', 2, 'a', 'b', 'c', 'd', 'e', 'f', 3, 4, 5, 6, 7, 'a', 'Stop', 'c', 'd', 'e', 'f', 'g', 'h', '2018-10-10 15:54:21', '2018-10-10 15:54:21', 8, 'a', 9, 10, 'a', 'b', '2018-10-10 15:54:21', 'a', 'b', 11, 12, '2018-10-10', 'a', 'b', 'c', 'd', 'e'); + +create materialized view test.mv_traffic_by_tadig15min Engine=AggregatingMergeTree partition by tadig order by (ts,tadig) populate as select toStartOfFifteenMinutes(timestamp) ts,toDayOfWeek(timestamp) dow, tadig, sumState(acctinputoctets+acctoutputoctets) traffic_bytes,maxState(timestamp) last_stop, minState(radacctid) min_radacctid,maxState(radacctid) max_radacctid from test.radacct where acctstatustype='Stop' and acctinputoctets+acctoutputoctets > 0 group by tadig,ts,dow; + +select tadig, ts, dow, sumMerge(traffic_bytes), maxMerge(last_stop), minMerge(min_radacctid), maxMerge(max_radacctid) from test.mv_traffic_by_tadig15min group by tadig, ts, dow; + +drop table if exists test.radacct; +drop table if exists 
test.mv_traffic_by_tadig15min; + From 30a2513fac85e890be8ad5b48e3140ac9f62d53e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Nov 2018 20:55:53 +0300 Subject: [PATCH 62/79] Whitespace [#CLICKHOUSE-2] --- dbms/src/Interpreters/ExternalLoader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index caf37df43a5..2827e9ea283 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -239,7 +239,7 @@ void ExternalLoader::reloadFromConfigFiles(const bool throw_on_error, const bool if (current_config.find(loadable.first) == std::end(current_config)) removed_loadable_objects.emplace_back(loadable.first); } - for(const auto & name : removed_loadable_objects) + for (const auto & name : removed_loadable_objects) loadable_objects.erase(name); } From 3809f39ddf80a135a2c771bf2cdac1ea65de7afa Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 1 Nov 2018 21:00:43 +0300 Subject: [PATCH 63/79] CLICKHOUSE-3751: stem terms while searching docs (#3522) * CLICKHOUSE-4063: less manual html @ index.md * CLICKHOUSE-4063: recommend markdown="1" in README.md * CLICKHOUSE-4003: manually purge custom.css for now * CLICKHOUSE-4064: expand
before any print (including to pdf) * CLICKHOUSE-3927: rearrange interfaces/formats.md a bit * CLICKHOUSE-3306: add few http headers * Remove copy-paste introduced in #3392 * Hopefully better chinese fonts #3392 * get rid of tabs @ custom.css * Apply comments and patch from #3384 * Add jdbc.md to ToC and some translation, though it still looks badly incomplete * minor punctuation * Add some backlinks to official website from mirrors that just blindly take markdown sources * Do not make fonts extra light * find . -name '*.md' -type f | xargs -I{} perl -pi -e 's//g' {} * find . -name '*.md' -type f | xargs -I{} perl -pi -e 's/ sql/g' {} * Remove outdated stuff from roadmap.md * Not so light font on front page too * Refactor Chinese formats.md to match recent changes in other languages * Update some links on front page * Remove some outdated comment * Add twitter link to front page * More front page links tuning * Add Amsterdam meetup link * Smaller font to avoid second line * Add Amsterdam link to README.md * Proper docs nav translation * Back to 300 font-weight except Chinese * fix docs build * Update Amsterdam link * remove symlinks * more zh punctuation * apply lost comment by @zhang2014 * Apply comments by @zhang2014 from #3417 * Remove Beijing link * rm incorrect symlink * restore content of docs/zh/operations/table_engines/index.md * CLICKHOUSE-3751: stem terms while searching docs --- .../mkdocs-material-theme/assets/javascripts/application.js | 6 ++++++ docs/zh/operations/table_engines/index.md | 3 ++- website/index.html | 3 +-- 3 files changed, 9 insertions(+), 3 deletions(-) mode change 120000 => 100644 docs/zh/operations/table_engines/index.md diff --git a/docs/tools/mkdocs-material-theme/assets/javascripts/application.js b/docs/tools/mkdocs-material-theme/assets/javascripts/application.js index 0d4c888548b..3ab61ed15c5 100644 --- a/docs/tools/mkdocs-material-theme/assets/javascripts/application.js +++ b/docs/tools/mkdocs-material-theme/assets/javascripts/application.js @@ -4887,6 +4887,12 @@ var Result = function () { /* Append trailing wildcard to all terms for prefix querying */ .query(function (query) { _this.value_.toLowerCase().split(" ").filter(Boolean).forEach(function (term) { + for (var lang in _this.lang_) { + lang = _this.lang_[lang]; + if (typeof(_exposeLoaderLunrLunr2.default[lang]) !== 'undefined' && typeof(_exposeLoaderLunrLunr2.default[lang].stemmer) !== 'undefined') { + term = _exposeLoaderLunrLunr2.default[lang].stemmer(new _exposeLoaderLunrLunr2.default.Token(term)).toString(); + } + } query.term(term, { wildcard: _exposeLoaderLunrLunr2.default.Query.wildcard.TRAILING }); }); }) diff --git a/docs/zh/operations/table_engines/index.md b/docs/zh/operations/table_engines/index.md deleted file mode 120000 index b1d1c5db49d..00000000000 --- a/docs/zh/operations/table_engines/index.md +++ /dev/null @@ -1,13 +0,0 @@ -# 表引擎 -表引擎(即表的类型)决定了: - -* 数据的存储方式和位置,写到哪里以及从哪里读取数据。 -* 支持哪些查询以及如何支持。 -* 并发数据访问。 -* 索引的使用(如果存在)。 -* 是否可以执行多线程请求。 -* 数据复制参数。 - -在读取时,引擎只需要输出所请求的列,但在某些情况下,引擎可以在响应请求时部分处理数据。 - -对于大多数正式的任务,应该使用MergeTree族中的引擎。 diff --git a/docs/zh/operations/table_engines/index.md b/docs/zh/operations/table_engines/index.md new file mode 100644 index 00000000000..000a1f6cadd --- /dev/null +++ b/docs/zh/operations/table_engines/index.md @@ -0,0 +1,14 @@ +# 表引擎 + +表引擎(即表的类型)决定了: + +* 数据的存储方式和位置,写到哪里以及从哪里读取数据 +* 支持哪些查询以及如何支持。 +* 并发数据访问。 +* 索引的使用(如果存在)。 +* 是否可以执行多线程请求。 +* 数据复制参数。 + +在读取时,引擎只需要输出所请求的列,但在某些情况下,引擎可以在响应请求时部分处理数据。 + +对于大多数正式的任务,应该使用MergeTree族中的引擎。 diff --git 
a/website/index.html b/website/index.html
index 3b6ca4f318a..fffeed2e211 100644
--- a/website/index.html
+++ b/website/index.html
@@ -92,8 +92,7 @@
- Upcoming meetups in Beijing on October 28
- and Amsterdam on November 15
+ Upcoming meetup in Amsterdam on November 15
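The stemming patch above pipes each search term through the lunr stemmer registered for the active documentation language before the trailing-wildcard term is added to the query. A minimal standalone sketch of the same idea, assuming lunr.js 2.x plus a lunr-languages stemmer plugin are loaded (the `ru` plugin name below is only an example, not part of the patch):

```javascript
// Sketch of the per-term stemming step from application.js above.
// Assumes lunr.js 2.x; language stemmers such as lunr.ru come from lunr-languages.
function stemTerm(lunr, term, langs) {
    langs.forEach(function (lang) {
        // Stem only when a stemmer is actually registered for this language.
        if (typeof lunr[lang] !== 'undefined' && typeof lunr[lang].stemmer !== 'undefined') {
            // lunr pipeline functions operate on Token objects, not raw strings.
            term = lunr[lang].stemmer(new lunr.Token(term)).toString();
        }
    });
    return term;
}

// Inside idx.query(function (query) { ... }), mirroring the patch:
// query.term(stemTerm(lunr, term, ['ru']), { wildcard: lunr.Query.wildcard.TRAILING });
```

With the term stemmed first, inflected forms of the same word ("документ", "документы", "документах") collapse to a common stem, so the trailing-wildcard prefix query matches pages regardless of which form the user typed.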
From c2a818b64d8090387d24e6df0dd7d644805dc2b6 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 1 Nov 2018 21:01:10 +0300 Subject: [PATCH 64/79] StorageSystemContributors clean (#3524) * StorageSystemContributors clean * Fix --- dbms/src/Storages/System/CMakeLists.txt | 4 +- .../System/StorageSystemContributors.cpp | 6 +- .../StorageSystemContributors.generated.cpp | 521 +++++++++--------- .../System/StorageSystemContributors.h | 4 - .../System/StorageSystemContributors.sh | 4 +- .../Storages/System/attachSystemTables.cpp | 6 +- .../0_stateless/00747_contributors.sql | 3 +- 7 files changed, 267 insertions(+), 281 deletions(-) diff --git a/dbms/src/Storages/System/CMakeLists.txt b/dbms/src/Storages/System/CMakeLists.txt index f445a3e1e29..b4783ffa315 100644 --- a/dbms/src/Storages/System/CMakeLists.txt +++ b/dbms/src/Storages/System/CMakeLists.txt @@ -1,8 +1,6 @@ # The file StorageSystemContributors.cpp is generated at release time and committed to the source tree. # You can also regenerate it manually this way: -#if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.generated.cpp) -# execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh) -#endif() +# execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh) set (CONFIG_BUILD ${CMAKE_CURRENT_BINARY_DIR}/StorageSystemBuildOptions.generated.cpp) get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPERTY COMPILE_DEFINITIONS) diff --git a/dbms/src/Storages/System/StorageSystemContributors.cpp b/dbms/src/Storages/System/StorageSystemContributors.cpp index 99c720e0f97..6e165a4ee40 100644 --- a/dbms/src/Storages/System/StorageSystemContributors.cpp +++ b/dbms/src/Storages/System/StorageSystemContributors.cpp @@ -1,10 +1,8 @@ -#if __has_include("StorageSystemContributors.generated.cpp") - #include "StorageSystemContributors.h" #include -#include #include +#include #include @@ -32,5 +30,3 @@ void StorageSystemContributors::fillData(MutableColumns & res_columns, const Con res_columns[0]->insert(String(it)); } } - -#endif diff --git a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp index a92e0ccfafb..135fb357ff5 100644 --- a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp @@ -1,262 +1,263 @@ // autogenerated by ./StorageSystemContributors.sh const char * auto_contributors[] { -"Alberto", -"Aleksandra (Ася)", -"Alex Bocharov", -"Alex Krash", -"Alex Zatelepin", -"Alexander Avdonkin", -"Alexander Krasheninnikov", -"Alexander Lukin", -"Alexander Makarov", -"Alexander Marshalov", -"Alexander Millin", -"Alexander Prudaev", -"Alexander Sapin", -"Alexandr Krasheninnikov", -"Alexandr Orlov", -"Alexei Averchenko", -"Alexey Arno", -"Alexey Milovidov", -"Alexey Tronov", -"Alexey Vasiliev", -"Alexey Zatelepin", -"Alexsey Shestakov", -"Aliaksandr Pliutau", -"Amos Bird", -"Amy Krishnevsky", -"Anastasiya Tsarkova", -"AndreevDm", -"Andrew Grigorev", -"Andrey", -"Andrey Dudin", -"Andrey M", -"Andrey Mironov", -"Andrey Urusov", -"Anton Popov", -"Anton Tihonov", -"Anton Tikhonov", -"Arsen Hakobyan", -"Artem Andreenko", -"Artem Zuikov", -"Artemeey", -"Artemkin Pavel", -"Arthur Tokarchuk", -"Atri Sharma", -"Babacar Diassé", -"BayoNet", -"BlahGeek", -"Bogdan", -"Bogdan Voronin", -"Bolinov", -"Brett Hoerner", -"Bulat Gaifullin", -"Chen Yufei", -"Ciprian Hacman", -"Constantin S. 
Pan", -"CurtizJ", -"Daniel Bershatsky", -"Denis Burlaka", -"Denis Zhuravlev", -"Derek Perkins", -"Dmitry Bilunov", -"Dmitry Galuza", -"Dmitry Luhtionov", -"Dmitry Moskowski", -"Dmitry S..ky / skype: dvska-at-skype", -"Eugene Klimov", -"Eugene Konkov", -"Evgeniy Gatov", -"Evgeniy Udodov", -"Evgeny Konkov", -"Flowyi", -"Fruit of Eden", -"George", -"George3d6", -"Guillaume Tassery", -"Hamoon", -"Hiroaki Nakamura", -"Igor", -"Igor Hatarist", -"Igor Strykhar", -"Ilya", -"Ilya Breev", -"Ilya Khomutov", -"Ilya Korolev", -"Ilya Shipitsin", -"Ivan Babrou", -"Ivan Blinkov", -"Ivan He", -"Ivan Zhukov", -"Jason", -"Jean Baptiste Favre", -"Jonatas Freitas", -"Keiji Yoshida", -"Kirill Malev", -"Kirill Shvakov", -"KochetovNicolai", -"Konstantin Grabar", -"Konstantin Lebedev", -"Konstantin Podshumok", -"Leonardo Cecchi", -"Lev Borodin", -"Liu Cong", -"LiuCong", -"LiuYangkuan", -"Luis Bosque", -"Maks Skorokhod", -"Maksim", -"Marek Vavrusa", -"Marek Vavruša", -"Marek Vavruša", -"Marsel Arduanov", -"Max Akhmedov", -"Max Vetrov", -"Maxim Fridental", -"Maxim Khrisanfov", -"Maxim Nikulin", -"MaximAL", -"Michael Furmur", -"Michael Kolupaev", -"Michael Razuvaev", -"Mikhail Filimonov", -"Mikhail Salosin", -"Mikhail Surin", -"Mikhail f. Shiryaev", -"Milad Arabi", -"Narek Galstyan", -"Nicolae Vartolomei", -"Nikita Vasilev", -"Nikolai Kochetov", -"Nikolay Kirsh", -"Nikolay Vasiliev", -"Nikolay Volosatov", -"Okada Haruki", -"Oleg Komarov", -"Oleg Obleukhov", -"Olga Khvostikova", -"Orivej Desh", -"Pavel", -"Pavel Kartaviy", -"Pavel Kartavyy", -"Pavel Litvinenko", -"Pavel Patrin", -"Pavel Yakunin", -"Pawel Rog", -"Ravengg", -"Reto Kromer", -"Roman Lipovsky", -"Roman Nozdrin", -"Roman Peshkurov", -"Roman Tsisyk", -"SaltTan", -"Sergei Tsetlin (rekub)", -"Sergey Elantsev", -"Sergey Fedorov", -"Sergey Lazarev", -"Sergey Magidovich", -"Sergey V. 
Galtsev", -"Sergey Zaikin", -"Silviu Caragea", -"Simon Podlipsky", -"Sjoerd Mulder", -"Snow", -"Stanislav Pavlovichev", -"Stas Pavlovichev", -"SuperBot", -"Tangaev", -"The-Alchemist", -"Tobias Adamson", -"Tsarkova Anastasia", -"Vadim", -"Vadim Plakhtinskiy", -"Vadim Skipin", -"VadimPE", -"Valera Ryaboshapko", -"Vasily Nemkov", -"Vasily Okunev", -"Veloman Yunkan", -"Veniamin Gvozdikov", -"Victor Tarnavsky", -"Vitaliy Lyudvichenko", -"Vitaly Samigullin", -"Vlad Arkhipov", -"Vladimir Chebotarev", -"Vladimir Kolobaev", -"Vladimir Kozbin", -"Vladimir Smirnov", -"Vladislav Rassokhin", -"Vojtech Splichal", -"Vsevolod Orlov", -"Vyacheslav Alipov", -"William Shallum", -"Winter Zhang", -"Yegor Andreenko", -"Yuri Dyachenko", -"Yurii Vlasenko", -"Yury Karpovich", -"Yury Stankevich", -"abyss7", -"alesapin", -"alexey-milovidov", -"ap11", -"aprudaev", -"artpaul", -"avsharapov", -"blazerer", -"bseng", -"cekc", -"champtar", -"chenxing-xc", -"chenxing.xc", -"chertus", -"daoready", -"decaseal", -"egatov", -"elBroom", -"ezhaka", -"f1yegor", -"felixoid", -"filimonov", -"flow", -"glockbender", -"hotid", -"igor", -"ivanzhukov", -"javi", -"javi santana", -"kmeaw", -"ks1322", -"kshvakov", -"leozhang", -"liuyimin", -"lomberts", -"mfridental", -"morty", -"ns-vasilev", -"orantius", -"peshkurov", -"proller", -"pyos", -"robot-clickhouse", -"robot-metrika-test", -"root", -"santaux", -"serebrserg", -"shedx", -"stavrolia", -"sundy-li", -"sundyli", -"topvisor", -"velom", -"zamulla", -"zhang2014", -"Георгий Кондратьев", -"Дмитрий Канатников", -"Иванов Евгений", -"Павел Литвиненко", -"Смитюх Вячеслав", -"Сундуков Алексей", -"张健", -"谢磊", -nullptr }; + "Alberto", + "Aleksandra (Ася)", + "Alex Bocharov", + "Alex Krash", + "Alex Zatelepin", + "Alexander Avdonkin", + "Alexander Krasheninnikov", + "Alexander Lukin", + "Alexander Makarov", + "Alexander Marshalov", + "Alexander Millin", + "Alexander Prudaev", + "Alexander Sapin", + "Alexandr Krasheninnikov", + "Alexandr Orlov", + "Alexei Averchenko", + "Alexey Arno", + "Alexey Milovidov", + "Alexey Tronov", + "Alexey Vasiliev", + "Alexey Zatelepin", + "Alexsey Shestakov", + "Aliaksandr Pliutau", + "Amos Bird", + "Amy Krishnevsky", + "Anastasiya Tsarkova", + "AndreevDm", + "Andrew Grigorev", + "Andrey", + "Andrey Dudin", + "Andrey M", + "Andrey Mironov", + "Andrey Urusov", + "Anton Popov", + "Anton Tihonov", + "Anton Tikhonov", + "Arsen Hakobyan", + "Artem Andreenko", + "Artem Zuikov", + "Artemeey", + "Artemkin Pavel", + "Arthur Tokarchuk", + "Atri Sharma", + "Babacar Diassé", + "BayoNet", + "BlahGeek", + "Bogdan", + "Bogdan Voronin", + "Bolinov", + "Brett Hoerner", + "Bulat Gaifullin", + "Chen Yufei", + "Ciprian Hacman", + "Constantin S. 
Pan", + "CurtizJ", + "Daniel Bershatsky", + "Denis Burlaka", + "Denis Zhuravlev", + "Derek Perkins", + "Dmitry Bilunov", + "Dmitry Galuza", + "Dmitry Luhtionov", + "Dmitry Moskowski", + "Dmitry S..ky / skype: dvska-at-skype", + "Eugene Klimov", + "Eugene Konkov", + "Evgeniy Gatov", + "Evgeniy Udodov", + "Evgeny Konkov", + "Flowyi", + "Fruit of Eden", + "George", + "George3d6", + "Guillaume Tassery", + "Hamoon", + "Hiroaki Nakamura", + "Igor", + "Igor Hatarist", + "Igor Strykhar", + "Ilya", + "Ilya Breev", + "Ilya Khomutov", + "Ilya Korolev", + "Ilya Shipitsin", + "Ivan Babrou", + "Ivan Blinkov", + "Ivan He", + "Ivan Zhukov", + "Jason", + "Jean Baptiste Favre", + "Jonatas Freitas", + "Keiji Yoshida", + "Kirill Malev", + "Kirill Shvakov", + "KochetovNicolai", + "Konstantin Grabar", + "Konstantin Lebedev", + "Konstantin Podshumok", + "Leonardo Cecchi", + "Lev Borodin", + "Liu Cong", + "LiuCong", + "LiuYangkuan", + "Luis Bosque", + "Maks Skorokhod", + "Maksim", + "Marek Vavrusa", + "Marek Vavruša", + "Marek Vavruša", + "Marsel Arduanov", + "Max Akhmedov", + "Max Vetrov", + "Maxim Fridental", + "Maxim Khrisanfov", + "Maxim Nikulin", + "MaximAL", + "Michael Furmur", + "Michael Kolupaev", + "Michael Razuvaev", + "Mikhail Filimonov", + "Mikhail Salosin", + "Mikhail Surin", + "Mikhail f. Shiryaev", + "Milad Arabi", + "Narek Galstyan", + "Nicolae Vartolomei", + "Nikita Vasilev", + "Nikolai Kochetov", + "Nikolay Kirsh", + "Nikolay Vasiliev", + "Nikolay Volosatov", + "Okada Haruki", + "Oleg Komarov", + "Oleg Obleukhov", + "Olga Khvostikova", + "Orivej Desh", + "Pavel", + "Pavel Kartaviy", + "Pavel Kartavyy", + "Pavel Litvinenko", + "Pavel Patrin", + "Pavel Yakunin", + "Pawel Rog", + "Ravengg", + "Reto Kromer", + "Roman Lipovsky", + "Roman Nozdrin", + "Roman Peshkurov", + "Roman Tsisyk", + "SaltTan", + "Sergei Tsetlin (rekub)", + "Sergey Elantsev", + "Sergey Fedorov", + "Sergey Lazarev", + "Sergey Magidovich", + "Sergey V. 
Galtsev", + "Sergey Zaikin", + "Silviu Caragea", + "Simon Podlipsky", + "Sjoerd Mulder", + "Snow", + "Stanislav Pavlovichev", + "Stas Pavlovichev", + "SuperBot", + "Tangaev", + "The-Alchemist", + "Tobias Adamson", + "Tsarkova Anastasia", + "Vadim", + "Vadim Plakhtinskiy", + "Vadim Skipin", + "VadimPE", + "Valera Ryaboshapko", + "Vasily Nemkov", + "Vasily Okunev", + "Veloman Yunkan", + "Veniamin Gvozdikov", + "Victor Tarnavsky", + "Vitaliy Lyudvichenko", + "Vitaly Samigullin", + "Vlad Arkhipov", + "Vladimir Chebotarev", + "Vladimir Kolobaev", + "Vladimir Kozbin", + "Vladimir Smirnov", + "Vladislav Rassokhin", + "Vojtech Splichal", + "Vsevolod Orlov", + "Vyacheslav Alipov", + "William Shallum", + "Winter Zhang", + "Yegor Andreenko", + "Yuri Dyachenko", + "Yurii Vlasenko", + "Yury Karpovich", + "Yury Stankevich", + "abyss7", + "alesapin", + "alexey-milovidov", + "ap11", + "aprudaev", + "artpaul", + "avsharapov", + "blazerer", + "bseng", + "cekc", + "champtar", + "chenxing-xc", + "chenxing.xc", + "chertus", + "daoready", + "decaseal", + "egatov", + "elBroom", + "ezhaka", + "f1yegor", + "felixoid", + "filimonov", + "flow", + "ggerogery", + "glockbender", + "hotid", + "igor", + "ivanzhukov", + "javi", + "javi santana", + "kmeaw", + "ks1322", + "kshvakov", + "leozhang", + "liuyimin", + "lomberts", + "mfridental", + "morty", + "ns-vasilev", + "orantius", + "peshkurov", + "proller", + "pyos", + "robot-clickhouse", + "robot-metrika-test", + "root", + "santaux", + "serebrserg", + "shedx", + "stavrolia", + "sundy-li", + "sundyli", + "topvisor", + "velom", + "zamulla", + "zhang2014", + "Георгий Кондратьев", + "Дмитрий Канатников", + "Иванов Евгений", + "Павел Литвиненко", + "Смитюх Вячеслав", + "Сундуков Алексей", + "张健", + "谢磊", + nullptr}; diff --git a/dbms/src/Storages/System/StorageSystemContributors.h b/dbms/src/Storages/System/StorageSystemContributors.h index b62895d5788..4e2a47960f3 100644 --- a/dbms/src/Storages/System/StorageSystemContributors.h +++ b/dbms/src/Storages/System/StorageSystemContributors.h @@ -1,7 +1,5 @@ #pragma once -#if __has_include("StorageSystemContributors.generated.cpp") - #include #include @@ -30,5 +28,3 @@ public: static NamesAndTypesList getNamesAndTypes(); }; } - -#endif diff --git a/dbms/src/Storages/System/StorageSystemContributors.sh b/dbms/src/Storages/System/StorageSystemContributors.sh index 58895d1b781..93f97d19027 100755 --- a/dbms/src/Storages/System/StorageSystemContributors.sh +++ b/dbms/src/Storages/System/StorageSystemContributors.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CONTRIBUTORS_FILE=${CONTRIBUTORS_FILE=$CUR_DIR/StorageSystemContributors.generated.cpp} -git shortlog --summary | perl -lnE 's/^\s+\d+\s+(.+)/"$1",/; next unless $1; say $_' > $CONTRIBUTORS_FILE.tmp +git shortlog --summary | perl -lnE 's/^\s+\d+\s+(.+)/ "$1",/; next unless $1; say $_' > $CONTRIBUTORS_FILE.tmp # If git history not available - dont make target file if [ ! -s $CONTRIBUTORS_FILE.tmp ]; then @@ -18,7 +18,7 @@ fi echo "// autogenerated by $0" > $CONTRIBUTORS_FILE echo "const char * auto_contributors[] {" >> $CONTRIBUTORS_FILE cat $CONTRIBUTORS_FILE.tmp >> $CONTRIBUTORS_FILE -echo "nullptr };" >> $CONTRIBUTORS_FILE +echo -e " nullptr};" >> $CONTRIBUTORS_FILE echo "Collected `cat $CONTRIBUTORS_FILE.tmp | wc -l` contributors." 
rm $CONTRIBUTORS_FILE.tmp diff --git a/dbms/src/Storages/System/attachSystemTables.cpp b/dbms/src/Storages/System/attachSystemTables.cpp index facaa6c4fd3..34e03032ff1 100644 --- a/dbms/src/Storages/System/attachSystemTables.cpp +++ b/dbms/src/Storages/System/attachSystemTables.cpp @@ -32,9 +32,7 @@ #include #include #include -#if __has_include("StorageSystemContributors.generated.cpp") -# include -#endif +#include namespace DB @@ -59,9 +57,7 @@ void attachSystemTablesLocal(IDatabase & system_database) system_database.attachTable("data_type_families", StorageSystemDataTypeFamilies::create("data_type_families")); system_database.attachTable("collations", StorageSystemCollations::create("collations")); system_database.attachTable("table_engines", StorageSystemTableEngines::create("table_engines")); -#if __has_include("StorageSystemContributors.generated.cpp") system_database.attachTable("contributors", StorageSystemContributors::create("contributors")); -#endif } void attachSystemTablesServer(IDatabase & system_database, bool has_zookeeper) diff --git a/dbms/tests/queries/0_stateless/00747_contributors.sql b/dbms/tests/queries/0_stateless/00747_contributors.sql index ec75d877841..d32cd367e1e 100644 --- a/dbms/tests/queries/0_stateless/00747_contributors.sql +++ b/dbms/tests/queries/0_stateless/00747_contributors.sql @@ -1,2 +1 @@ --- Normally table should contain 250+ contributors. But when fast git clone used (--depth=X) (Travis build) table will contain only <=X contributors -SELECT if ((SELECT count(*) FROM system.contributors) > 1, 'ok', 'fail'); +SELECT if ((SELECT count(*) FROM system.contributors) > 200, 'ok', 'fail'); From 97662f07d1195844c0d05272b72c410186156feb Mon Sep 17 00:00:00 2001 From: Amy Krishnevsky Date: Thu, 1 Nov 2018 22:58:24 +0300 Subject: [PATCH 65/79] translated updates to changelog --- CHANGELOG.md | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 855931a8da5..99d022ea792 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,151 @@ +## ClickHouse release 18.14.11, 2018-10-29 + +### Bug fixes: + +* Fixed the error `Block structure mismatch in UNION stream: different number of columns` in LIMIT queries. [#2156](https://github.com/yandex/ClickHouse/issues/2156) +* Fixed errors when merging data in tables containing arrays inside Nested structures. [#3397](https://github.com/yandex/ClickHouse/pull/3397) +* Fixed incorrect query results if the `merge_tree_uniform_read_distribution` setting is disabled (it is enabled by default). [#3429](https://github.com/yandex/ClickHouse/pull/3429) +* Fixed an error on inserts to a Distributed table in Native format. [#3411](https://github.com/yandex/ClickHouse/issues/3411) + +## ClickHouse release 18.14.10, 2018-10-23 + +* The `compile_expressions` setting (JIT compilation of expressions) is disabled by default. [#3410](https://github.com/yandex/ClickHouse/pull/3410) +* The `enable_optimize_predicate_expression` setting is disabled by default. + +## ClickHouse release 18.14.9, 2018-10-16 + +### New features: + +* The `WITH CUBE` modifier for `GROUP BY` (the alternative syntax `GROUP BY CUBE(...)` is also available). [#3172](https://github.com/yandex/ClickHouse/pull/3172) +* Added the `formatDateTime` function. [Alexandr Krasheninnikov](https://github.com/yandex/ClickHouse/pull/2770) +* Added the `JDBC` table engine and `jdbc` table function (requires installing clickhouse-jdbc-bridge). 
[Alexandr Krasheninnikov](https://github.com/yandex/ClickHouse/pull/3210)
+* Added functions for working with the ISO week number: `toISOWeek`, `toISOYear`, `toStartOfISOYear`, and `toDayOfYear`. [#3146](https://github.com/yandex/ClickHouse/pull/3146)
+* Now you can use `Nullable` columns for `MySQL` and `ODBC` tables. [#3362](https://github.com/yandex/ClickHouse/pull/3362)
+* Nested data structures can be read as nested objects in `JSONEachRow` format. Added the `input_format_import_nested_json` setting. [Veloman Yunkan](https://github.com/yandex/ClickHouse/pull/3144)
+* Parallel processing is available for many `MATERIALIZED VIEW`s when inserting data. See the `parallel_view_processing` setting. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3208)
+* Added the `SYSTEM FLUSH LOGS` query (forced log flushes to system tables such as `query_log`). [#3321](https://github.com/yandex/ClickHouse/pull/3321)
+* Now you can use pre-defined `database` and `table` macros when declaring `Replicated` tables. [#3251](https://github.com/yandex/ClickHouse/pull/3251)
+* Added the ability to read `Decimal` type values in engineering notation (indicating powers of ten). [#3153](https://github.com/yandex/ClickHouse/pull/3153)
+
+### Experimental features:
+
+* Optimization of the GROUP BY clause for `LowCardinality` data types. [#3138](https://github.com/yandex/ClickHouse/pull/3138)
+* Optimized calculation of expressions for `LowCardinality` data types. [#3200](https://github.com/yandex/ClickHouse/pull/3200)
+
+### Improvements:
+
+* Significantly reduced memory consumption for requests with `ORDER BY` and `LIMIT`. See the `max_bytes_before_remerge_sort` setting. [#3205](https://github.com/yandex/ClickHouse/pull/3205)
+* If no `JOIN` type is specified (`LEFT`, `INNER`, ...), `INNER JOIN` is assumed. [#3147](https://github.com/yandex/ClickHouse/pull/3147)
+* Qualified asterisks work correctly in queries with `JOIN`. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3202)
+* The `ODBC` table engine correctly chooses the method for quoting identifiers in the SQL dialect of a remote database. [Alexandr Krasheninnikov](https://github.com/yandex/ClickHouse/pull/3210)
+* The `compile_expressions` setting (JIT compilation of expressions) is enabled by default.
+* Fixed behavior for simultaneous DROP DATABASE/TABLE IF EXISTS and CREATE DATABASE/TABLE IF NOT EXISTS. Previously, a `CREATE DATABASE ... IF NOT EXISTS` query could return the error message "File ... already exists", and the `CREATE TABLE ... IF NOT EXISTS` and `DROP TABLE IF EXISTS` queries could return `Table ... is creating or attaching right now`. [#3101](https://github.com/yandex/ClickHouse/pull/3101)
+* LIKE and IN expressions with a constant right half are passed to the remote server when querying from MySQL or ODBC tables. [#3182](https://github.com/yandex/ClickHouse/pull/3182)
+* Comparisons with constant expressions in a WHERE clause are passed to the remote server when querying from MySQL and ODBC tables. Previously, only comparisons with constants were passed. [#3182](https://github.com/yandex/ClickHouse/pull/3182)
+* Correct calculation of row width in the terminal for `Pretty` formats, including strings with CJK characters. [Amos Bird](https://github.com/yandex/ClickHouse/pull/3257).
+* `ON CLUSTER` can be specified for `ALTER UPDATE` queries.
+* Improved performance for reading data in `JSONEachRow` format. [#3332](https://github.com/yandex/ClickHouse/pull/3332)
+* Added synonyms for the `LENGTH` and `CHARACTER_LENGTH` functions for compatibility. The `CONCAT` function is no longer case-sensitive. [#3306](https://github.com/yandex/ClickHouse/pull/3306)
+* Added the `TIMESTAMP` synonym for the `DateTime` type. [#3390](https://github.com/yandex/ClickHouse/pull/3390)
+* There is always space reserved for query_id in the server logs, even if the log line is not related to a query. This makes it easier to parse server text logs with third-party tools.
+* Memory consumption by a query is logged when it exceeds the next level of an integer number of gigabytes. [#3205](https://github.com/yandex/ClickHouse/pull/3205)
+* Added compatibility mode for the case when the client library that uses the Native protocol sends fewer columns by mistake than the server expects for the INSERT query. This scenario was possible when using the clickhouse-cpp library. Previously, this scenario caused the server to crash. [#3171](https://github.com/yandex/ClickHouse/pull/3171)
+* In a user-defined WHERE expression in `clickhouse-copier`, you can now use a `partition_key` alias (for additional filtering by source table partition). This is useful if the partitioning scheme changes during copying, but only changes slightly. [#3166](https://github.com/yandex/ClickHouse/pull/3166)
+* The workflow of the `Kafka` engine has been moved to a background thread pool in order to automatically reduce the speed of data reading at high loads. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3215).
+* Support for reading `Tuple` and `Nested` values of structures like `struct` in the `Cap'n'Proto` format. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3216)
+* The list of top-level domains for the `firstSignificantSubdomain` function now includes the domain `biz`. [decaseal](https://github.com/yandex/ClickHouse/pull/3219)
+* In the configuration of external dictionaries, `null_value` is interpreted as the value of the default data type. [#3330](https://github.com/yandex/ClickHouse/pull/3330)
+* Support for the `intDiv` and `intDivOrZero` functions for `Decimal`. [b48402e8](https://github.com/yandex/ClickHouse/commit/b48402e8712e2b9b151e0eef8193811d433a1264)
+* Support for the `Date`, `DateTime`, `UUID`, and `Decimal` types as a key for the `sumMap` aggregate function. [#3281](https://github.com/yandex/ClickHouse/pull/3281)
+* Support for the `Decimal` data type in external dictionaries. [#3324](https://github.com/yandex/ClickHouse/pull/3324)
+* Support for the `Decimal` data type in `SummingMergeTree` tables. [#3348](https://github.com/yandex/ClickHouse/pull/3348)
+* Added specializations for `UUID` in `if`. [#3366](https://github.com/yandex/ClickHouse/pull/3366)
+* Reduced the number of `open` and `close` system calls when reading from a `MergeTree` table. [#3283](https://github.com/yandex/ClickHouse/pull/3283)
+* A `TRUNCATE TABLE` query can be executed on any replica (the query is passed to the leader replica). [Kirill Shvakov](https://github.com/yandex/ClickHouse/pull/3375)
+
+### Bug fixes:
+
+* Fixed an issue with `Dictionary` tables for `range_hashed` dictionaries. This error occurred in version 18.12.17. [#1702](https://github.com/yandex/ClickHouse/pull/1702)
+* Fixed an error when loading `range_hashed` dictionaries (the message `Unsupported type Nullable (...)`). This error occurred in version 18.12.17.
[#3362](https://github.com/yandex/ClickHouse/pull/3362) +* Fixed errors in the `pointInPolygon` function due to the accumulation of inaccurate calculations for polygons with a large number of vertices located close to each other. [#3331](https://github.com/yandex/ClickHouse/pull/3331) [#3341](https://github.com/yandex/ClickHouse/pull/3341) +* If after merging data parts, the checksum for the resulting part differs from the result of the same merge in another replica, the result of the merge is deleted and the data part is downloaded from the other replica (this is the correct behavior). But after downloading the data part, it couldn't be added to the working set because of an error that the part already exists (because the data part was deleted with some delay after the merge). This led to cyclical attempts to download the same data. [#3194](https://github.com/yandex/ClickHouse/pull/3194) +* Fixed incorrect calculation of total memory consumption by queries (because of incorrect calculation, the `max_memory_usage_for_all_queries` setting worked incorrectly and the `MemoryTracking` metric had an incorrect value). This error occurred in version 18.12.13. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3344) +* Fixed the functionality of `CREATE TABLE ... ON CLUSTER ... AS SELECT ...` This error occurred in version 18.12.13. [#3247](https://github.com/yandex/ClickHouse/pull/3247) +* Fixed unnecessary preparation of data structures for `JOIN`s on the server that initiates the request if the `JOIN` is only performed on remote servers. [#3340](https://github.com/yandex/ClickHouse/pull/3340) +* Fixed bugs in the `Kafka` engine: deadlocks after exceptions when starting to read data, and locks upon completion [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3215). +* For `Kafka` tables, the optional `schema` parameter was not passed (the schema of the `Cap'n'Proto` format). [Vojtech Splichal](https://github.com/yandex/ClickHouse/pull/3150) +* If the ensemble of ZooKeeper servers has servers that accept the connection but then immediately close it instead of responding to the handshake, ClickHouse chooses to connect another server. Previously, this produced the error `Cannot read all data. Bytes read: 0. Bytes expected: 4.` and the server couldn't start. [8218cf3a](https://github.com/yandex/ClickHouse/commit/8218cf3a5f39a43401953769d6d12a0bb8d29da9) +* If the ensemble of ZooKeeper servers contains servers for which the DNS query returns an error, these servers are ignored. [17b8e209](https://github.com/yandex/ClickHouse/commit/17b8e209221061325ad7ba0539f03c6e65f87f29) +* Fixed type conversion between `Date` and `DateTime` when inserting data in the `VALUES` format (if `input_format_values_interpret_expressions = 1`). Previously, the conversion was performed between the numerical value of the number of days in Unix Epoch time and the Unix timestamp, which led to unexpected results. [#3229](https://github.com/yandex/ClickHouse/pull/3229) +* Corrected type conversion between `Decimal` and integer numbers. [#3211](https://github.com/yandex/ClickHouse/pull/3211) +* Fixed errors in the `enable_optimize_predicate_expression` setting. 
[Winter Zhang](https://github.com/yandex/ClickHouse/pull/3231) +* Fixed a parsing error in CSV format with floating-point numbers if a non-default CSV separator is used, such as `;` [#3155](https://github.com/yandex/ClickHouse/pull/3155) +* Fixed the `arrayCumSumNonNegative` function (it does not accumulate negative values if the accumulator is less than zero). [Aleksey Studnev](https://github.com/yandex/ClickHouse/pull/3163) +* Fixed how `Merge` tables work on top of `Distributed` tables when using `PREWHERE`. [#3165](https://github.com/yandex/ClickHouse/pull/3165) +* Bug fixes in the `ALTER UPDATE` query. +* Fixed bugs in the `odbc` table function that appeared in version 18.12. [#3197](https://github.com/yandex/ClickHouse/pull/3197) +* Fixed the operation of aggregate functions with `StateArray` combinators. [#3188](https://github.com/yandex/ClickHouse/pull/3188) +* Fixed a crash when dividing a `Decimal` value by zero. [69dd6609](https://github.com/yandex/ClickHouse/commit/69dd6609193beb4e7acd3e6ad216eca0ccfb8179) +* Fixed output of types for operations using `Decimal` and integer arguments. [#3224](https://github.com/yandex/ClickHouse/pull/3224) +* Fixed the segfault during `GROUP BY` on `Decimal128`. [3359ba06](https://github.com/yandex/ClickHouse/commit/3359ba06c39fcd05bfdb87d6c64154819621e13a) +* The `log_query_threads` setting (logging information about each thread of query execution) now takes effect only if the `log_queries` option (logging information about queries) is set to 1. Since the `log_query_threads` option is enabled by default, information about threads was previously logged even if query logging was disabled. [#3241](https://github.com/yandex/ClickHouse/pull/3241) +* Fixed an error in the distributed operation of the quantiles aggregate function (the error message `Not found column quantile...`). [292a8855](https://github.com/yandex/ClickHouse/commit/292a885533b8e3b41ce8993867069d14cbd5a664) +* Fixed the compatibility problem when working on a cluster of version 18.12.17 servers and older servers at the same time. For distributed queries with GROUP BY keys of both fixed and non-fixed length, if there was a large amount of data to aggregate, the returned data was not always fully aggregated (two different rows contained the same aggregation keys). [#3254](https://github.com/yandex/ClickHouse/pull/3254) +* Fixed handling of substitutions in `clickhouse-performance-test`, if the query contains only part of the substitutions declared in the test. [#3263](https://github.com/yandex/ClickHouse/pull/3263) +* Fixed an error when using `FINAL` with `PREWHERE`. [#3298](https://github.com/yandex/ClickHouse/pull/3298) +* Fixed an error when using `PREWHERE` over columns that were added during `ALTER`. [#3298](https://github.com/yandex/ClickHouse/pull/3298) +* Added a check for the absence of `arrayJoin` for `DEFAULT` and `MATERIALIZED` expressions. Previously, `arrayJoin` led to an error when inserting data. [#3337](https://github.com/yandex/ClickHouse/pull/3337) +* Added a check for the absence of `arrayJoin` in a `PREWHERE` clause. Previously, this led to messages like `Size ... doesn't match` or `Unknown compression method` when executing queries. [#3357](https://github.com/yandex/ClickHouse/pull/3357) +* Fixed segfault that could occur in rare cases after optimization that replaced AND chains from equality evaluations with the corresponding IN expression. 
[liuyimin-bytedance](https://github.com/yandex/ClickHouse/pull/3339)
+* Minor corrections to `clickhouse-benchmark`: previously, client information was not sent to the server; now the number of queries executed is calculated more accurately when shutting down and for limiting the number of iterations. [#3351](https://github.com/yandex/ClickHouse/pull/3351) [#3352](https://github.com/yandex/ClickHouse/pull/3352)
+
+### Backward incompatible changes:
+
+* Removed the `allow_experimental_decimal_type` option. The `Decimal` data type is now available by default. [#3329](https://github.com/yandex/ClickHouse/pull/3329)
+
+## ClickHouse release 18.12.17, 2018-09-16
+
+### New features:
+
+* `invalidate_query` (the ability to specify a query to check whether an external dictionary needs to be updated) is implemented for the `clickhouse` source. [#3126](https://github.com/yandex/ClickHouse/pull/3126)
+* Added the ability to use `UInt*`, `Int*`, and `DateTime` data types (along with the `Date` type) as a `range_hashed` external dictionary key that defines the boundaries of ranges. Now `NULL` can be used to designate an open range. [Vasily Nemkov](https://github.com/yandex/ClickHouse/pull/3123)
+* The `Decimal` type now supports `var*` and `stddev*` aggregate functions. [#3129](https://github.com/yandex/ClickHouse/pull/3129)
+* The `Decimal` type now supports mathematical functions (`exp`, `sin`, and so on). [#3129](https://github.com/yandex/ClickHouse/pull/3129)
+* The `system.part_log` table now has the `partition_id` column. [#3089](https://github.com/yandex/ClickHouse/pull/3089)
+
+### Bug fixes:
+
+* `Merge` now works correctly on `Distributed` tables. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3159)
+* Fixed incompatibility (unnecessary dependency on the `glibc` version) that made it impossible to run ClickHouse on `Ubuntu Precise` and older versions. The incompatibility arose in version 18.12.13. [#3130](https://github.com/yandex/ClickHouse/pull/3130)
+* Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3107)
+* Fixed a minor issue with backwards compatibility that appeared when working with a cluster of replicas on versions earlier than 18.12.13 and simultaneously creating a new replica of a table on a server with a newer version (shown in the message `Can not clone replica, because the ... updated to new ClickHouse version`, which is logical, but shouldn't happen). [#3122](https://github.com/yandex/ClickHouse/pull/3122)
+
+### Backward incompatible changes:
+
+* The `enable_optimize_predicate_expression` option is enabled by default (which is rather optimistic). If query analysis errors occur that are related to searching for the column names, set `enable_optimize_predicate_expression` to 0. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3107)
+
+## ClickHouse release 18.12.14, 2018-09-13
+
+### New features:
+
+* Added support for `ALTER UPDATE` queries. [#3035](https://github.com/yandex/ClickHouse/pull/3035)
+* Added the `allow_ddl` option, which restricts the user's access to DDL queries. [#3104](https://github.com/yandex/ClickHouse/pull/3104)
+* Added the `min_merge_bytes_to_use_direct_io` option for `MergeTree` engines, which allows you to set a threshold for the total size of the merge (when above the threshold, data part files will be handled using O_DIRECT).
[#3117](https://github.com/yandex/ClickHouse/pull/3117) +* The `system.merges` system table now contains the `partition_id` column. [#3099](https://github.com/yandex/ClickHouse/pull/3099) + +### Improvements + +* If a data part remains unchanged during mutation, it isn't downloaded by replicas. [#3103](https://github.com/yandex/ClickHouse/pull/3103) +* Autocomplete is available for names of settings when working with `clickhouse-client`. [#3106](https://github.com/yandex/ClickHouse/pull/3106) + +### Bug fixes: + +* Added a check for the sizes of arrays that are elements of `Nested` type fields when inserting. [#3118](https://github.com/yandex/ClickHouse/pull/3118) +* Fixed an error updating external dictionaries with the `ODBC` source and `hashed` storage. This error occurred in version 18.12.13. +* Fixed a crash when creating a temporary table from a query with an `IN` condition. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3098) +* Fixed an error in aggregate functions for arrays that can have `NULL` elements. [Winter Zhang](https://github.com/yandex/ClickHouse/pull/3097) + + ## ClickHouse release 18.12.13, 2018-09-10 ### New features: From 9332dd5d2fe5c1424461795cbc0af7716de6a226 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 2 Nov 2018 12:56:37 +0300 Subject: [PATCH 66/79] Removed column clone from project action. --- dbms/src/Interpreters/ExpressionActions.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 1daf0bc3729..c2ab8b3a662 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -485,8 +485,6 @@ void ExpressionAction::execute(Block & block, std::unordered_map Date: Fri, 2 Nov 2018 12:57:29 +0300 Subject: [PATCH 67/79] Removed column clone from project action. --- dbms/src/Interpreters/ExpressionActions.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index c2ab8b3a662..e06a8d11aeb 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -271,8 +271,6 @@ void ExpressionAction::prepare(Block & sample_block, const Settings & settings) const std::string & name = projection[i].first; const std::string & alias = projection[i].second; ColumnWithTypeAndName column = sample_block.getByName(name); - if (column.column) - column.column = (*std::move(column.column)).mutate(); if (alias != "") column.name = alias; new_block.insert(std::move(column)); From 120e2cbe2ff4fbad626c28042d9b28781c805afe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Nov 2018 13:03:05 +0300 Subject: [PATCH 68/79] Use "ALL" as "join_default_strictness" setting by default for compatibility [#CLICKHOUSE-4087] --- dbms/src/Interpreters/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 13c800ed296..c1436267ffc 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -176,7 +176,7 @@ struct Settings \ M(SettingBool, join_use_nulls, 0, "Use NULLs for non-joined rows of outer JOINs. If false, use default value of corresponding columns data type.") \ \ - M(SettingJoinStrictness, join_default_strictness, JoinStrictness::Unspecified, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. 
If empty, query without strictness will throw exception.") \ + M(SettingJoinStrictness, join_default_strictness, JoinStrictness::ALL, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.") \ \ M(SettingUInt64, preferred_block_size_bytes, 1000000, "") \ \ From ce678f0e9c9ebe4874e57ec75e7f84c2f9316437 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 2 Nov 2018 14:46:16 +0300 Subject: [PATCH 69/79] fix join_default_strictness according to new default settings --- .../queries/0_stateless/00701_join_default_strictness.sql | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00701_join_default_strictness.sql b/dbms/tests/queries/0_stateless/00701_join_default_strictness.sql index a246ca68d45..0a952d7210b 100644 --- a/dbms/tests/queries/0_stateless/00701_join_default_strictness.sql +++ b/dbms/tests/queries/0_stateless/00701_join_default_strictness.sql @@ -14,11 +14,8 @@ INSERT INTO test.a2 VALUES (1, 2); INSERT INTO test.a2 VALUES (1, 3); INSERT INTO test.a2 VALUES (1, 4); -SELECT a, b FROM test.a1 LEFT JOIN (SELECT a, b FROM test.a2) USING a ORDER BY b; -- { serverError 417 } - SELECT a, b FROM test.a1 LEFT JOIN (SELECT a, b FROM test.a2) USING a ORDER BY b SETTINGS join_default_strictness='ANY'; - -SELECT a, b FROM test.a1 LEFT JOIN (SELECT a, b FROM test.a2) USING a ORDER BY b SETTINGS join_default_strictness='ALL'; +SELECT a, b FROM test.a1 LEFT JOIN (SELECT a, b FROM test.a2) USING a ORDER BY b; -- default SETTINGS join_default_strictness='ALL'; DROP TABLE IF EXISTS test.a1; DROP TABLE IF EXISTS test.a2; From 28806049b44f9b6a59a166303a1e3537b0560545 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Nov 2018 14:54:24 +0300 Subject: [PATCH 70/79] Return client error code to 53 --- dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql b/dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql index 9421bbb8e5c..027f9b6b472 100644 --- a/dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql +++ b/dbms/tests/queries/0_stateless/00748_insert_array_with_null.sql @@ -3,7 +3,7 @@ DROP TABLE IF EXISTS test.arraytest; CREATE TABLE test.arraytest ( created_date Date DEFAULT toDate(created_at), created_at DateTime DEFAULT now(), strings Array(String) DEFAULT emptyArrayString()) ENGINE = MergeTree(created_date, cityHash64(created_at), (created_date, cityHash64(created_at)), 8192); INSERT INTO test.arraytest (created_at, strings) VALUES (now(), ['aaaaa', 'bbbbb', 'ccccc']); -INSERT INTO test.arraytest (created_at, strings) VALUES (now(), ['aaaaa', 'bbbbb', null]); -- { clientError 321 } +INSERT INTO test.arraytest (created_at, strings) VALUES (now(), ['aaaaa', 'bbbbb', null]); -- { clientError 53 } SELECT strings from test.arraytest; From 0f5488dd830de600802e6d7b0d7cead4ab7575bd Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Nov 2018 17:16:50 +0300 Subject: [PATCH 71/79] Add russia changelog for 18.14.12 --- CHANGELOG_RU.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index 2569c655c9b..18d23e3a6ce 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,3 +1,11 @@ +## ClickHouse release 18.14.12, 2018-11-02 + +### Исправления ошибок: + +* Исправлена ошибка при join-запросе двух неименованных подзапросов. 
[#3505](https://github.com/yandex/ClickHouse/pull/3505) +* Исправлена генерация пустой `WHERE`-части при запросах к внешним базам. [hotid](https://github.com/yandex/ClickHouse/pull/3477) +* Исправлена ошибка использования неправильной настройки таймаута в ODBC-словарях. [Marek Vavruša](https://github.com/yandex/ClickHouse/pull/3511) + ## ClickHouse release 18.14.11, 2018-10-29 ### Исправления ошибок: From ef985ce7ad793f2d33787423595768024e29dc52 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Nov 2018 18:54:36 +0300 Subject: [PATCH 72/79] Test to reproduce problem --- .../test_block_structure_mismatch/__init__.py | 0 .../configs/remote_servers.xml | 18 +++++++ .../test_block_structure_mismatch/test.py | 51 +++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 dbms/tests/integration/test_block_structure_mismatch/__init__.py create mode 100644 dbms/tests/integration/test_block_structure_mismatch/configs/remote_servers.xml create mode 100644 dbms/tests/integration/test_block_structure_mismatch/test.py diff --git a/dbms/tests/integration/test_block_structure_mismatch/__init__.py b/dbms/tests/integration/test_block_structure_mismatch/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_block_structure_mismatch/configs/remote_servers.xml b/dbms/tests/integration/test_block_structure_mismatch/configs/remote_servers.xml new file mode 100644 index 00000000000..77453b0aa1c --- /dev/null +++ b/dbms/tests/integration/test_block_structure_mismatch/configs/remote_servers.xml @@ -0,0 +1,18 @@ + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + diff --git a/dbms/tests/integration/test_block_structure_mismatch/test.py b/dbms/tests/integration/test_block_structure_mismatch/test.py new file mode 100644 index 00000000000..36913d5ebe3 --- /dev/null +++ b/dbms/tests/integration/test_block_structure_mismatch/test.py @@ -0,0 +1,51 @@ +import time +import pytest + +from contextlib import contextmanager +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) +node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) + +#test reproducing issue https://github.com/yandex/ClickHouse/issues/3162 +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node1.query(''' +CREATE TABLE local_test ON CLUSTER testcluster ( + t UInt64, + date Date MATERIALIZED toDate(t/1000), + shard UInt64, + col1 String, + col2 String +) ENGINE = MergeTree +PARTITION BY toRelativeDayNum(date) +ORDER BY (t) +SETTINGS index_granularity=8192 + ''') + + node1.query(''' +CREATE TABLE dist_test ON CLUSTER testcluster ( + t UInt64, + shard UInt64, + date Date MATERIALIZED toDate(t/1000), + col1 String, + col2 String +) Engine = Distributed(testcluster, default, local_test, shard) + ''') + + time.sleep(0.5) + + yield cluster + + finally: + cluster.shutdown() + +def test(started_cluster): + node1.query("INSERT INTO dist_test (t, shard, col1, col2) VALUES (1000, 1, 'foo', 'bar'), (1000, 2, 'x', 'y')") + time.sleep(3) + assert node1.query("SELECT col1, col2 FROM dist_test WHERE (t < 3600000) AND (col1 = 'foo') ORDER BY t ASC") == "foo\tbar" From 5367b1b8d0a62c8e86e252d7b272ecc60fde952b Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 2 Nov 2018 19:10:08 +0300 Subject: [PATCH 73/79] Better answer --- dbms/tests/integration/test_block_structure_mismatch/test.py | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_block_structure_mismatch/test.py b/dbms/tests/integration/test_block_structure_mismatch/test.py index 36913d5ebe3..4262c312fe4 100644 --- a/dbms/tests/integration/test_block_structure_mismatch/test.py +++ b/dbms/tests/integration/test_block_structure_mismatch/test.py @@ -48,4 +48,4 @@ CREATE TABLE dist_test ON CLUSTER testcluster ( def test(started_cluster): node1.query("INSERT INTO dist_test (t, shard, col1, col2) VALUES (1000, 1, 'foo', 'bar'), (1000, 2, 'x', 'y')") time.sleep(3) - assert node1.query("SELECT col1, col2 FROM dist_test WHERE (t < 3600000) AND (col1 = 'foo') ORDER BY t ASC") == "foo\tbar" + assert node1.query("SELECT col1, col2 FROM dist_test WHERE (t < 3600000) AND (col1 = 'foo') ORDER BY t ASC") == "foo\tbar\n" From 6f00f165e561904b9a46044e93e9410bc54efbd4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Nov 2018 12:32:59 +0300 Subject: [PATCH 74/79] Clarifications; potential improvement #3490 --- dbms/src/Common/HashTable/HashTable.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index adf6785c3aa..8ac81887caa 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -445,12 +445,15 @@ protected: Derived & operator++() { + /// If iterator was pointed to ZeroValueStorage, move it to the beginning of the main buffer. if (unlikely(ptr->isZero(*container))) ptr = container->buf; else ++ptr; - while (ptr < container->buf + container->grower.bufSize() && ptr->isZero(*container)) + /// Skip empty cells in the main buffer. + auto buf_end = container->buf + container->grower.bufSize(); + while (ptr < buf_end && ptr->isZero(*container)) ++ptr; return static_cast(*this); From a191c322542d0249ab867b624a9b92913861a745 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Nov 2018 13:34:48 +0300 Subject: [PATCH 75/79] Potential improvement #3490 --- dbms/src/Common/HashTable/HashTable.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index 8ac81887caa..df8fca21647 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -572,7 +572,8 @@ public: return iteratorToZero(); const Cell * ptr = buf; - while (ptr < buf + grower.bufSize() && ptr->isZero(*this)) + auto buf_end = buf + grower.bufSize(); + while (ptr < buf_end && ptr->isZero(*this)) ++ptr; return const_iterator(this, ptr); @@ -587,7 +588,8 @@ public: return iteratorToZero(); Cell * ptr = buf; - while (ptr < buf + grower.bufSize() && ptr->isZero(*this)) + auto buf_end = buf + grower.bufSize(); + while (ptr < buf_end && ptr->isZero(*this)) ++ptr; return iterator(this, ptr); @@ -814,9 +816,9 @@ public: if (this->hasZero()) this->zeroValue()->write(wb); - for (size_t i = 0; i < grower.bufSize(); ++i) - if (!buf[i].isZero(*this)) - buf[i].write(wb); + for (auto ptr = buf, buf_end = buf + grower.bufSize(); ptr < buf_end; ++ptr) + if (!ptr->isZero(*this)) + ptr->write(wb); } void writeText(DB::WriteBuffer & wb) const @@ -830,12 +832,12 @@ public: this->zeroValue()->writeText(wb); } - for (size_t i = 0; i < grower.bufSize(); ++i) + for (auto ptr = buf, buf_end = buf + grower.bufSize(); ptr < buf_end; ++ptr) { - if (!buf[i].isZero(*this)) + if (!ptr->isZero(*this)) { DB::writeChar(',', wb); - buf[i].writeText(wb); + ptr->writeText(wb); } 
} } From 070a66454823782b9b8d668846f22a3000637249 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 5 Nov 2018 17:20:26 +0300 Subject: [PATCH 76/79] Do not regenerate contributors list in ./release --- release | 2 -- 1 file changed, 2 deletions(-) diff --git a/release b/release index 2a76b9bbdd4..23bfd6f2dd6 100755 --- a/release +++ b/release @@ -107,8 +107,6 @@ echo -e "\nCurrent version is $VERSION_STRING" gen_changelog "$VERSION_STRING" "" "$AUTHOR" "" -$CURDIR/dbms/src/Storages/System/StorageSystemContributors.sh - if [ -z "$USE_PBUILDER" ] ; then DEB_CC=${DEB_CC:=`which gcc-7 gcc-8 gcc | head -n1`} DEB_CXX=${DEB_CXX:=`which g++-7 g++-8 g++ | head -n1`} From c5173782dc823d1d7256ee7350cc96d302e6019c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Nov 2018 17:29:38 +0300 Subject: [PATCH 77/79] Potentially better #3490 --- dbms/src/Common/HashTable/HashTable.h | 5 ++++- dbms/src/Interpreters/Aggregator.cpp | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index df8fca21647..237d2955404 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -420,7 +420,7 @@ protected: void destroyElements() { if (!std::is_trivially_destructible_v) - for (iterator it = begin(); it != end(); ++it) + for (iterator it = begin(), it_end = end(); it != it_end; ++it) it.ptr->~Cell(); } @@ -579,6 +579,8 @@ public: return const_iterator(this, ptr); } + const_iterator cbegin() const { return begin(); } + iterator begin() { if (!buf) @@ -596,6 +598,7 @@ public: } const_iterator end() const { return const_iterator(this, buf + grower.bufSize()); } + const_iterator cend() const { return end(); } iterator end() { return iterator(this, buf + grower.bufSize()); } diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index f854bdb894b..2019ad9f04e 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1477,7 +1477,7 @@ void NO_INLINE Aggregator::mergeDataImpl( { for (auto it = table_src.begin(), end = table_src.end(); it != end; ++it) { - decltype(it) res_it; + typename Table::iterator res_it; bool inserted; table_dst.emplace(it->first, res_it, inserted, it.getHash()); @@ -1512,9 +1512,9 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl( Table & table_src, Arena * arena) const { - for (auto it = table_src.begin(); it != table_src.end(); ++it) + for (auto it = table_src.begin(), end = table_src.end(); it != end; ++it) { - decltype(it) res_it = table_dst.find(it->first, it.getHash()); + typename Table::iterator res_it = table_dst.find(it->first, it.getHash()); AggregateDataPtr res_data = table_dst.end() == res_it ? overflows From d7992b11d87b7424c7d0f3c27a9a5ef53a170505 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 6 Nov 2018 13:11:37 +0300 Subject: [PATCH 78/79] Fix block structure mismatch in MergingSorted stream. 
--- .../src/Interpreters/InterpreterSelectQuery.cpp | 11 +++++++++++ .../test_block_structure_mismatch/test.py | 17 ++++++++--------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index d81c3b42bc0..fa6b94017fe 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -1236,6 +1236,17 @@ void InterpreterSelectQuery::executeMergeSorted(Pipeline & pipeline) /// If there are several streams, then we merge them into one if (pipeline.hasMoreThanOneStream()) { + /// Unify streams in case they have different headers. + auto first_header = pipeline.streams.at(0)->getHeader(); + for (size_t i = 1; i < pipeline.streams.size(); ++i) + { + auto & stream = pipeline.streams[i]; + auto header = stream->getHeader(); + auto mode = ConvertingBlockInputStream::MatchColumnsMode::Name; + if (!blocksHaveEqualStructure(first_header, header)) + stream = std::make_shared(context, stream, first_header, mode); + } + /** MergingSortedBlockInputStream reads the sources sequentially. * To make the data on the remote servers prepared in parallel, we wrap it in AsynchronousBlockInputStream. */ diff --git a/dbms/tests/integration/test_block_structure_mismatch/test.py b/dbms/tests/integration/test_block_structure_mismatch/test.py index 4262c312fe4..44989c53da5 100644 --- a/dbms/tests/integration/test_block_structure_mismatch/test.py +++ b/dbms/tests/integration/test_block_structure_mismatch/test.py @@ -15,8 +15,9 @@ def started_cluster(): try: cluster.start() - node1.query(''' -CREATE TABLE local_test ON CLUSTER testcluster ( + for node in (node1, node2): + node.query(''' +CREATE TABLE local_test ( t UInt64, date Date MATERIALIZED toDate(t/1000), shard UInt64, @@ -26,19 +27,17 @@ CREATE TABLE local_test ON CLUSTER testcluster ( PARTITION BY toRelativeDayNum(date) ORDER BY (t) SETTINGS index_granularity=8192 - ''') + ''') - node1.query(''' -CREATE TABLE dist_test ON CLUSTER testcluster ( + node.query(''' +CREATE TABLE dist_test ( t UInt64, shard UInt64, date Date MATERIALIZED toDate(t/1000), col1 String, col2 String ) Engine = Distributed(testcluster, default, local_test, shard) - ''') - - time.sleep(0.5) + ''') yield cluster @@ -47,5 +46,5 @@ CREATE TABLE dist_test ON CLUSTER testcluster ( def test(started_cluster): node1.query("INSERT INTO dist_test (t, shard, col1, col2) VALUES (1000, 1, 'foo', 'bar'), (1000, 2, 'x', 'y')") - time.sleep(3) + #time.sleep(3) assert node1.query("SELECT col1, col2 FROM dist_test WHERE (t < 3600000) AND (col1 = 'foo') ORDER BY t ASC") == "foo\tbar\n" From 8df77930ec75f681ae122621927fde58fd2de0f9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 6 Nov 2018 14:44:45 +0300 Subject: [PATCH 79/79] Added InterpreterSelectQuery::unifyStreams --- .../Interpreters/InterpreterSelectQuery.cpp | 39 +++++++++---------- .../src/Interpreters/InterpreterSelectQuery.h | 3 ++ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index fa6b94017fe..223f6ba2f08 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -1236,16 +1236,7 @@ void InterpreterSelectQuery::executeMergeSorted(Pipeline & pipeline) /// If there are several streams, then we merge them into one if (pipeline.hasMoreThanOneStream()) { - /// Unify streams in case they have different 
headers. - auto first_header = pipeline.streams.at(0)->getHeader(); - for (size_t i = 1; i < pipeline.streams.size(); ++i) - { - auto & stream = pipeline.streams[i]; - auto header = stream->getHeader(); - auto mode = ConvertingBlockInputStream::MatchColumnsMode::Name; - if (!blocksHaveEqualStructure(first_header, header)) - stream = std::make_shared(context, stream, first_header, mode); - } + unifyStreams(pipeline); /** MergingSortedBlockInputStream reads the sources sequentially. * To make the data on the remote servers prepared in parallel, we wrap it in AsynchronousBlockInputStream. @@ -1301,16 +1292,7 @@ void InterpreterSelectQuery::executeUnion(Pipeline & pipeline) /// If there are still several streams, then we combine them into one if (pipeline.hasMoreThanOneStream()) { - /// Unify streams in case they have different headers. - auto first_header = pipeline.streams.at(0)->getHeader(); - for (size_t i = 1; i < pipeline.streams.size(); ++i) - { - auto & stream = pipeline.streams[i]; - auto header = stream->getHeader(); - auto mode = ConvertingBlockInputStream::MatchColumnsMode::Name; - if (!blocksHaveEqualStructure(first_header, header)) - stream = std::make_shared(context, stream, first_header, mode); - } + unifyStreams(pipeline); pipeline.firstStream() = std::make_shared>(pipeline.streams, pipeline.stream_with_non_joined_data, max_streams); pipeline.stream_with_non_joined_data = nullptr; @@ -1440,6 +1422,23 @@ void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode)); } +void InterpreterSelectQuery::unifyStreams(Pipeline & pipeline) +{ + if (pipeline.hasMoreThanOneStream()) + { + /// Unify streams in case they have different headers. + auto first_header = pipeline.streams.at(0)->getHeader(); + for (size_t i = 1; i < pipeline.streams.size(); ++i) + { + auto & stream = pipeline.streams[i]; + auto header = stream->getHeader(); + auto mode = ConvertingBlockInputStream::MatchColumnsMode::Name; + if (!blocksHaveEqualStructure(first_header, header)) + stream = std::make_shared(context, stream, first_header, mode); + } + } +} + void InterpreterSelectQuery::ignoreWithTotals() { diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 8f8bda3dbd2..7ae577979d1 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -190,6 +190,9 @@ private: void executeExtremes(Pipeline & pipeline); void executeSubqueriesInSetsAndJoins(Pipeline & pipeline, std::unordered_map & subqueries_for_sets); + /// If pipeline has several streams with different headers, add ConvertingBlockInputStream to first header. + void unifyStreams(Pipeline & pipeline); + enum class Modificator { ROLLUP = 0,